Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
authorJakub Kicinski <kuba@kernel.org>
Tue, 31 Aug 2021 16:06:04 +0000 (09:06 -0700)
committerJakub Kicinski <kuba@kernel.org>
Tue, 31 Aug 2021 16:06:04 +0000 (09:06 -0700)
include/linux/netdevice.h
net/socket.c

  d0efb16294d1 ("net: don't unconditionally copy_from_user a struct ifreq for socket ioctls")

  876f0bf9d0d5 ("net: socket: simplify dev_ifconf handling")
  29c4964822aa ("net: socket: rework compat_ifreq_ioctl()")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1814 files changed:
.mailmap
Documentation/admin-guide/kernel-parameters.txt
Documentation/bpf/index.rst
Documentation/bpf/libbpf/index.rst [new file with mode: 0644]
Documentation/bpf/libbpf/libbpf.rst [deleted file]
Documentation/bpf/libbpf/libbpf_api.rst [deleted file]
Documentation/bpf/libbpf/libbpf_naming_convention.rst
Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt [deleted file]
Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/can/bosch,c_can.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
Documentation/devicetree/bindings/net/can/c_can.txt [deleted file]
Documentation/devicetree/bindings/net/can/can-controller.yaml
Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml
Documentation/devicetree/bindings/net/fsl,fec.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/fsl-fec.txt [deleted file]
Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/litex,liteeth.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/net/macb.txt
Documentation/devicetree/bindings/net/qcom,ipa.yaml
Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
Documentation/driver-api/nfc/nfc-hci.rst
Documentation/networking/batman-adv.rst
Documentation/networking/bonding.rst
Documentation/networking/device_drivers/ethernet/freescale/dpaa2/index.rst
Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst [new file with mode: 0644]
Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
Documentation/networking/devlink/devlink-params.rst
Documentation/networking/devlink/hns3.rst [new file with mode: 0644]
Documentation/networking/devlink/index.rst
Documentation/networking/devlink/sja1105.rst [deleted file]
Documentation/networking/dsa/dsa.rst
Documentation/networking/dsa/sja1105.rst
Documentation/networking/ethtool-netlink.rst
Documentation/networking/filter.rst
Documentation/networking/index.rst
Documentation/networking/ioam6-sysctl.rst [new file with mode: 0644]
Documentation/networking/ip-sysctl.rst
Documentation/networking/mctp.rst [new file with mode: 0644]
Documentation/networking/mptcp-sysctl.rst
Documentation/networking/netdevices.rst
Documentation/networking/nf_conntrack-sysctl.rst
Documentation/networking/pktgen.rst
Documentation/networking/timestamping.rst
Documentation/networking/vrf.rst
MAINTAINERS
arch/alpha/include/uapi/asm/socket.h
arch/arm/boot/dts/imx35.dtsi
arch/arm/boot/dts/imx6q-novena.dts
arch/arm/boot/dts/imx6qdl-aristainetos2.dtsi
arch/arm/boot/dts/imx6qdl-nit6xlite.dtsi
arch/arm/boot/dts/imx6qdl-nitrogen6_max.dtsi
arch/arm/boot/dts/imx6qdl-nitrogen6x.dtsi
arch/arm/boot/dts/imx6qdl-sabrelite.dtsi
arch/arm/boot/dts/imx7-mba7.dtsi
arch/arm/boot/dts/imx7d-mba7.dts
arch/arm/mach-ixp4xx/common.c
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx8qxp-ss-conn.dtsi
arch/arm64/boot/dts/microchip/sparx5.dtsi
arch/arm64/include/asm/compat.h
arch/mips/include/asm/compat.h
arch/mips/include/uapi/asm/socket.h
arch/parisc/include/asm/compat.h
arch/parisc/include/uapi/asm/socket.h
arch/powerpc/include/asm/compat.h
arch/s390/include/asm/ccwgroup.h
arch/s390/include/asm/compat.h
arch/sparc/include/asm/compat.h
arch/sparc/include/uapi/asm/socket.h
arch/um/drivers/vector_kern.c
arch/x86/include/asm/compat.h
arch/x86/include/asm/signal.h
arch/x86/net/bpf_jit_comp.c
drivers/atm/horizon.c
drivers/atm/idt77252.c
drivers/bcma/main.c
drivers/bcma/scan.c
drivers/bluetooth/btbcm.c
drivers/bluetooth/btintel.c
drivers/bluetooth/btintel.h
drivers/bluetooth/btmrvl_sdio.c
drivers/bluetooth/btrsi.c
drivers/bluetooth/btrtl.c
drivers/bluetooth/btusb.c
drivers/bluetooth/hci_bcm.c
drivers/bluetooth/hci_h5.c
drivers/bluetooth/hci_serdev.c
drivers/bluetooth/hci_uart.h
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/mhi/pci_generic.c
drivers/char/pcmcia/synclink_cs.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/std_types.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/media/rc/bpf-lirc.c
drivers/net/Kconfig
drivers/net/Makefile
drivers/net/Space.c
drivers/net/appletalk/Kconfig
drivers/net/appletalk/ipddp.c
drivers/net/appletalk/ltpc.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_netlink.c
drivers/net/bonding/bond_options.c
drivers/net/bonding/bond_procfs.c
drivers/net/bonding/bond_sysfs.c
drivers/net/can/Kconfig
drivers/net/can/at91_can.c
drivers/net/can/c_can/c_can.h
drivers/net/can/c_can/c_can_main.c
drivers/net/can/c_can/c_can_platform.c
drivers/net/can/dev/dev.c
drivers/net/can/dev/netlink.c
drivers/net/can/dev/rx-offload.c
drivers/net/can/flexcan.c
drivers/net/can/janz-ican3.c
drivers/net/can/m_can/m_can.c
drivers/net/can/m_can/m_can.h
drivers/net/can/m_can/m_can_pci.c
drivers/net/can/m_can/m_can_platform.c
drivers/net/can/m_can/tcan4x5x-core.c
drivers/net/can/mscan/mpc5xxx_can.c
drivers/net/can/rcar/Kconfig
drivers/net/can/rcar/rcar_canfd.c
drivers/net/can/sja1000/peak_pci.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
drivers/net/can/spi/mcp251xfd/mcp251xfd.h
drivers/net/can/ti_hecc.c
drivers/net/can/usb/esd_usb2.c
drivers/net/can/usb/etas_es58x/es581_4.c
drivers/net/can/usb/etas_es58x/es58x_core.c
drivers/net/can/usb/etas_es58x/es58x_core.h
drivers/net/can/usb/etas_es58x/es58x_fd.c
drivers/net/can/usb/etas_es58x/es58x_fd.h
drivers/net/can/usb/peak_usb/pcan_usb.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/hirschmann/hellcreek.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/ocelot/Kconfig
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix.h
drivers/net/dsa/sja1105/Kconfig
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_devlink.c
drivers/net/dsa/sja1105/sja1105_dynamic_config.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_spi.c
drivers/net/dsa/sja1105/sja1105_vl.c
drivers/net/eql.c
drivers/net/ethernet/3com/3c509.c
drivers/net/ethernet/3com/3c515.c
drivers/net/ethernet/3com/3c574_cs.c
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/3com/Kconfig
drivers/net/ethernet/8390/Kconfig
drivers/net/ethernet/8390/apne.c
drivers/net/ethernet/8390/ax88796.c
drivers/net/ethernet/8390/axnet_cs.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/pcnet_cs.c
drivers/net/ethernet/8390/smc-ultra.c
drivers/net/ethernet/8390/wd.c
drivers/net/ethernet/8390/xsurf100.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/actions/Kconfig
drivers/net/ethernet/actions/owl-emac.c
drivers/net/ethernet/adaptec/starfire.c
drivers/net/ethernet/agere/et131x.c
drivers/net/ethernet/allwinner/sun4i-emac.c
drivers/net/ethernet/amazon/ena/ena_ethtool.c
drivers/net/ethernet/amd/Kconfig
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/amd/atarilance.c
drivers/net/ethernet/amd/au1000_eth.c
drivers/net/ethernet/amd/lance.c
drivers/net/ethernet/amd/mvme147.c
drivers/net/ethernet/amd/ni65.c
drivers/net/ethernet/amd/pcnet32.c
drivers/net/ethernet/amd/sun3lance.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_main.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/atheros/ag71xx.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/atheros/atl1c/atl1c_main.c
drivers/net/ethernet/atheros/atl1e/atl1e_main.c
drivers/net/ethernet/atheros/atlx/atl1.c
drivers/net/ethernet/atheros/atlx/atl2.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bcmsysport.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bnx2.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnxt/Makefile
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c [new file with mode: 0644]
drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h [new file with mode: 0644]
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/sb1250-mac.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/brocade/bna/bnad_ethtool.c
drivers/net/ethernet/cadence/Kconfig
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/chelsio/Kconfig
drivers/net/ethernet/chelsio/cxgb/cxgb2.c
drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
drivers/net/ethernet/chelsio/cxgb3/sge.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/cirrus/Kconfig
drivers/net/ethernet/cirrus/cs89x0.c
drivers/net/ethernet/cirrus/ep93xx_eth.c
drivers/net/ethernet/cisco/enic/enic_ethtool.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/davicom/dm9000.c
drivers/net/ethernet/dec/tulip/de4x5.c
drivers/net/ethernet/dec/tulip/media.c
drivers/net/ethernet/dec/tulip/tulip_core.c
drivers/net/ethernet/dec/tulip/winbond-840.c
drivers/net/ethernet/dlink/dl2k.c
drivers/net/ethernet/dlink/sundance.c
drivers/net/ethernet/dnet.c
drivers/net/ethernet/ec_bhf.c
drivers/net/ethernet/emulex/benet/be_ethtool.c
drivers/net/ethernet/ethoc.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/faraday/ftmac100.c
drivers/net/ethernet/fealnx.c
drivers/net/ethernet/freescale/Kconfig
drivers/net/ethernet/freescale/dpaa/Kconfig
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
drivers/net/ethernet/freescale/dpaa2/Makefile
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
drivers/net/ethernet/freescale/dpaa2/dpsw.c
drivers/net/ethernet/freescale/dpaa2/dpsw.h
drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/enetc/enetc_vf.c
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fec_mpc52xx.c
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/freescale/gianfar_ethtool.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/google/gve/gve_adminq.c
drivers/net/ethernet/hisilicon/Kconfig
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hisi_femac.c
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h [new file with mode: 0644]
drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c [new file with mode: 0644]
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h [new file with mode: 0644]
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c [new file with mode: 0644]
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h [new file with mode: 0644]
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.h
drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
drivers/net/ethernet/huawei/hinic/hinic_main.c
drivers/net/ethernet/huawei/hinic/hinic_sriov.c
drivers/net/ethernet/i825xx/82596.c
drivers/net/ethernet/i825xx/sun3_82586.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/intel/Kconfig
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/e1000/e1000_ethtool.c
drivers/net/ethernet/intel/e1000/e1000_main.c
drivers/net/ethernet/intel/e1000e/ethtool.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/ich8lan.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/e1000e/ptp.c
drivers/net/ethernet/intel/e1000e/regs.h
drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_ptp.c
drivers/net/ethernet/intel/i40e/i40e_register.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/iavf/iavf.h
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/ice/ice_devlink.c
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/igb/e1000_mac.c
drivers/net/ethernet/intel/igb/igb_ethtool.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igbvf/ethtool.c
drivers/net/ethernet/intel/igbvf/netdev.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_base.c
drivers/net/ethernet/intel/igc/igc_defines.h
drivers/net/ethernet/intel/igc/igc_ethtool.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/igc/igc_phy.c
drivers/net/ethernet/intel/igc/igc_ptp.c
drivers/net/ethernet/intel/igc/igc_regs.h
drivers/net/ethernet/intel/igc/igc_tsn.c
drivers/net/ethernet/intel/igc/igc_tsn.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ethtool.c
drivers/net/ethernet/jme.c
drivers/net/ethernet/korina.c
drivers/net/ethernet/lantiq_etop.c
drivers/net/ethernet/litex/Kconfig [new file with mode: 0644]
drivers/net/ethernet/litex/Makefile [new file with mode: 0644]
drivers/net/ethernet/litex/litex_liteeth.c [new file with mode: 0644]
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/Kconfig
drivers/net/ethernet/marvell/octeontx2/af/Makefile
drivers/net/ethernet/marvell/octeontx2/af/cgx.c
drivers/net/ethernet/marvell/octeontx2/af/cgx.h
drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
drivers/net/ethernet/marvell/octeontx2/af/common.h
drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h
drivers/net/ethernet/marvell/octeontx2/af/mbox.c
drivers/net/ethernet/marvell/octeontx2/af/mbox.h
drivers/net/ethernet/marvell/octeontx2/af/npc.h
drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
drivers/net/ethernet/marvell/octeontx2/af/ptp.c
drivers/net/ethernet/marvell/octeontx2/af/ptp.h
drivers/net/ethernet/marvell/octeontx2/af/rpm.c
drivers/net/ethernet/marvell/octeontx2/af/rpm.h
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_cn10k.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c [new file with mode: 0644]
drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h
drivers/net/ethernet/marvell/octeontx2/nic/Makefile
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c [new file with mode: 0644]
drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h [new file with mode: 0644]
drivers/net/ethernet/marvell/octeontx2/nic/otx2_dmac_flt.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.h
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
drivers/net/ethernet/marvell/prestera/prestera_switchdev.h
drivers/net/ethernet/marvell/pxa168_eth.c
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlx4/Kconfig
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx5/core/Kconfig
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/channels.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/channels.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/params.h
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/rss.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/rss.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/tir.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/tir.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h
drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/events.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/micrel/ks8851_common.c
drivers/net/ethernet/micrel/ksz884x.c
drivers/net/ethernet/microchip/Kconfig
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/microchip/sparx5/Makefile
drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c [new file with mode: 0644]
drivers/net/ethernet/microchip/sparx5/sparx5_main.c
drivers/net/ethernet/microchip/sparx5/sparx5_main.h
drivers/net/ethernet/microchip/sparx5/sparx5_packet.c
drivers/net/ethernet/microchip/sparx5/sparx5_port.c
drivers/net/ethernet/microchip/sparx5/sparx5_port.h
drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
drivers/net/ethernet/microsoft/mana/gdma.h
drivers/net/ethernet/microsoft/mana/gdma_main.c
drivers/net/ethernet/microsoft/mana/hw_channel.c
drivers/net/ethernet/microsoft/mana/mana.h
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/mscc/Kconfig
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot.h
drivers/net/ethernet/mscc/ocelot_net.c
drivers/net/ethernet/mscc/ocelot_vsc7514.c
drivers/net/ethernet/myricom/myri10ge/myri10ge.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/natsemi.c
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/neterion/s2io.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/netronome/Kconfig
drivers/net/ethernet/netronome/nfp/flower/action.c
drivers/net/ethernet/netronome/nfp/flower/conntrack.c
drivers/net/ethernet/netronome/nfp/flower/conntrack.h
drivers/net/ethernet/netronome/nfp/flower/main.h
drivers/net/ethernet/netronome/nfp/flower/match.c
drivers/net/ethernet/netronome/nfp/flower/metadata.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/nvidia/forcedeth.c
drivers/net/ethernet/nxp/lpc_eth.c
drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_phy.c
drivers/net/ethernet/packetengines/hamachi.c
drivers/net/ethernet/packetengines/yellowfin.c
drivers/net/ethernet/pasemi/pasemi_mac.c
drivers/net/ethernet/pensando/Kconfig
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/pensando/ionic/ionic_dev.c
drivers/net/ethernet/pensando/ionic/ionic_dev.h
drivers/net/ethernet/pensando/ionic/ionic_devlink.c
drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
drivers/net/ethernet/pensando/ionic/ionic_if.h
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_main.c
drivers/net/ethernet/pensando/ionic/ionic_phc.c
drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/Kconfig
drivers/net/ethernet/qlogic/netxen/netxen_nic.h
drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_devlink.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/qualcomm/qca_spi.c
drivers/net/ethernet/qualcomm/qca_uart.c
drivers/net/ethernet/rdc/r6040.c
drivers/net/ethernet/realtek/8139cp.c
drivers/net/ethernet/realtek/8139too.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/Kconfig
drivers/net/ethernet/renesas/ravb.h
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/ravb_ptp.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/rocker/rocker.h
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/samsung/Kconfig
drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
drivers/net/ethernet/sfc/Kconfig
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/ethtool.c
drivers/net/ethernet/sfc/falcon/efx.c
drivers/net/ethernet/sfc/falcon/ethtool.c
drivers/net/ethernet/sgi/ioc3-eth.c
drivers/net/ethernet/sgi/meth.c
drivers/net/ethernet/sis/sis190.c
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/smsc/Kconfig
drivers/net/ethernet/smsc/epic100.c
drivers/net/ethernet/smsc/smc9194.c
drivers/net/ethernet/smsc/smc91c92_cs.c
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/smsc/smsc9420.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/socionext/sni_ave.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/sun/cassini.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/sun/sungem.c
drivers/net/ethernet/sun/sunhme.c
drivers/net/ethernet/synopsys/dwc-xlgmac-ethtool.c
drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/ti/am65-cpsw-nuss.h
drivers/net/ethernet/ti/cpmac.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_ethtool.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/ti/cpsw_priv.h
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ethernet/ti/tlan.c
drivers/net/ethernet/toshiba/spider_net.c
drivers/net/ethernet/toshiba/tc35815.c
drivers/net/ethernet/tundra/tsi108_eth.c
drivers/net/ethernet/via/via-rhine.c
drivers/net/ethernet/via/via-velocity.c
drivers/net/ethernet/wiznet/w5100.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/ethernet/xilinx/xilinx_emaclite.c
drivers/net/ethernet/xircom/xirc2ps_cs.c
drivers/net/ethernet/xscale/Kconfig
drivers/net/ethernet/xscale/Makefile
drivers/net/ethernet/xscale/ixp46x_ts.h
drivers/net/ethernet/xscale/ixp4xx_eth.c
drivers/net/ethernet/xscale/ptp_ixp46x.c
drivers/net/fddi/skfp/skfddi.c
drivers/net/hamradio/baycom_epp.c
drivers/net/hamradio/baycom_par.c
drivers/net/hamradio/baycom_ser_fdx.c
drivers/net/hamradio/baycom_ser_hdx.c
drivers/net/hamradio/bpqether.c
drivers/net/hamradio/dmascc.c
drivers/net/hamradio/hdlcdrv.c
drivers/net/hamradio/scc.c
drivers/net/hamradio/yam.c
drivers/net/hippi/rrunner.c
drivers/net/hippi/rrunner.h
drivers/net/ipa/Makefile
drivers/net/ipa/gsi.c
drivers/net/ipa/gsi.h
drivers/net/ipa/gsi_trans.c
drivers/net/ipa/ipa.h
drivers/net/ipa/ipa_clock.c [deleted file]
drivers/net/ipa/ipa_clock.h [deleted file]
drivers/net/ipa/ipa_cmd.c
drivers/net/ipa/ipa_cmd.h
drivers/net/ipa/ipa_data-v3.1.c
drivers/net/ipa/ipa_data-v3.5.1.c
drivers/net/ipa/ipa_data-v4.11.c
drivers/net/ipa/ipa_data-v4.2.c
drivers/net/ipa/ipa_data-v4.5.c
drivers/net/ipa/ipa_data-v4.9.c
drivers/net/ipa/ipa_data.h
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_interrupt.c
drivers/net/ipa/ipa_interrupt.h
drivers/net/ipa/ipa_main.c
drivers/net/ipa/ipa_modem.c
drivers/net/ipa/ipa_modem.h
drivers/net/ipa/ipa_power.c [new file with mode: 0644]
drivers/net/ipa/ipa_power.h [new file with mode: 0644]
drivers/net/ipa/ipa_qmi.c
drivers/net/ipa/ipa_qmi.h
drivers/net/ipa/ipa_reg.h
drivers/net/ipa/ipa_resource.c
drivers/net/ipa/ipa_smp2p.c
drivers/net/ipa/ipa_smp2p.h
drivers/net/ipa/ipa_table.c
drivers/net/ipa/ipa_table.h
drivers/net/ipa/ipa_uc.c
drivers/net/ipa/ipa_uc.h
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macvlan.c
drivers/net/mctp/Kconfig [new file with mode: 0644]
drivers/net/mctp/Makefile [new file with mode: 0644]
drivers/net/mdio/Kconfig
drivers/net/mdio/mdio-ipq4019.c
drivers/net/mdio/mdio-mscc-miim.c
drivers/net/mhi/Makefile [deleted file]
drivers/net/mhi/mhi.h [deleted file]
drivers/net/mhi/net.c [deleted file]
drivers/net/mhi/proto_mbim.c [deleted file]
drivers/net/mhi_net.c [new file with mode: 0644]
drivers/net/mii.c
drivers/net/netdevsim/bus.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/ethtool.c
drivers/net/netdevsim/fib.c
drivers/net/netdevsim/netdev.c
drivers/net/netdevsim/netdevsim.h
drivers/net/pcs/pcs-xpcs.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/at803x.c
drivers/net/phy/dp83822.c
drivers/net/phy/intel-xway.c
drivers/net/phy/marvell.c
drivers/net/phy/marvell10g.c
drivers/net/phy/mscc/mscc_ptp.c
drivers/net/phy/mxl-gpy.c [new file with mode: 0644]
drivers/net/phy/nxp-tja11xx.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/phy/xilinx_gmii2rgmii.c
drivers/net/plip/plip.c
drivers/net/ppp/ppp_generic.c
drivers/net/sb1000.c
drivers/net/slip/slip.c
drivers/net/team/team_mode_loadbalance.c
drivers/net/tun.c
drivers/net/usb/asix_devices.c
drivers/net/usb/ax88172a.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/cdc-phonet.c
drivers/net/usb/dm9601.c
drivers/net/usb/hso.c
drivers/net/usb/ipheth.c
drivers/net/usb/lan78xx.c
drivers/net/usb/mcs7830.c
drivers/net/usb/pegasus.c
drivers/net/usb/r8152.c
drivers/net/usb/rtl8150.c
drivers/net/usb/smsc75xx.c
drivers/net/usb/smsc95xx.c
drivers/net/usb/sr9700.c
drivers/net/usb/sr9800.c
drivers/net/usb/usbnet.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/Makefile
drivers/net/vmxnet3/upt1_defs.h
drivers/net/vmxnet3/vmxnet3_defs.h
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vmxnet3/vmxnet3_ethtool.c
drivers/net/vmxnet3/vmxnet3_int.h
drivers/net/vrf.c
drivers/net/wan/Kconfig
drivers/net/wan/Makefile
drivers/net/wan/c101.c
drivers/net/wan/cosa.c
drivers/net/wan/farsync.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wan/hdlc.c
drivers/net/wan/hdlc_cisco.c
drivers/net/wan/hdlc_fr.c
drivers/net/wan/hdlc_ppp.c
drivers/net/wan/hdlc_raw.c
drivers/net/wan/hdlc_raw_eth.c
drivers/net/wan/hdlc_x25.c
drivers/net/wan/hostess_sv11.c
drivers/net/wan/ixp4xx_hss.c
drivers/net/wan/lmc/lmc.h
drivers/net/wan/lmc/lmc_main.c
drivers/net/wan/lmc/lmc_proto.c
drivers/net/wan/lmc/lmc_proto.h
drivers/net/wan/n2.c
drivers/net/wan/pc300too.c
drivers/net/wan/pci200syn.c
drivers/net/wan/sbni.c [deleted file]
drivers/net/wan/sbni.h [deleted file]
drivers/net/wan/sealevel.c
drivers/net/wan/wanxl.c
drivers/net/wireless/ath/ath10k/pci.c
drivers/net/wireless/ath/ath11k/dp_rx.c
drivers/net/wireless/ath/ath11k/pci.c
drivers/net/wireless/ath/ath5k/pci.c
drivers/net/wireless/ath/ath6kl/wmi.c
drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
drivers/net/wireless/ath/ath9k/hw.c
drivers/net/wireless/ath/ath9k/pci.c
drivers/net/wireless/ath/wcn36xx/main.c
drivers/net/wireless/ath/wcn36xx/smd.c
drivers/net/wireless/ath/wcn36xx/txrx.c
drivers/net/wireless/ath/wcn36xx/wcn36xx.h
drivers/net/wireless/ath/wil6210/ethtool.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/Makefile
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h
drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c [new file with mode: 0644]
drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h [new file with mode: 0644]
drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
drivers/net/wireless/broadcom/brcm80211/include/soc.h
drivers/net/wireless/cisco/airo.c
drivers/net/wireless/intel/ipw2x00/libipw_rx.c
drivers/net/wireless/intel/ipw2x00/libipw_tx.c
drivers/net/wireless/intel/iwlegacy/3945-mac.c
drivers/net/wireless/intel/iwlegacy/3945.c
drivers/net/wireless/intel/iwlegacy/4965-mac.c
drivers/net/wireless/intel/iwlegacy/common.c
drivers/net/wireless/intel/iwlwifi/cfg/22000.c
drivers/net/wireless/intel/iwlwifi/cfg/9000.c
drivers/net/wireless/intel/iwlwifi/dvm/main.c
drivers/net/wireless/intel/iwlwifi/dvm/rx.c
drivers/net/wireless/intel/iwlwifi/fw/acpi.c
drivers/net/wireless/intel/iwlwifi/fw/acpi.h
drivers/net/wireless/intel/iwlwifi/fw/api/coex.h
drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
drivers/net/wireless/intel/iwlwifi/fw/api/location.h
drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
drivers/net/wireless/intel/iwlwifi/fw/api/offload.h
drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
drivers/net/wireless/intel/iwlwifi/fw/dbg.c
drivers/net/wireless/intel/iwlwifi/fw/dbg.h
drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
drivers/net/wireless/intel/iwlwifi/fw/file.h
drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
drivers/net/wireless/intel/iwlwifi/fw/pnvm.h
drivers/net/wireless/intel/iwlwifi/iwl-config.h
drivers/net/wireless/intel/iwlwifi/iwl-csr.h
drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.h
drivers/net/wireless/intel/iwlwifi/iwl-drv.c
drivers/net/wireless/intel/iwlwifi/iwl-io.c
drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
drivers/net/wireless/intel/iwlwifi/iwl-prph.h
drivers/net/wireless/intel/iwlwifi/iwl-trans.h
drivers/net/wireless/intel/iwlwifi/mvm/constants.h
drivers/net/wireless/intel/iwlwifi/mvm/d3.c
drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
drivers/net/wireless/intel/iwlwifi/mvm/fw.c
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/mvm/rfi.c
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
drivers/net/wireless/intel/iwlwifi/mvm/scan.c
drivers/net/wireless/intel/iwlwifi/mvm/sta.c
drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/intersil/Kconfig
drivers/net/wireless/intersil/Makefile
drivers/net/wireless/intersil/hostap/hostap.h
drivers/net/wireless/intersil/hostap/hostap_ioctl.c
drivers/net/wireless/intersil/hostap/hostap_main.c
drivers/net/wireless/intersil/prism54/Makefile [deleted file]
drivers/net/wireless/intersil/prism54/isl_38xx.c [deleted file]
drivers/net/wireless/intersil/prism54/isl_38xx.h [deleted file]
drivers/net/wireless/intersil/prism54/isl_ioctl.c [deleted file]
drivers/net/wireless/intersil/prism54/isl_ioctl.h [deleted file]
drivers/net/wireless/intersil/prism54/isl_oid.h [deleted file]
drivers/net/wireless/intersil/prism54/islpci_dev.c [deleted file]
drivers/net/wireless/intersil/prism54/islpci_dev.h [deleted file]
drivers/net/wireless/intersil/prism54/islpci_eth.c [deleted file]
drivers/net/wireless/intersil/prism54/islpci_eth.h [deleted file]
drivers/net/wireless/intersil/prism54/islpci_hotplug.c [deleted file]
drivers/net/wireless/intersil/prism54/islpci_mgt.c [deleted file]
drivers/net/wireless/intersil/prism54/islpci_mgt.h [deleted file]
drivers/net/wireless/intersil/prism54/oid_mgt.c [deleted file]
drivers/net/wireless/intersil/prism54/oid_mgt.h [deleted file]
drivers/net/wireless/intersil/prism54/prismcompat.h [deleted file]
drivers/net/wireless/marvell/libertas/ethtool.c
drivers/net/wireless/marvell/mwifiex/Makefile
drivers/net/wireless/marvell/mwifiex/cmdevt.c
drivers/net/wireless/marvell/mwifiex/pcie.c
drivers/net/wireless/marvell/mwifiex/pcie.h
drivers/net/wireless/marvell/mwifiex/pcie_quirks.c [new file with mode: 0644]
drivers/net/wireless/marvell/mwifiex/pcie_quirks.h [new file with mode: 0644]
drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
drivers/net/wireless/marvell/mwifiex/usb.h
drivers/net/wireless/microchip/wilc1000/sdio.c
drivers/net/wireless/microchip/wilc1000/spi.c
drivers/net/wireless/microchip/wilc1000/wlan.c
drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
drivers/net/wireless/ray_cs.c
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
drivers/net/wireless/realtek/rtw88/Makefile
drivers/net/wireless/realtek/rtw88/fw.c
drivers/net/wireless/realtek/rtw88/fw.h
drivers/net/wireless/realtek/rtw88/main.c
drivers/net/wireless/realtek/rtw88/main.h
drivers/net/wireless/realtek/rtw88/pci.c
drivers/net/wireless/realtek/rtw88/pci.h
drivers/net/wireless/realtek/rtw88/rtw8822c.c
drivers/net/wireless/realtek/rtw88/tx.c
drivers/net/wireless/realtek/rtw88/wow.c
drivers/net/wireless/rsi/rsi_91x_debugfs.c
drivers/net/wireless/rsi/rsi_91x_hal.c
drivers/net/wireless/rsi/rsi_91x_usb.c
drivers/net/wwan/Kconfig
drivers/net/wwan/Makefile
drivers/net/wwan/iosm/iosm_ipc_pcie.c
drivers/net/wwan/iosm/iosm_ipc_protocol.c
drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
drivers/net/wwan/mhi_wwan_mbim.c [new file with mode: 0644]
drivers/net/wwan/wwan_core.c
drivers/net/xen-netfront.c
drivers/nfc/fdp/fdp.c
drivers/nfc/fdp/fdp.h
drivers/nfc/fdp/i2c.c
drivers/nfc/mei_phy.c
drivers/nfc/mei_phy.h
drivers/nfc/microread/i2c.c
drivers/nfc/microread/mei.c
drivers/nfc/microread/microread.c
drivers/nfc/microread/microread.h
drivers/nfc/nfcmrvl/fw_dnld.c
drivers/nfc/nfcmrvl/i2c.c
drivers/nfc/nfcmrvl/main.c
drivers/nfc/nfcmrvl/nfcmrvl.h
drivers/nfc/nfcmrvl/spi.c
drivers/nfc/nfcmrvl/uart.c
drivers/nfc/nfcmrvl/usb.c
drivers/nfc/nfcsim.c
drivers/nfc/nxp-nci/core.c
drivers/nfc/pn533/pn533.c
drivers/nfc/pn544/i2c.c
drivers/nfc/pn544/pn544.c
drivers/nfc/pn544/pn544.h
drivers/nfc/port100.c
drivers/nfc/s3fwrn5/core.c
drivers/nfc/s3fwrn5/firmware.c
drivers/nfc/s3fwrn5/nci.c
drivers/nfc/s3fwrn5/nci.h
drivers/nfc/st-nci/core.c
drivers/nfc/st-nci/i2c.c
drivers/nfc/st-nci/ndlc.c
drivers/nfc/st-nci/ndlc.h
drivers/nfc/st-nci/spi.c
drivers/nfc/st-nci/vendor_cmds.c
drivers/nfc/st21nfca/core.c
drivers/nfc/st21nfca/i2c.c
drivers/nfc/st21nfca/st21nfca.h
drivers/nfc/st21nfca/vendor_cmds.c
drivers/nfc/st95hf/core.c
drivers/nfc/trf7970a.c
drivers/nfc/virtual_ncidev.c
drivers/pci/pci.h
drivers/pci/pcie/ptm.c
drivers/phy/marvell/phy-mvebu-a3700-comphy.c
drivers/phy/marvell/phy-mvebu-cp110-comphy.c
drivers/ptp/Kconfig
drivers/ptp/ptp_ocp.c
drivers/ptp/ptp_vclock.c
drivers/s390/cio/ccwgroup.c
drivers/s390/net/Kconfig
drivers/s390/net/ctcm_fsms.c
drivers/s390/net/ctcm_mpc.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_core_mpc.c
drivers/s390/net/qeth_core_mpc.h
drivers/s390/net/qeth_core_sys.c
drivers/s390/net/qeth_ethtool.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/scsi/cxgbi/cxgb4i/Kconfig
drivers/staging/octeon/ethernet.c
drivers/staging/qlge/qlge_ethtool.c
drivers/staging/qlge/qlge_main.c
drivers/staging/rtl8188eu/include/osdep_intf.h
drivers/staging/rtl8188eu/include/rtw_android.h
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
drivers/staging/rtl8188eu/os_dep/os_intfs.c
drivers/staging/rtl8188eu/os_dep/rtw_android.c
drivers/staging/rtl8723bs/include/osdep_intf.h
drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
drivers/staging/rtl8723bs/os_dep/os_intfs.c
drivers/staging/wlan-ng/p80211netdev.c
drivers/tty/synclink_gt.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vhost/net.c
include/asm-generic/compat.h
include/linux/bitops.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/bpfptr.h
include/linux/btf.h
include/linux/btf_ids.h
include/linux/can/bittiming.h
include/linux/can/dev.h
include/linux/can/platform/flexcan.h [new file with mode: 0644]
include/linux/can/rx-offload.h
include/linux/compat.h
include/linux/dsa/8021q.h
include/linux/dsa/sja1105.h
include/linux/ethtool.h
include/linux/filter.h
include/linux/fsl/mc.h
include/linux/genetlink.h
include/linux/hdlc.h
include/linux/hdlcdrv.h
include/linux/ieee80211.h
include/linux/if_bridge.h
include/linux/igmp.h
include/linux/inetdevice.h
include/linux/ioam6.h [new file with mode: 0644]
include/linux/ioam6_genl.h [new file with mode: 0644]
include/linux/ioam6_iptunnel.h [new file with mode: 0644]
include/linux/ipv6.h
include/linux/memcontrol.h
include/linux/mhi.h
include/linux/mii.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm_types.h
include/linux/mmc/sdio_ids.h
include/linux/netdevice.h
include/linux/netfilter/x_tables.h
include/linux/netfilter_bridge/ebtables.h
include/linux/pci.h
include/linux/perf_event.h
include/linux/phy.h
include/linux/ptp_clock_kernel.h
include/linux/sched.h
include/linux/skbuff.h
include/linux/socket.h
include/linux/ssb/ssb.h
include/linux/ssb/ssb_driver_extif.h
include/linux/trace_events.h
include/linux/typecheck.h
include/net/Space.h
include/net/act_api.h
include/net/af_unix.h
include/net/ax88796.h
include/net/bluetooth/hci_core.h
include/net/bond_3ad.h
include/net/bond_options.h
include/net/bonding.h
include/net/cfg80211.h
include/net/compat.h
include/net/devlink.h
include/net/dn_fib.h
include/net/dsa.h
include/net/dst.h
include/net/flow_offload.h
include/net/ieee80211_radiotap.h
include/net/if_inet6.h
include/net/inet_hashtables.h
include/net/ioam6.h [new file with mode: 0644]
include/net/ip.h
include/net/ip6_route.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/ipx.h [deleted file]
include/net/lwtunnel.h
include/net/mac80211.h
include/net/mctp.h [new file with mode: 0644]
include/net/mctpdevice.h [new file with mode: 0644]
include/net/mptcp.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_hooks_lwtunnel.h [new file with mode: 0644]
include/net/netfilter/nf_queue.h
include/net/netlink.h
include/net/netns/conntrack.h
include/net/netns/ipv4.h
include/net/netns/ipv6.h
include/net/netns/mctp.h [new file with mode: 0644]
include/net/netns/netfilter.h
include/net/netns/x_tables.h [deleted file]
include/net/netns/xfrm.h
include/net/nfc/digital.h
include/net/nfc/hci.h
include/net/nfc/nci_core.h
include/net/nfc/nfc.h
include/net/page_pool.h
include/net/pkt_cls.h
include/net/rtnetlink.h
include/net/sch_generic.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/net/xdp.h
include/net/xfrm.h
include/soc/mscc/ocelot.h
include/trace/events/qdisc.h
include/uapi/asm-generic/socket.h
include/uapi/linux/bpf.h
include/uapi/linux/can/j1939.h
include/uapi/linux/ethtool.h
include/uapi/linux/ethtool_netlink.h
include/uapi/linux/if_arp.h
include/uapi/linux/if_bridge.h
include/uapi/linux/if_ether.h
include/uapi/linux/if_link.h
include/uapi/linux/in.h
include/uapi/linux/in6.h
include/uapi/linux/ioam6.h [new file with mode: 0644]
include/uapi/linux/ioam6_genl.h [new file with mode: 0644]
include/uapi/linux/ioam6_iptunnel.h [new file with mode: 0644]
include/uapi/linux/ipv6.h
include/uapi/linux/ipx.h [deleted file]
include/uapi/linux/lwtunnel.h
include/uapi/linux/mctp.h [new file with mode: 0644]
include/uapi/linux/mptcp.h
include/uapi/linux/netfilter/nfnetlink_conntrack.h
include/uapi/linux/nl80211-vnd-intel.h [new file with mode: 0644]
include/uapi/linux/nl80211.h
include/uapi/linux/openvswitch.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/socket.h
include/uapi/linux/tc_act/tc_skbmod.h
include/uapi/linux/xfrm.h
init/main.c
kernel/bpf/Kconfig
kernel/bpf/arraymap.c
kernel/bpf/bpf_iter.c
kernel/bpf/bpf_struct_ops.c
kernel/bpf/bpf_task_storage.c
kernel/bpf/btf.c
kernel/bpf/cgroup.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/hashtab.c
kernel/bpf/helpers.c
kernel/bpf/local_storage.c
kernel/bpf/map_in_map.c
kernel/bpf/stackmap.c
kernel/bpf/syscall.c
kernel/bpf/task_iter.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/events/core.c
kernel/fork.c
kernel/trace/bpf_trace.c
lib/test_bpf.c
mm/memcontrol.c
net/6lowpan/debugfs.c
net/802/Makefile
net/802/p8023.c [deleted file]
net/8021q/vlan.c
net/8021q/vlan_dev.c
net/Kconfig
net/Makefile
net/appletalk/ddp.c
net/ax25/ax25_ip.c
net/ax25/ax25_out.c
net/ax25/ax25_route.c
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/bat_v_elp.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/fragmentation.c
net/batman-adv/gateway_client.c
net/batman-adv/gateway_client.h
net/batman-adv/gateway_common.c
net/batman-adv/hard-interface.c
net/batman-adv/hard-interface.h
net/batman-adv/main.h
net/batman-adv/multicast.c
net/batman-adv/netlink.c
net/batman-adv/network-coding.c
net/batman-adv/originator.c
net/batman-adv/originator.h
net/batman-adv/routing.c
net/batman-adv/send.c
net/batman-adv/soft-interface.c
net/batman-adv/soft-interface.h
net/batman-adv/tp_meter.c
net/batman-adv/translation-table.c
net/batman-adv/translation-table.h
net/batman-adv/tvlv.c
net/bluetooth/cmtp/cmtp.h
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sysfs.c
net/bluetooth/mgmt.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bpf/test_run.c
net/bridge/br.c
net/bridge/br_device.c
net/bridge/br_fdb.c
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_input.c
net/bridge/br_ioctl.c
net/bridge/br_mdb.c
net/bridge/br_multicast.c
net/bridge/br_multicast_eht.c
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_private_mcast_eht.h
net/bridge/br_private_tunnel.h
net/bridge/br_switchdev.c
net/bridge/br_sysfs_br.c
net/bridge/br_sysfs_if.c
net/bridge/br_vlan.c
net/bridge/br_vlan_options.c
net/bridge/br_vlan_tunnel.c
net/bridge/netfilter/ebtable_broute.c
net/bridge/netfilter/ebtable_filter.c
net/bridge/netfilter/ebtable_nat.c
net/bridge/netfilter/ebtables.c
net/can/j1939/j1939-priv.h
net/can/j1939/socket.c
net/can/j1939/transport.c
net/can/raw.c
net/core/Makefile
net/core/bpf_sk_storage.c
net/core/dev.c
net/core/dev_addr_lists.c
net/core/dev_ioctl.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/dst.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/flow_offload.c
net/core/lwtunnel.c
net/core/neighbour.c
net/core/net-procfs.c
net/core/net_namespace.c
net/core/page_pool.c
net/core/pktgen.c
net/core/ptp_classifier.c
net/core/rtnetlink.c
net/core/scm.c
net/core/selftests.c
net/core/skbuff.c
net/core/sock.c
net/core/sock_map.c
net/dccp/proto.c
net/decnet/dn_dev.c
net/decnet/dn_fib.c
net/decnet/dn_route.c
net/dsa/Kconfig
net/dsa/Makefile
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/master.c
net/dsa/port.c
net/dsa/slave.c
net/dsa/switch.c
net/dsa/tag_8021q.c
net/dsa/tag_ar9331.c
net/dsa/tag_brcm.c
net/dsa/tag_dsa.c
net/dsa/tag_gswip.c
net/dsa/tag_hellcreek.c
net/dsa/tag_ksz.c
net/dsa/tag_lan9303.c
net/dsa/tag_mtk.c
net/dsa/tag_ocelot.c
net/dsa/tag_ocelot_8021q.c
net/dsa/tag_qca.c
net/dsa/tag_rtl4_a.c
net/dsa/tag_sja1105.c
net/dsa/tag_trailer.c
net/dsa/tag_xrs700x.c
net/ethernet/eth.c
net/ethtool/coalesce.c
net/ethtool/ioctl.c
net/ethtool/netlink.c
net/ethtool/netlink.h
net/ieee802154/nl-phy.c
net/ieee802154/nl802154.c
net/ieee802154/socket.c
net/ipv4/af_inet.c
net/ipv4/bpf_tcp_ca.c
net/ipv4/devinet.c
net/ipv4/esp4.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/fou.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/inet_connection_sock.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_vti.c
net/ipv4/ipip.c
net/ipv4/netfilter/arptable_filter.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/iptable_filter.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/iptable_nat.c
net/ipv4/netfilter/iptable_raw.c
net/ipv4/netfilter/iptable_security.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/tcp_recovery.c
net/ipv4/udp.c
net/ipv4/udp_bpf.c
net/ipv4/udp_offload.c
net/ipv6/Kconfig
net/ipv6/Makefile
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/exthdrs.c
net/ipv6/ioam6.c [new file with mode: 0644]
net/ipv6/ioam6_iptunnel.c [new file with mode: 0644]
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/ndisc.c
net/ipv6/netfilter/ip6table_filter.c
net/ipv6/netfilter/ip6table_mangle.c
net/ipv6/netfilter/ip6table_nat.c
net/ipv6/netfilter/ip6table_raw.c
net/ipv6/netfilter/ip6table_security.c
net/ipv6/route.c
net/ipv6/seg6_iptunnel.c
net/ipv6/seg6_local.c
net/ipv6/sit.c
net/ipv6/sysctl_net_ipv6.c
net/ipv6/udp.c
net/iucv/af_iucv.c
net/iucv/iucv.c
net/llc/af_llc.c
net/mac80211/cfg.c
net/mac80211/driver-ops.h
net/mac80211/ibss.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/main.c
net/mac80211/rx.c
net/mac80211/s1g.c
net/mac80211/sta_info.c
net/mac80211/status.c
net/mac80211/trace.h
net/mac80211/tx.c
net/mac80211/util.c
net/mctp/Kconfig [new file with mode: 0644]
net/mctp/Makefile [new file with mode: 0644]
net/mctp/af_mctp.c [new file with mode: 0644]
net/mctp/device.c [new file with mode: 0644]
net/mctp/neigh.c [new file with mode: 0644]
net/mctp/route.c [new file with mode: 0644]
net/mpls/af_mpls.c
net/mptcp/ctrl.c
net/mptcp/mib.c
net/mptcp/mib.h
net/mptcp/options.c
net/mptcp/pm.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/Makefile
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_offload.c
net/netfilter/nf_hooks_lwtunnel.c [new file with mode: 0644]
net/netfilter/nf_queue.c
net/netfilter/nf_tables_offload.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_compat.c
net/netfilter/x_tables.c
net/netfilter/xt_CT.c
net/netfilter/xt_bpf.c
net/netlabel/netlabel_cipso_v4.c
net/netlabel/netlabel_unlabeled.c
net/netlink/af_netlink.c
net/netlink/genetlink.c
net/netrom/nr_loopback.c
net/netrom/nr_route.c
net/nfc/af_nfc.c
net/nfc/core.c
net/nfc/digital_core.c
net/nfc/hci/core.c
net/nfc/hci/llc.c
net/nfc/hci/llc.h
net/nfc/hci/llc_nop.c
net/nfc/hci/llc_shdlc.c
net/nfc/llcp.h
net/nfc/llcp_commands.c
net/nfc/llcp_core.c
net/nfc/nci/core.c
net/nfc/nci/data.c
net/nfc/nci/hci.c
net/nfc/nci/ntf.c
net/nfc/nci/rsp.c
net/nfc/nci/spi.c
net/nfc/netlink.c
net/nfc/nfc.h
net/nfc/rawsock.c
net/openvswitch/actions.c
net/openvswitch/datapath.c
net/openvswitch/datapath.h
net/packet/af_packet.c
net/phonet/af_phonet.c
net/phonet/pn_dev.c
net/phonet/socket.c
net/qrtr/qrtr.c
net/rxrpc/Kconfig
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_ct.c
net/sched/act_ctinfo.c
net/sched/act_gact.c
net/sched/act_gate.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_matchall.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_atm.c
net/sched/sch_cake.c
net/sched/sch_cbq.c
net/sched/sch_drr.c
net/sched/sch_dsmark.c
net/sched/sch_ets.c
net/sched/sch_fq_codel.c
net/sched/sch_fq_pie.c
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_multiq.c
net/sched/sch_prio.c
net/sched/sch_qfq.c
net/sched/sch_sfb.c
net/sched/sch_sfq.c
net/sched/sch_taprio.c
net/smc/smc_core.c
net/smc/smc_ib.c
net/smc/smc_pnet.c
net/socket.c
net/switchdev/switchdev.c
net/tipc/socket.c
net/unix/Kconfig
net/unix/Makefile
net/unix/af_unix.c
net/unix/unix_bpf.c [new file with mode: 0644]
net/wireless/nl80211.c
net/wireless/radiotap.c
net/wireless/rdev-ops.h
net/wireless/reg.c
net/wireless/scan.c
net/wireless/trace.h
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_user.c
samples/bpf/.gitignore
samples/bpf/Makefile
samples/bpf/Makefile.target
samples/bpf/cookie_uid_helper_example.c
samples/bpf/offwaketime_kern.c
samples/bpf/test_override_return.sh
samples/bpf/tracex4_user.c
samples/bpf/tracex7_user.c
samples/bpf/xdp1_kern.c
samples/bpf/xdp2_kern.c
samples/bpf/xdp_monitor.bpf.c [new file with mode: 0644]
samples/bpf/xdp_monitor_kern.c [deleted file]
samples/bpf/xdp_monitor_user.c
samples/bpf/xdp_redirect.bpf.c [new file with mode: 0644]
samples/bpf/xdp_redirect_cpu.bpf.c [new file with mode: 0644]
samples/bpf/xdp_redirect_cpu_kern.c [deleted file]
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdp_redirect_kern.c [deleted file]
samples/bpf/xdp_redirect_map.bpf.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_kern.c [deleted file]
samples/bpf/xdp_redirect_map_multi.bpf.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_multi_kern.c [deleted file]
samples/bpf/xdp_redirect_map_multi_user.c
samples/bpf/xdp_redirect_map_user.c
samples/bpf/xdp_redirect_user.c
samples/bpf/xdp_sample.bpf.c [new file with mode: 0644]
samples/bpf/xdp_sample.bpf.h [new file with mode: 0644]
samples/bpf/xdp_sample_shared.h [new file with mode: 0644]
samples/bpf/xdp_sample_user.c [new file with mode: 0644]
samples/bpf/xdp_sample_user.h [new file with mode: 0644]
samples/bpf/xdpsock_user.c
samples/pktgen/functions.sh
samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
samples/pktgen/pktgen_sample01_simple.sh
samples/pktgen/pktgen_sample02_multiqueue.sh
samples/pktgen/pktgen_sample03_burst_single_flow.sh
samples/pktgen/pktgen_sample04_many_flows.sh
samples/pktgen/pktgen_sample05_flow_per_thread.sh
samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
scripts/bpf_doc.py
security/selinux/hooks.c
security/selinux/include/classmap.h
tools/bpf/bpftool/Documentation/bpftool-btf.rst
tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
tools/bpf/bpftool/Documentation/bpftool-feature.rst
tools/bpf/bpftool/Documentation/bpftool-gen.rst
tools/bpf/bpftool/Documentation/bpftool-iter.rst
tools/bpf/bpftool/Documentation/bpftool-link.rst
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-net.rst
tools/bpf/bpftool/Documentation/bpftool-perf.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/btf_dumper.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/iter.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/perf.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/main.c
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/ethtool.h
tools/include/uapi/linux/if_link.h
tools/lib/bpf/Build
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/relo_core.c [new file with mode: 0644]
tools/lib/bpf/relo_core.h [new file with mode: 0644]
tools/perf/util/bpf-event.c
tools/perf/util/bpf_counter.c
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/README.rst
tools/testing/selftests/bpf/bpf_tcp_helpers.h
tools/testing/selftests/bpf/netcnt_common.h
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/network_helpers.h
tools/testing/selftests/bpf/prog_tests/attach_probe.c
tools/testing/selftests/bpf/prog_tests/bpf_cookie.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/btf_module.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/core_autosize.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
tools/testing/selftests/bpf/prog_tests/netcnt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/netns_cookie.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/perf_link.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/pinning.c
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/send_signal.c
tools/testing/selftests/bpf/prog_tests/snprintf.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/task_pt_regs.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
tools/testing/selftests/bpf/prog_tests/timer.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/timer_mim.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
tools/testing/selftests/bpf/progs/bpf_dctcp.c
tools/testing/selftests/bpf/progs/bpf_dctcp_release.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter.h
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_iter_unix.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/get_func_ip_test.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
tools/testing/selftests/bpf/progs/netcnt_prog.c
tools/testing/selftests/bpf/progs/netns_cookie_prog.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/sockopt_sk.c
tools/testing/selftests/bpf/progs/test_bpf_cookie.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_core_autosize.c
tools/testing/selftests/bpf/progs/test_ksyms_weak.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_perf_link.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
tools/testing/selftests/bpf/progs/test_snprintf.c
tools/testing/selftests/bpf/progs/test_task_pt_regs.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_tc_tunnel.c
tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/timer.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/timer_mim.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/timer_mim_reject.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/xdp_tx.c
tools/testing/selftests/bpf/test_bpftool.sh
tools/testing/selftests/bpf/test_bpftool_build.sh
tools/testing/selftests/bpf/test_bpftool_synctypes.py [new file with mode: 0755]
tools/testing/selftests/bpf/test_doc_build.sh
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_netcnt.c [deleted file]
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_tc_tunnel.sh
tools/testing/selftests/bpf/test_xdp_veth.sh
tools/testing/selftests/bpf/test_xsk.sh
tools/testing/selftests/bpf/trace_helpers.c
tools/testing/selftests/bpf/trace_helpers.h
tools/testing/selftests/bpf/xdpxceiver.c
tools/testing/selftests/bpf/xdpxceiver.h
tools/testing/selftests/bpf/xsk_prereqs.sh
tools/testing/selftests/nci/nci_dev.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/af_unix/Makefile [new file with mode: 0644]
tools/testing/selftests/net/af_unix/test_unix_oob.c [new file with mode: 0644]
tools/testing/selftests/net/config
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/fib_rule_tests.sh
tools/testing/selftests/net/forwarding/devlink_lib.sh
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/router_mpath_nh.sh
tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
tools/testing/selftests/net/gro.c [new file with mode: 0644]
tools/testing/selftests/net/gro.sh [new file with mode: 0755]
tools/testing/selftests/net/ioam6.sh [new file with mode: 0755]
tools/testing/selftests/net/ioam6_parser.c [new file with mode: 0644]
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/mptcp/pm_nl_ctl.c
tools/testing/selftests/net/psock_fanout.c
tools/testing/selftests/net/psock_snd.sh
tools/testing/selftests/net/run_afpackettests
tools/testing/selftests/net/setup_loopback.sh [new file with mode: 0755]
tools/testing/selftests/net/setup_veth.sh [new file with mode: 0644]
tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
tools/testing/selftests/net/toeplitz.c [new file with mode: 0644]
tools/testing/selftests/net/toeplitz.sh [new file with mode: 0755]
tools/testing/selftests/net/toeplitz_client.sh [new file with mode: 0755]
tools/testing/selftests/net/unicast_extensions.sh
tools/testing/selftests/net/veth.sh
tools/testing/selftests/net/vrf_strict_mode_test.sh
tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tdc_config.py

index a35ae24..6e84911 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -229,6 +229,7 @@ Matthew Wilcox <willy@infradead.org> <mawilcox@microsoft.com>
 Matthew Wilcox <willy@infradead.org> <willy@debian.org>
 Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com>
 Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org>
+Matthias Fuchs <socketcan@esd.eu> <matthias.fuchs@esd.eu>
 Matthieu CASTET <castet.matthieu@free.fr>
 Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting>
 Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com>
@@ -341,6 +342,7 @@ Sumit Semwal <sumit.semwal@ti.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
 Tejun Heo <htejun@gmail.com>
 Thomas Graf <tgraf@suug.ch>
+Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
 Thomas Pedersen <twp@codeaurora.org>
 Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
 Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
index bdb2200..ee0569a 100644 (file)
        sa1100ir        [NET]
                        See drivers/net/irda/sa1100_ir.c.
 
-       sbni=           [NET] Granch SBNI12 leased line adapter
-
        sched_verbose   [KNL] Enables verbose scheduler debug messages.
 
        schedstats=     [KNL,X86] Enable or disable scheduled statistics.
index baea6c2..1ceb5d7 100644 (file)
@@ -15,15 +15,7 @@ that goes into great technical depth about the BPF Architecture.
 libbpf
 ======
 
-Libbpf is a userspace library for loading and interacting with bpf programs.
-
-.. toctree::
-   :maxdepth: 1
-
-   libbpf/libbpf
-   libbpf/libbpf_api
-   libbpf/libbpf_build
-   libbpf/libbpf_naming_convention
+Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs.
 
 BPF Type Format (BTF)
 =====================
diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
new file mode 100644 (file)
index 0000000..4f8adfc
--- /dev/null
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+libbpf
+======
+
+For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+.. toctree::
+   :maxdepth: 1
+
+   libbpf_naming_convention
+   libbpf_build
+
+This is documentation for libbpf, a userspace library for loading and
+interacting with bpf programs.
+
+All general BPF questions, including kernel functionality, libbpf APIs and
+their application, should be sent to bpf@vger.kernel.org mailing list.
+You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
+mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
+Please search the archive before asking new questions. It very well might
+be that this was already addressed or answered before.
diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/libbpf.rst
deleted file mode 100644 (file)
index 1b1e61d..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-libbpf
-======
-
-This is documentation for libbpf, a userspace library for loading and
-interacting with bpf programs.
-
-All general BPF questions, including kernel functionality, libbpf APIs and
-their application, should be sent to bpf@vger.kernel.org mailing list.
-You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
-mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
-Please search the archive before asking new questions. It very well might
-be that this was already addressed or answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst
deleted file mode 100644 (file)
index f07eecd..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-API
-===
-
-This documentation is autogenerated from header files in libbpf, tools/lib/bpf
-
-.. kernel-doc:: tools/lib/bpf/libbpf.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/btf.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/xsk.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
-   :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_endian.h
-   :internal:
\ No newline at end of file
index 6bf9c5a..9c68d50 100644 (file)
@@ -69,7 +69,7 @@ functions. These can be mixed and matched. Note that these functions
 are not reentrant for performance reasons.
 
 ABI
-==========
+---
 
 libbpf can be both linked statically or used as DSO. To avoid possible
 conflicts with other libraries an application is linked with, all
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
deleted file mode 100644 (file)
index e15589f..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-* Broadcom UniMAC MDIO bus controller
-
-Required properties:
-- compatible: should one from "brcm,genet-mdio-v1", "brcm,genet-mdio-v2",
-  "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", "brcm,genet-mdio-v5" or
-  "brcm,unimac-mdio"
-- reg: address and length of the register set for the device, first one is the
-  base register, and the second one is optional and for indirect accesses to
-  larger than 16-bits MDIO transactions
-- reg-names: name(s) of the register must be "mdio" and optional "mdio_indir_rw"
-- #size-cells: must be 1
-- #address-cells: must be 0
-
-Optional properties:
-- interrupts: must be one if the interrupt is shared with the Ethernet MAC or
-  Ethernet switch this MDIO block is integrated from, or must be two, if there
-  are two separate interrupts, first one must be "mdio done" and second must be
-  for "mdio error"
-- interrupt-names: must be "mdio_done_error" when there is a share interrupt fed
-  to this hardware block, or must be "mdio_done" for the first interrupt and
-  "mdio_error" for the second when there are separate interrupts
-- clocks: A reference to the clock supplying the MDIO bus controller
-- clock-frequency: the MDIO bus clock that must be output by the MDIO bus
-  hardware, if absent, the default hardware values are used
-
-Child nodes of this MDIO bus controller node are standard Ethernet PHY device
-nodes as described in Documentation/devicetree/bindings/net/phy.txt
-
-Example:
-
-mdio@403c0 {
-       compatible = "brcm,unimac-mdio";
-       reg = <0x403c0 0x8 0x40300 0x18>;
-       reg-names = "mdio", "mdio_indir_rw";
-       #size-cells = <1>;
-       #address-cells = <0>;
-
-       ...
-       phy@0 {
-               compatible = "ethernet-phy-ieee802.3-c22";
-               reg = <0>;
-       };
-};
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.yaml
new file mode 100644 (file)
index 0000000..f4f4c37
--- /dev/null
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,unimac-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom UniMAC MDIO bus controller
+
+maintainers:
+  - Rafał Miłecki <rafal@milecki.pl>
+
+allOf:
+  - $ref: mdio.yaml#
+
+properties:
+  compatible:
+    enum:
+      - brcm,genet-mdio-v1
+      - brcm,genet-mdio-v2
+      - brcm,genet-mdio-v3
+      - brcm,genet-mdio-v4
+      - brcm,genet-mdio-v5
+      - brcm,unimac-mdio
+
+  reg:
+    minItems: 1
+    items:
+      - description: base register
+      - description: indirect accesses to larger than 16-bits MDIO transactions
+
+  reg-names:
+    minItems: 1
+    items:
+      - const: mdio
+      - const: mdio_indir_rw
+
+  interrupts:
+    oneOf:
+      - description: >
+          Interrupt shared with the Ethernet MAC or Ethernet switch this MDIO
+          block is integrated from
+      - items:
+          - description: |
+              "mdio done" interrupt
+          - description: |
+              "mdio error" interrupt
+
+  interrupt-names:
+    oneOf:
+      - const: mdio_done_error
+      - items:
+          - const: mdio_done
+          - const: mdio_error
+
+  clocks:
+    description: A reference to the clock supplying the MDIO bus controller
+
+  clock-frequency:
+    description: >
+      The MDIO bus clock that must be output by the MDIO bus hardware, if
+      absent, the default hardware values are used
+
+unevaluatedProperties: false
+
+required:
+  - reg
+  - reg-names
+  - '#address-cells'
+  - '#size-cells'
+
+examples:
+  - |
+    mdio@403c0 {
+        compatible = "brcm,unimac-mdio";
+        reg = <0x403c0 0x8>, <0x40300 0x18>;
+        reg-names = "mdio", "mdio_indir_rw";
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethernet-phy@0 {
+            compatible = "ethernet-phy-ieee802.3-c22";
+            reg = <0>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
new file mode 100644 (file)
index 0000000..2cd145a
--- /dev/null
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/bosch,c_can.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Bosch C_CAN/D_CAN controller Device Tree Bindings
+
+description: Bosch C_CAN/D_CAN controller for CAN bus
+
+maintainers:
+  - Dario Binacchi <dariobin@libero.it>
+
+allOf:
+  - $ref: can-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - bosch,c_can
+          - bosch,d_can
+          - ti,dra7-d_can
+          - ti,am3352-d_can
+      - items:
+          - enum:
+              - ti,am4372-d_can
+          - const: ti,am3352-d_can
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 4
+
+  power-domains:
+    description: |
+      Should contain a phandle to a PM domain provider node and an args
+      specifier containing the DCAN device id value. It's mandatory for
+      Keystone 2 66AK2G SoCs only.
+    maxItems: 1
+
+  clocks:
+    description: |
+      CAN functional clock phandle.
+    maxItems: 1
+
+  clock-names:
+    maxItems: 1
+
+  syscon-raminit:
+    description: |
+      Handle to system control region that contains the RAMINIT register,
+      register offset to the RAMINIT register and the CAN instance number (0
+      offset).
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      items:
+        - description: The phandle to the system control region.
+        - description: The register offset.
+        - description: The CAN instance number.
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - bosch,d_can
+
+then:
+  properties:
+    interrupts:
+      minItems: 4
+      maxItems: 4
+      items:
+        - description: Error and status IRQ
+        - description: Message object IRQ
+        - description: RAM ECC correctable error IRQ
+        - description: RAM ECC non-correctable error IRQ
+
+else:
+  properties:
+    interrupts:
+      maxItems: 1
+      items:
+        - description: Error and status IRQ
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/reset/altr,rst-mgr.h>
+
+    can@ffc00000 {
+       compatible = "bosch,d_can";
+       reg = <0xffc00000 0x1000>;
+       interrupts = <0 131 4>, <0 132 4>, <0 133 4>, <0 134 4>;
+       clocks = <&can0_clk>;
+       resets = <&rst CAN0_RESET>;
+    };
+  - |
+    can@0 {
+        compatible = "ti,am3352-d_can";
+        reg = <0x0 0x2000>;
+        clocks = <&dcan1_fck>;
+        clock-names = "fck";
+        syscon-raminit = <&scm_conf 0x644 1>;
+        interrupts = <55>;
+    };
index f84e313..fb547e2 100644 (file)
@@ -104,9 +104,18 @@ properties:
           maximum: 32
     maxItems: 1
 
+  power-domains:
+    description:
+      Power domain provider node and an args specifier containing
+      the can device id value.
+    maxItems: 1
+
   can-transceiver:
     $ref: can-transceiver.yaml#
 
+  phys:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt
deleted file mode 100644 (file)
index 3664798..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-Bosch C_CAN/D_CAN controller Device Tree Bindings
--------------------------------------------------
-
-Required properties:
-- compatible           : Should be "bosch,c_can" for C_CAN controllers and
-                         "bosch,d_can" for D_CAN controllers.
-                         Can be "ti,dra7-d_can", "ti,am3352-d_can" or
-                         "ti,am4372-d_can".
-- reg                  : physical base address and size of the C_CAN/D_CAN
-                         registers map
-- interrupts           : property with a value describing the interrupt
-                         number
-
-The following are mandatory properties for DRA7x, AM33xx and AM43xx SoCs only:
-- ti,hwmods            : Must be "d_can<n>" or "c_can<n>", n being the
-                         instance number
-
-The following are mandatory properties for Keystone 2 66AK2G SoCs only:
-- power-domains                : Should contain a phandle to a PM domain provider node
-                         and an args specifier containing the DCAN device id
-                         value. This property is as per the binding,
-                         Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml
-- clocks               : CAN functional clock phandle. This property is as per the
-                         binding,
-                         Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
-
-Optional properties:
-- syscon-raminit       : Handle to system control region that contains the
-                         RAMINIT register, register offset to the RAMINIT
-                         register and the CAN instance number (0 offset).
-
-Note: "ti,hwmods" field is used to fetch the base address and irq
-resources from TI, omap hwmod data base during device registration.
-Future plan is to migrate hwmod data base contents into device tree
-blob so that, all the required data will be used from device tree dts
-file.
-
-Example:
-
-Step1: SoC common .dtsi file
-
-       dcan1: d_can@481d0000 {
-               compatible = "bosch,d_can";
-               reg = <0x481d0000 0x2000>;
-               interrupts = <55>;
-               interrupt-parent = <&intc>;
-               status = "disabled";
-       };
-
-(or)
-
-       dcan1: d_can@481d0000 {
-               compatible = "bosch,d_can";
-               ti,hwmods = "d_can1";
-               reg = <0x481d0000 0x2000>;
-               interrupts = <55>;
-               interrupt-parent = <&intc>;
-               status = "disabled";
-       };
-
-Step 2: board specific .dts file
-
-       &dcan1 {
-               status = "okay";
-       };
index 9cf2ae0..1f0e980 100644 (file)
@@ -13,6 +13,15 @@ properties:
   $nodename:
     pattern: "^can(@.*)?$"
 
+  termination-gpios:
+    description: GPIO pin to enable CAN bus termination.
+    maxItems: 1
+
+  termination-ohms:
+    description: The resistance value of the CAN bus termination resistor.
+    minimum: 1
+    maximum: 65535
+
 additionalProperties: true
 
 ...
index 55bff15..3f0ee17 100644 (file)
@@ -119,6 +119,9 @@ properties:
     minimum: 0
     maximum: 2
 
+  termination-gpios: true
+  termination-ohms: true
+
 required:
   - compatible
   - reg
@@ -148,3 +151,17 @@ examples:
         fsl,stop-mode = <&gpr 0x34 28>;
         fsl,scu-index = /bits/ 8 <1>;
     };
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+
+    can@2090000 {
+        compatible = "fsl,imx6q-flexcan";
+        reg = <0x02090000 0x4000>;
+        interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clks 1>, <&clks 2>;
+        clock-names = "ipg", "per";
+        fsl,stop-mode = <&gpr 0x34 28>;
+        termination-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>;
+        termination-ohms = <120>;
+    };
index 0b33ba9..546c6e6 100644 (file)
@@ -30,13 +30,15 @@ properties:
               - renesas,r8a77995-canfd     # R-Car D3
           - const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2
 
+      - items:
+          - enum:
+              - renesas,r9a07g044-canfd    # RZ/G2{L,LC}
+          - const: renesas,rzg2l-canfd     # RZ/G2L family
+
   reg:
     maxItems: 1
 
-  interrupts:
-    items:
-      - description: Channel interrupt
-      - description: Global interrupt
+  interrupts: true
 
   clocks:
     maxItems: 3
@@ -50,8 +52,7 @@ properties:
   power-domains:
     maxItems: 1
 
-  resets:
-    maxItems: 1
+  resets: true
 
   renesas,no-can-fd:
     $ref: /schemas/types.yaml#/definitions/flag
@@ -91,6 +92,62 @@ required:
   - channel0
   - channel1
 
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - renesas,rzg2l-canfd
+then:
+  properties:
+    interrupts:
+      items:
+        - description: CAN global error interrupt
+        - description: CAN receive FIFO interrupt
+        - description: CAN0 error interrupt
+        - description: CAN0 transmit interrupt
+        - description: CAN0 transmit/receive FIFO receive completion interrupt
+        - description: CAN1 error interrupt
+        - description: CAN1 transmit interrupt
+        - description: CAN1 transmit/receive FIFO receive completion interrupt
+
+    interrupt-names:
+      items:
+        - const: g_err
+        - const: g_recc
+        - const: ch0_err
+        - const: ch0_rec
+        - const: ch0_trx
+        - const: ch1_err
+        - const: ch1_rec
+        - const: ch1_trx
+
+    resets:
+      maxItems: 2
+
+    reset-names:
+      items:
+        - const: rstp_n
+        - const: rstc_n
+
+  required:
+    - interrupt-names
+    - reset-names
+else:
+  properties:
+    interrupts:
+      items:
+        - description: Channel interrupt
+        - description: Global interrupt
+
+    interrupt-names:
+      items:
+        - const: ch_int
+        - const: g_int
+
+    resets:
+      maxItems: 1
+
 unevaluatedProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
new file mode 100644 (file)
index 0000000..eca4144
--- /dev/null
@@ -0,0 +1,244 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/fsl,fec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale Fast Ethernet Controller (FEC)
+
+maintainers:
+  - Joakim Zhang <qiangqing.zhang@nxp.com>
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - fsl,imx25-fec
+          - fsl,imx27-fec
+          - fsl,imx28-fec
+          - fsl,imx6q-fec
+          - fsl,mvf600-fec
+      - items:
+          - enum:
+              - fsl,imx53-fec
+              - fsl,imx6sl-fec
+          - const: fsl,imx25-fec
+      - items:
+          - enum:
+              - fsl,imx35-fec
+              - fsl,imx51-fec
+          - const: fsl,imx27-fec
+      - items:
+          - enum:
+              - fsl,imx6ul-fec
+              - fsl,imx6sx-fec
+          - const: fsl,imx6q-fec
+      - items:
+          - enum:
+              - fsl,imx7d-fec
+          - const: fsl,imx6sx-fec
+      - items:
+          - const: fsl,imx8mq-fec
+          - const: fsl,imx6sx-fec
+      - items:
+          - enum:
+              - fsl,imx8mm-fec
+              - fsl,imx8mn-fec
+              - fsl,imx8mp-fec
+          - const: fsl,imx8mq-fec
+          - const: fsl,imx6sx-fec
+      - items:
+          - const: fsl,imx8qm-fec
+          - const: fsl,imx6sx-fec
+      - items:
+          - enum:
+              - fsl,imx8qxp-fec
+          - const: fsl,imx8qm-fec
+          - const: fsl,imx6sx-fec
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 4
+
+  interrupt-names:
+    oneOf:
+      - items:
+          - const: int0
+      - items:
+          - const: int0
+          - const: pps
+      - items:
+          - const: int0
+          - const: int1
+          - const: int2
+      - items:
+          - const: int0
+          - const: int1
+          - const: int2
+          - const: pps
+
+  clocks:
+    minItems: 2
+    maxItems: 5
+    description:
+      The "ipg", for MAC ipg_clk_s, ipg_clk_mac_s that are for register accessing.
+      The "ahb", for MAC ipg_clk, ipg_clk_mac that are bus clock.
+      The "ptp"(option), for IEEE1588 timer clock that requires the clock.
+      The "enet_clk_ref"(option), for MAC transmit/receiver reference clock like
+      RGMII TXC clock or RMII reference clock. It depends on board design,
+      the clock is required if RGMII TXC and RMII reference clock source from
+      SOC internal PLL.
+      The "enet_out"(option), output clock for external device, like supply clock
+      for PHY. The clock is required if PHY clock source from SOC.
+      The "enet_2x_txclk"(option), for RGMII sampling clock which fixed at 250Mhz.
+      The clock is required if SoC RGMII enable clock delay.
+
+  clock-names:
+    minItems: 2
+    maxItems: 5
+    items:
+      enum:
+        - ipg
+        - ahb
+        - ptp
+        - enet_clk_ref
+        - enet_out
+        - enet_2x_txclk
+
+  phy-mode: true
+
+  phy-handle: true
+
+  fixed-link: true
+
+  local-mac-address: true
+
+  mac-address: true
+
+  tx-internal-delay-ps:
+    enum: [0, 2000]
+
+  rx-internal-delay-ps:
+    enum: [0, 2000]
+
+  phy-supply:
+    description:
+      Regulator that powers the Ethernet PHY.
+
+  fsl,num-tx-queues:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The property is valid for enet-avb IP, which supports hw multi queues.
+      Should specify the tx queue number, otherwise set tx queue number to 1.
+    enum: [1, 2, 3]
+
+  fsl,num-rx-queues:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The property is valid for enet-avb IP, which supports hw multi queues.
+      Should specify the rx queue number, otherwise set rx queue number to 1.
+    enum: [1, 2, 3]
+
+  fsl,magic-packet:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      If present, indicates that the hardware supports waking up via magic packet.
+
+  fsl,err006687-workaround-present:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      If present indicates that the system has the hardware workaround for
+      ERR006687 applied and does not need a software workaround.
+
+  fsl,stop-mode:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      Register bits of stop mode control, the format is <&gpr req_gpr req_bit>.
+      gpr is the phandle to general purpose register node.
+      req_gpr is the gpr register offset for ENET stop request.
+      req_bit is the gpr bit offset for ENET stop request.
+
+  mdio:
+    type: object
+    description:
+      Specifies the mdio bus in the FEC, used as a container for phy nodes.
+
+  # Deprecated optional properties:
+  # To avoid these, create a phy node according to ethernet-phy.yaml in the same
+  # directory, and point the FEC's "phy-handle" property to it. Then use
+  # the phy's reset binding, again described by ethernet-phy.yaml.
+
+  phy-reset-gpios:
+    deprecated: true
+    description:
+      Should specify the gpio for phy reset.
+
+  phy-reset-duration:
+    deprecated: true
+    description:
+      Reset duration in milliseconds.  Should present only if property
+      "phy-reset-gpios" is available.  Missing the property will have the
+      duration be 1 millisecond.  Numbers greater than 1000 are invalid
+      and 1 millisecond will be used instead.
+
+  phy-reset-active-high:
+    deprecated: true
+    description:
+      If present then the reset sequence using the GPIO specified in the
+      "phy-reset-gpios" property is reversed (H=reset state, L=operation state).
+
+  phy-reset-post-delay:
+    deprecated: true
+    description:
+      Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
+      milliseconds will be observed after the phy-reset-gpios has been toggled.
+      Can be omitted thus no delay is observed. Delay is in range of 1ms to 1000ms.
+      Other delays are invalid.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+# FIXME: We had better set additionalProperties to false to avoid invalid or at
+# least undocumented properties. However, PHY may have a deprecated option to
+# place PHY OF properties in the MAC node, such as Micrel PHY, and we can find
+# these boards which is based on i.MX6QDL.
+additionalProperties: false
+
+examples:
+  - |
+    ethernet@83fec000 {
+      compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+      reg = <0x83fec000 0x4000>;
+      interrupts = <87>;
+      phy-mode = "mii";
+      phy-reset-gpios = <&gpio2 14 0>;
+      phy-supply = <&reg_fec_supply>;
+    };
+
+    ethernet@83fed000 {
+      compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+      reg = <0x83fed000 0x4000>;
+      interrupts = <87>;
+      phy-mode = "mii";
+      phy-reset-gpios = <&gpio2 14 0>;
+      phy-supply = <&reg_fec_supply>;
+      phy-handle = <&ethphy0>;
+
+      mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        ethphy0: ethernet-phy@0 {
+          compatible = "ethernet-phy-ieee802.3-c22";
+          reg = <0>;
+        };
+      };
+    };
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
deleted file mode 100644 (file)
index 9b54378..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-* Freescale Fast Ethernet Controller (FEC)
-
-Required properties:
-- compatible : Should be "fsl,<soc>-fec"
-- reg : Address and length of the register set for the device
-- interrupts : Should contain fec interrupt
-- phy-mode : See ethernet.txt file in the same directory
-
-Optional properties:
-- phy-supply : regulator that powers the Ethernet PHY.
-- phy-handle : phandle to the PHY device connected to this device.
-- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
-  Use instead of phy-handle.
-- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports
-  hw multi queues. Should specify the tx queue number, otherwise set tx queue
-  number to 1.
-- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports
-  hw multi queues. Should specify the rx queue number, otherwise set rx queue
-  number to 1.
-- fsl,magic-packet : If present, indicates that the hardware supports waking
-  up via magic packet.
-- fsl,err006687-workaround-present: If present indicates that the system has
-  the hardware workaround for ERR006687 applied and does not need a software
-  workaround.
-- fsl,stop-mode: register bits of stop mode control, the format is
-                <&gpr req_gpr req_bit>.
-                gpr is the phandle to general purpose register node.
-                req_gpr is the gpr register offset for ENET stop request.
-                req_bit is the gpr bit offset for ENET stop request.
- -interrupt-names:  names of the interrupts listed in interrupts property in
-  the same order. The defaults if not specified are
-  __Number of interrupts__   __Default__
-       1                       "int0"
-       2                       "int0", "pps"
-       3                       "int0", "int1", "int2"
-       4                       "int0", "int1", "int2", "pps"
-  The order may be changed as long as they correspond to the interrupts
-  property. Currently, only i.mx7 uses "int1" and "int2". They correspond to
-  tx/rx queues 1 and 2. "int0" will be used for queue 0 and ENET_MII interrupts.
-  For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse
-  per second interrupt associated with 1588 precision time protocol(PTP).
-
-Optional subnodes:
-- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes
-  according to phy.txt in the same directory
-
-Deprecated optional properties:
-       To avoid these, create a phy node according to phy.txt in the same
-       directory, and point the fec's "phy-handle" property to it. Then use
-       the phy's reset binding, again described by phy.txt.
-- phy-reset-gpios : Should specify the gpio for phy reset
-- phy-reset-duration : Reset duration in milliseconds.  Should present
-  only if property "phy-reset-gpios" is available.  Missing the property
-  will have the duration be 1 millisecond.  Numbers greater than 1000 are
-  invalid and 1 millisecond will be used instead.
-- phy-reset-active-high : If present then the reset sequence using the GPIO
-  specified in the "phy-reset-gpios" property is reversed (H=reset state,
-  L=operation state).
-- phy-reset-post-delay : Post reset delay in milliseconds. If present then
-  a delay of phy-reset-post-delay milliseconds will be observed after the
-  phy-reset-gpios has been toggled. Can be omitted thus no delay is
-  observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
-
-Example:
-
-ethernet@83fec000 {
-       compatible = "fsl,imx51-fec", "fsl,imx27-fec";
-       reg = <0x83fec000 0x4000>;
-       interrupts = <87>;
-       phy-mode = "mii";
-       phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
-       local-mac-address = [00 04 9F 01 1B B9];
-       phy-supply = <&reg_fec_supply>;
-};
-
-Example with phy specified:
-
-ethernet@83fec000 {
-       compatible = "fsl,imx51-fec", "fsl,imx27-fec";
-       reg = <0x83fec000 0x4000>;
-       interrupts = <87>;
-       phy-mode = "mii";
-       phy-reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>; /* GPIO2_14 */
-       local-mac-address = [00 04 9F 01 1B B9];
-       phy-supply = <&reg_fec_supply>;
-       phy-handle = <&ethphy>;
-       mdio {
-               clock-frequency = <5000000>;
-               ethphy: ethernet-phy@6 {
-                       compatible = "ethernet-phy-ieee802.3-c22";
-                       reg = <6>;
-                       max-speed = <100>;
-               };
-       };
-};
diff --git a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
new file mode 100644 (file)
index 0000000..8b9b3f9
--- /dev/null
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/intel,ixp46x-ptp-timer.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP46x PTP Timer (TSYNC)
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  The Intel IXP46x PTP timer is known in the manual as IEEE1588 Hardware
+  Assist and Time Synchronization Hardware Assist TSYNC provides a PTP
+  timer. It exists in the Intel IXP45x and IXP46x XScale SoCs.
+
+properties:
+  compatible:
+    const: intel,ixp46x-ptp-timer
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: Interrupt to trigger master mode snapshot from the
+          PRP timer, usually a GPIO interrupt.
+      - description: Interrupt to trigger slave mode snapshot from the
+          PRP timer, usually a GPIO interrupt.
+
+  interrupt-names:
+    items:
+      - const: master
+      - const: slave
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    ptp-timer@c8010000 {
+        compatible = "intel,ixp46x-ptp-timer";
+        reg = <0xc8010000 0x1000>;
+        interrupt-parent = <&gpio0>;
+        interrupts = <8 IRQ_TYPE_EDGE_FALLING>, <7 IRQ_TYPE_EDGE_FALLING>;
+        interrupt-names = "master", "slave";
+    };
diff --git a/Documentation/devicetree/bindings/net/litex,liteeth.yaml b/Documentation/devicetree/bindings/net/litex,liteeth.yaml
new file mode 100644 (file)
index 0000000..76c164a
--- /dev/null
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-or-later OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/litex,liteeth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: LiteX LiteETH ethernet device
+
+maintainers:
+  - Joel Stanley <joel@jms.id.au>
+
+description: |
+  LiteETH is a small footprint and configurable Ethernet core for FPGA based
+  system on chips.
+
+  The hardware source is Open Source and can be found on at
+  https://github.com/enjoy-digital/liteeth/.
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+properties:
+  compatible:
+    const: litex,liteeth
+
+  reg:
+    items:
+      - description: MAC registers
+      - description: MDIO registers
+      - description: Packet buffer
+
+  reg-names:
+    items:
+      - const: mac
+      - const: mdio
+      - const: buffer
+
+  interrupts:
+    maxItems: 1
+
+  litex,rx-slots:
+    description: Number of slots in the receive buffer
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    default: 2
+
+  litex,tx-slots:
+    description: Number of slots in the transmit buffer
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    default: 2
+
+  litex,slot-size:
+    description: Size in bytes of a slot in the tx/rx buffer
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0x800
+    default: 0x800
+
+  mac-address: true
+  local-mac-address: true
+  phy-handle: true
+
+  mdio:
+    $ref: mdio.yaml#
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    mac: ethernet@8020000 {
+        compatible = "litex,liteeth";
+        reg = <0x8021000 0x100>,
+              <0x8020800 0x100>,
+              <0x8030000 0x2000>;
+        reg-names = "mac", "mdio", "buffer";
+        litex,rx-slots = <2>;
+        litex,tx-slots = <2>;
+        litex,slot-size = <0x800>;
+        interrupts = <0x11 0x1>;
+        phy-handle = <&eth_phy>;
+
+        mdio {
+          #address-cells = <1>;
+          #size-cells = <0>;
+
+          eth_phy: ethernet-phy@0 {
+            reg = <0>;
+          };
+        };
+    };
+...
+
+#  vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :
index a4d547e..af9df2f 100644 (file)
@@ -8,6 +8,7 @@ Required properties:
   Use "cdns,np4-macb" for NP4 SoC devices.
   Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb".
   Use "atmel,sama5d2-gem" for the GEM IP (10/100) available on Atmel sama5d2 SoCs.
+  Use "atmel,sama5d29-gem" for GEM XL IP (10/100) available on Atmel sama5d29 SoCs.
   Use "atmel,sama5d3-macb" for the 10/100Mbit IP available on Atmel sama5d3 SoCs.
   Use "atmel,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
   Use "atmel,sama5d4-gem" for the GEM IP (10/100) available on Atmel sama5d4 SoCs.
index ed88ba4..b8a0b39 100644 (file)
@@ -87,16 +87,24 @@ properties:
       - const: ipa-setup-ready
 
   interconnects:
-    items:
-      - description: Interconnect path between IPA and main memory
-      - description: Interconnect path between IPA and internal memory
-      - description: Interconnect path between IPA and the AP subsystem
+    oneOf:
+      - items:
+          - description: Path leading to system memory
+          - description: Path between the AP and IPA config space
+      - items:
+          - description: Path leading to system memory
+          - description: Path leading to internal memory
+          - description: Path between the AP and IPA config space
 
   interconnect-names:
-    items:
-      - const: memory
-      - const: imem
-      - const: config
+    oneOf:
+      - items:
+          - const: memory
+          - const: config
+      - items:
+          - const: memory
+          - const: imem
+          - const: config
 
   qcom,smem-states:
     $ref: /schemas/types.yaml#/definitions/phandle-array
index 0c97331..2af3043 100644 (file)
@@ -14,7 +14,9 @@ allOf:
 
 properties:
   compatible:
-    const: qcom,ipq4019-mdio
+    enum:
+      - qcom,ipq4019-mdio
+      - qcom,ipq5018-mdio
 
   "#address-cells":
     const: 1
@@ -23,7 +25,18 @@ properties:
     const: 0
 
   reg:
+    minItems: 1
+    maxItems: 2
+    description:
+      the first Address and length of the register set for the MDIO controller.
+      the second Address and length of the register for ethernet LDO, this second
+      address range is only required by the platform IPQ50xx.
+
+  clocks:
     maxItems: 1
+    description: |
+      MDIO clock source frequency fixed to 100MHZ, this clock should be specified
+      by the platform IPQ807x, IPQ60xx and IPQ50xx.
 
 required:
   - compatible
index eb8a1a1..f10fe53 100644 (file)
@@ -181,7 +181,7 @@ xmit_from_hci():
 The llc must be registered with nfc before it can be used. Do that by
 calling::
 
-       nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+       nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
 
 Again, note that the llc does not handle the physical link. It is thus very
 easy to mix any physical link with any llc for a given chip driver.
index 74821d2..b85563e 100644 (file)
@@ -157,7 +157,7 @@ Contact
 Please send us comments, experiences, questions, anything :)
 
 IRC:
-  #batman on irc.freenode.org
+  #batadv on ircs://irc.hackint.org/
 Mailing-list:
   b.a.t.m.a.n@open-mesh.org (optional subscription at
   https://lists.open-mesh.org/mailman3/postorius/lists/b.a.t.m.a.n.lists.open-mesh.org/)
index 62f2aab..31cfd7d 100644 (file)
@@ -501,6 +501,18 @@ fail_over_mac
        This option was added in bonding version 3.2.0.  The "follow"
        policy was added in bonding version 3.3.0.
 
+lacp_active
+       Option specifying whether to send LACPDU frames periodically.
+
+       off or 0
+               LACPDU frames acts as "speak when spoken to".
+
+       on or 1
+               LACPDU frames are sent along the configured links
+               periodically. See lacp_rate for more details.
+
+       The default is on.
+
 lacp_rate
 
        Option specifying the rate in which we'll ask our link partner
diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
new file mode 100644 (file)
index 0000000..8bf411b
--- /dev/null
@@ -0,0 +1,217 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===================
+DPAA2 Switch driver
+===================
+
+:Copyright: |copy| 2021 NXP
+
+The DPAA2 Switch driver probes on the Datapath Switch (DPSW) object which can
+be instantiated on the following DPAA2 SoCs and their variants: LS2088A and
+LX2160A.
+
+The driver uses the switch device driver model and exposes each switch port as
+a network interface, which can be included in a bridge or used as a standalone
+interface. Traffic switched between ports is offloaded into the hardware.
+
+The DPSW can have ports connected to DPNIs or to DPMACs for external access.
+::
+
+         [ethA]     [ethB]      [ethC]     [ethD]     [ethE]     [ethF]
+            :          :          :          :          :          :
+            :          :          :          :          :          :
+       [dpaa2-eth]  [dpaa2-eth]  [              dpaa2-switch              ]
+            :          :          :          :          :          :        kernel
+       =============================================================================
+            :          :          :          :          :          :        hardware
+         [DPNI]      [DPNI]     [============= DPSW =================]
+            |          |          |          |          |          |
+            |           ----------           |       [DPMAC]    [DPMAC]
+             -------------------------------            |          |
+                                                        |          |
+                                                      [PHY]      [PHY]
+
+Creating an Ethernet Switch
+===========================
+
+The dpaa2-switch driver probes on DPSW devices found on the fsl-mc bus. These
+devices can be either created statically through the boot time configuration
+file - DataPath Layout (DPL) - or at runtime using the DPAA2 object APIs
+(incorporated already into the restool userspace tool).
+
+At the moment, the dpaa2-switch driver imposes the following restrictions on
+the DPSW object that it will probe:
+
+ * The minimum number of FDBs should be at least equal to the number of switch
+   interfaces. This is necessary so that separation of switch ports can be
+   done, ie when not under a bridge, each switch port will have its own FDB.
+   ::
+
+        fsl_dpaa2_switch dpsw.0: The number of FDBs is lower than the number of ports, cannot probe
+
+ * Both the broadcast and flooding configuration should be per FDB. This
+   enables the driver to restrict the broadcast and flooding domains of each
+   FDB depending on the switch ports that are sharing it (aka are under the
+   same bridge).
+   ::
+
+        fsl_dpaa2_switch dpsw.0: Flooding domain is not per FDB, cannot probe
+        fsl_dpaa2_switch dpsw.0: Broadcast domain is not per FDB, cannot probe
+
+ * The control interface of the switch should not be disabled
+   (DPSW_OPT_CTRL_IF_DIS not passed as a create time option). Without the
+   control interface, the driver is not capable to provide proper Rx/Tx traffic
+   support on the switch port netdevices.
+   ::
+
+        fsl_dpaa2_switch dpsw.0: Control Interface is disabled, cannot probe
+
+Besides the configuration of the actual DPSW object, the dpaa2-switch driver
+will need the following DPAA2 objects:
+
+ * 1 DPMCP - A Management Command Portal object is needed for any interraction
+   with the MC firmware.
+
+ * 1 DPBP - A Buffer Pool is used for seeding buffers intended for the Rx path
+   on the control interface.
+
+ * Access to at least one DPIO object (Software Portal) is needed for any
+   enqueue/dequeue operation to be performed on the control interface queues.
+   The DPIO object will be shared, no need for a private one.
+
+Switching features
+==================
+
+The driver supports the configuration of L2 forwarding rules in hardware for
+port bridging as well as standalone usage of the independent switch interfaces.
+
+The hardware is not configurable with respect to VLAN awareness, thus any DPAA2
+switch port should be used only in usecases with a VLAN aware bridge::
+
+        $ ip link add dev br0 type bridge vlan_filtering 1
+
+        $ ip link add dev br1 type bridge
+        $ ip link set dev ethX master br1
+        Error: fsl_dpaa2_switch: Cannot join a VLAN-unaware bridge
+
+Topology and loop detection through STP is supported when ``stp_state 1`` is
+used at bridge create ::
+
+        $ ip link add dev br0 type bridge vlan_filtering 1 stp_state 1
+
+L2 FDB manipulation (add/delete/dump) is supported.
+
+HW FDB learning can be configured on each switch port independently through
+bridge commands. When the HW learning is disabled, a fast age procedure will be
+run and any previously learnt addresses will be removed.
+::
+
+        $ bridge link set dev ethX learning off
+        $ bridge link set dev ethX learning on
+
+Restricting the unknown unicast and multicast flooding domain is supported, but
+not independently of each other::
+
+        $ ip link set dev ethX type bridge_slave flood off mcast_flood off
+        $ ip link set dev ethX type bridge_slave flood off mcast_flood on
+        Error: fsl_dpaa2_switch: Cannot configure multicast flooding independently of unicast.
+
+Broadcast flooding on a switch port can be disabled/enabled through the brport sysfs::
+
+        $ echo 0 > /sys/bus/fsl-mc/devices/dpsw.Y/net/ethX/brport/broadcast_flood
+
+Offloads
+========
+
+Routing actions (redirect, trap, drop)
+--------------------------------------
+
+The DPAA2 switch is able to offload flow-based redirection of packets making
+use of ACL tables. Shared filter blocks are supported by sharing a single ACL
+table between multiple ports.
+
+The following flow keys are supported:
+
+ * Ethernet: dst_mac/src_mac
+ * IPv4: dst_ip/src_ip/ip_proto/tos
+ * VLAN: vlan_id/vlan_prio/vlan_tpid/vlan_dei
+ * L4: dst_port/src_port
+
+Also, the matchall filter can be used to redirect the entire traffic received
+on a port.
+
+As per flow actions, the following are supported:
+
+ * drop
+ * mirred egress redirect
+ * trap
+
+Each ACL entry (filter) can be setup with only one of the listed
+actions.
+
+Example 1: send frames received on eth4 with a SA of 00:01:02:03:04:05 to the
+CPU::
+
+        $ tc qdisc add dev eth4 clsact
+        $ tc filter add dev eth4 ingress flower src_mac 00:01:02:03:04:05 skip_sw action trap
+
+Example 2: drop frames received on eth4 with VID 100 and PCP of 3::
+
+        $ tc filter add dev eth4 ingress protocol 802.1q flower skip_sw vlan_id 100 vlan_prio 3 action drop
+
+Example 3: redirect all frames received on eth4 to eth1::
+
+        $ tc filter add dev eth4 ingress matchall action mirred egress redirect dev eth1
+
+Example 4: Use a single shared filter block on both eth5 and eth6::
+
+        $ tc qdisc add dev eth5 ingress_block 1 clsact
+        $ tc qdisc add dev eth6 ingress_block 1 clsact
+        $ tc filter add block 1 ingress flower dst_mac 00:01:02:03:04:04 skip_sw \
+                action trap
+        $ tc filter add block 1 ingress protocol ipv4 flower src_ip 192.168.1.1 skip_sw \
+                action mirred egress redirect dev eth3
+
+Mirroring
+~~~~~~~~~
+
+The DPAA2 switch supports only per port mirroring and per VLAN mirroring.
+Adding mirroring filters in shared blocks is also supported.
+
+When using the tc-flower classifier with the 802.1q protocol, only the
+''vlan_id'' key will be accepted. Mirroring based on any other fields from the
+802.1q protocol will be rejected::
+
+        $ tc qdisc add dev eth8 ingress_block 1 clsact
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_prio 3 action mirred egress mirror dev eth6
+        Error: fsl_dpaa2_switch: Only matching on VLAN ID supported.
+        We have an error talking to the kernel
+
+If a mirroring VLAN filter is requested on a port, the VLAN must to be
+installed on the switch port in question either using ''bridge'' or by creating
+a VLAN upper device if the switch port is used as a standalone interface::
+
+        $ tc qdisc add dev eth8 ingress_block 1 clsact
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+        Error: VLAN must be installed on the switch port.
+        We have an error talking to the kernel
+
+        $ bridge vlan add vid 200 dev eth8
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+
+        $ ip link add link eth8 name eth8.200 type vlan id 200
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+
+Also, it should be noted that the mirrored traffic will be subject to the same
+egress restrictions as any other traffic. This means that when a mirrored
+packet will reach the mirror port, if the VLAN found in the packet is not
+installed on the port it will get dropped.
+
+The DPAA2 switch supports only a single mirroring destination, thus multiple
+mirror rules can be installed but their ''to'' port has to be the same::
+
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 200 action mirred egress mirror dev eth6
+        $ tc filter add block 1 ingress protocol 802.1q flower skip_sw vlan_id 100 action mirred egress mirror dev eth7
+        Error: fsl_dpaa2_switch: Multiple mirror ports not supported.
+        We have an error talking to the kernel
index ef8cb62..4b59cf2 100644 (file)
@@ -656,3 +656,47 @@ Bridge offloads tracepoints:
     $ cat /sys/kernel/debug/tracing/trace
     ...
     ip-5387    [000] ...1       573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
+
+Eswitch QoS tracepoints:
+
+- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport::
+
+    $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-23496   [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport::
+
+    $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-26548   [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport::
+
+    $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-27418   [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3
+
+- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group::
+
+    $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-26578   [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5
+
+- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group::
+
+    $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-27303   [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000
+
+- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group::
+
+    $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    <...>-27418   [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1
index 54c9f10..4878907 100644 (file)
@@ -97,6 +97,18 @@ own name.
    * - ``enable_roce``
      - Boolean
      - Enable handling of RoCE traffic in the device.
+   * - ``enable_eth``
+     - Boolean
+     - When enabled, the device driver will instantiate Ethernet specific
+       auxiliary device of the devlink device.
+   * - ``enable_rdma``
+     - Boolean
+     - When enabled, the device driver will instantiate RDMA specific
+       auxiliary device of the devlink device.
+   * - ``enable_vnet``
+     - Boolean
+     - When enabled, the device driver will instantiate VDPA networking
+       specific auxiliary device of the devlink device.
    * - ``internal_err_reset``
      - Boolean
      - When enabled, the device driver will reset the device on internal
diff --git a/Documentation/networking/devlink/hns3.rst b/Documentation/networking/devlink/hns3.rst
new file mode 100644 (file)
index 0000000..4562a6e
--- /dev/null
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+hns3 devlink support
+====================
+
+This document describes the devlink features implemented by the ``hns3``
+device driver.
+
+The ``hns3`` driver supports reloading via ``DEVLINK_CMD_RELOAD``.
+
+Info versions
+=============
+
+The ``hns3`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+   :widths: 10 10 80
+
+   * - Name
+     - Type
+     - Description
+   * - ``fw``
+     - running
+     - Used to represent the firmware version.
index b3b9e06..45b5f8b 100644 (file)
@@ -34,6 +34,7 @@ parameters, info versions, and other features it supports.
    :maxdepth: 1
 
    bnxt
+   hns3
    ionic
    ice
    mlx4
@@ -42,7 +43,6 @@ parameters, info versions, and other features it supports.
    mv88e6xxx
    netdevsim
    nfp
-   sja1105
    qed
    ti-cpsw-switch
    am65-nuss-cpsw-switch
diff --git a/Documentation/networking/devlink/sja1105.rst b/Documentation/networking/devlink/sja1105.rst
deleted file mode 100644 (file)
index e2679c2..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=======================
-sja1105 devlink support
-=======================
-
-This document describes the devlink features implemented
-by the ``sja1105`` device driver.
-
-Parameters
-==========
-
-.. list-table:: Driver-specific parameters implemented
-  :widths: 5 5 5 85
-
-  * - Name
-    - Type
-    - Mode
-    - Description
-  * - ``best_effort_vlan_filtering``
-    - Boolean
-    - runtime
-    - Allow plain ETH_P_8021Q headers to be used as DSA tags.
-
-      Benefits:
-
-      - Can terminate untagged traffic over switch net
-        devices even when enslaved to a bridge with
-        vlan_filtering=1.
-      - Can terminate VLAN-tagged traffic over switch net
-        devices even when enslaved to a bridge with
-        vlan_filtering=1, with some constraints (no more than
-        7 non-pvid VLANs per user port).
-      - Can do QoS based on VLAN PCP and VLAN membership
-        admission control for autonomously forwarded frames
-        (regardless of whether they can be terminated on the
-        CPU or not).
-
-      Drawbacks:
-
-      - User cannot use VLANs in range 1024-3071. If the
-       switch receives frames with such VIDs, it will
-       misinterpret them as DSA tags.
-      - Switch uses Shared VLAN Learning (FDB lookup uses
-       only DMAC as key).
-      - When VLANs span cross-chip topologies, the total
-       number of permitted VLANs may be less than 7 per
-       port, due to a maximum number of 32 VLAN retagging
-       rules per switch.
index 20baacf..89bb4fa 100644 (file)
@@ -200,19 +200,6 @@ receive all frames regardless of the value of the MAC DA. This can be done by
 setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``.
 Note that this assumes a DSA-unaware master driver, which is the norm.
 
-Hardware manufacturers are strongly discouraged to do this, but some tagging
-protocols might not provide source port information on RX for all packets, but
-e.g. only for control traffic (link-local PDUs). In this case, by implementing
-the ``filter`` method of ``struct dsa_device_ops``, the tagger might select
-which packets are to be redirected on RX towards the virtual DSA user network
-interfaces, and which are to be left in the DSA master's RX data path.
-
-It might also happen (although silicon vendors are strongly discouraged to
-produce hardware like this) that a tagging protocol splits the switch-specific
-information into a header portion and a tail portion, therefore not falling
-cleanly into any of the above 3 categories. DSA does not support this
-configuration.
-
 Master network devices
 ----------------------
 
@@ -663,6 +650,22 @@ Bridge layer
   CPU port, and flooding towards the CPU port should also be enabled, due to a
   lack of an explicit address filtering mechanism in the DSA core.
 
+- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
+  ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
+  a non-zero value. Returning success in this function activates the TX
+  forwarding offload bridge feature for this port, which enables the tagging
+  protocol driver to inject data plane packets towards the bridging domain that
+  the port is a part of. Data plane packets are subject to FDB lookup, hardware
+  learning on the CPU port, and do not override the port STP state.
+  Additionally, replication of data plane packets (multicast, flooding) is
+  handled in hardware and the bridge driver will transmit a single skb for each
+  packet that needs replication. The method is provided as a configuration
+  point for drivers that need to configure the hardware for enabling this
+  feature.
+
+- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoken when a driver
+  leaves a bridge port which had the TX forwarding offload feature enabled.
+
 Bridge VLAN filtering
 ---------------------
 
index da4057b..564caee 100644 (file)
@@ -65,199 +65,6 @@ If that changed setting can be transmitted to the switch through the dynamic
 reconfiguration interface, it is; otherwise the switch is reset and
 reprogrammed with the updated static configuration.
 
-Traffic support
-===============
-
-The switches do not have hardware support for DSA tags, except for "slow
-protocols" for switch control as STP and PTP. For these, the switches have two
-programmable filters for link-local destination MACs.
-These are used to trap BPDUs and PTP traffic to the master netdevice, and are
-further used to support STP and 1588 ordinary clock/boundary clock
-functionality. For frames trapped to the CPU, source port and switch ID
-information is encoded by the hardware into the frames.
-
-But by leveraging ``CONFIG_NET_DSA_TAG_8021Q`` (a software-defined DSA tagging
-format based on VLANs), general-purpose traffic termination through the network
-stack can be supported under certain circumstances.
-
-Depending on VLAN awareness state, the following operating modes are possible
-with the switch:
-
-- Mode 1 (VLAN-unaware): a port is in this mode when it is used as a standalone
-  net device, or when it is enslaved to a bridge with ``vlan_filtering=0``.
-- Mode 2 (fully VLAN-aware): a port is in this mode when it is enslaved to a
-  bridge with ``vlan_filtering=1``. Access to the entire VLAN range is given to
-  the user through ``bridge vlan`` commands, but general-purpose (anything
-  other than STP, PTP etc) traffic termination is not possible through the
-  switch net devices. The other packets can be still by user space processed
-  through the DSA master interface (similar to ``DSA_TAG_PROTO_NONE``).
-- Mode 3 (best-effort VLAN-aware): a port is in this mode when enslaved to a
-  bridge with ``vlan_filtering=1``, and the devlink property of its parent
-  switch named ``best_effort_vlan_filtering`` is set to ``true``. When
-  configured like this, the range of usable VIDs is reduced (0 to 1023 and 3072
-  to 4094), so is the number of usable VIDs (maximum of 7 non-pvid VLANs per
-  port*), and shared VLAN learning is performed (FDB lookup is done only by
-  DMAC, not also by VID).
-
-To summarize, in each mode, the following types of traffic are supported over
-the switch net devices:
-
-+-------------+-----------+--------------+------------+
-|             |   Mode 1  |    Mode 2    |   Mode 3   |
-+=============+===========+==============+============+
-|   Regular   |    Yes    | No           |     Yes    |
-|   traffic   |           | (use master) |            |
-+-------------+-----------+--------------+------------+
-| Management  |    Yes    |     Yes      |     Yes    |
-| traffic     |           |              |            |
-| (BPDU, PTP) |           |              |            |
-+-------------+-----------+--------------+------------+
-
-To configure the switch to operate in Mode 3, the following steps can be
-followed::
-
-  ip link add dev br0 type bridge
-  # swp2 operates in Mode 1 now
-  ip link set dev swp2 master br0
-  # swp2 temporarily moves to Mode 2
-  ip link set dev br0 type bridge vlan_filtering 1
-  [   61.204770] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
-  [   61.239944] sja1105 spi0.1: Disabled switch tagging
-  # swp3 now operates in Mode 3
-  devlink dev param set spi/spi0.1 name best_effort_vlan_filtering value true cmode runtime
-  [   64.682927] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
-  [   64.711925] sja1105 spi0.1: Enabled switch tagging
-  # Cannot use VLANs in range 1024-3071 while in Mode 3.
-  bridge vlan add dev swp2 vid 1025 untagged pvid
-  RTNETLINK answers: Operation not permitted
-  bridge vlan add dev swp2 vid 100
-  bridge vlan add dev swp2 vid 101 untagged
-  bridge vlan
-  port    vlan ids
-  swp5     1 PVID Egress Untagged
-
-  swp2     1 PVID Egress Untagged
-           100
-           101 Egress Untagged
-
-  swp3     1 PVID Egress Untagged
-
-  swp4     1 PVID Egress Untagged
-
-  br0      1 PVID Egress Untagged
-  bridge vlan add dev swp2 vid 102
-  bridge vlan add dev swp2 vid 103
-  bridge vlan add dev swp2 vid 104
-  bridge vlan add dev swp2 vid 105
-  bridge vlan add dev swp2 vid 106
-  bridge vlan add dev swp2 vid 107
-  # Cannot use mode than 7 VLANs per port while in Mode 3.
-  [ 3885.216832] sja1105 spi0.1: No more free subvlans
-
-\* "maximum of 7 non-pvid VLANs per port": Decoding VLAN-tagged packets on the
-CPU in mode 3 is possible through VLAN retagging of packets that go from the
-switch to the CPU. In cross-chip topologies, the port that goes to the CPU
-might also go to other switches. In that case, those other switches will see
-only a retagged packet (which only has meaning for the CPU). So if they are
-interested in this VLAN, they need to apply retagging in the reverse direction,
-to recover the original value from it. This consumes extra hardware resources
-for this switch. There is a maximum of 32 entries in the Retagging Table of
-each switch device.
-
-As an example, consider this cross-chip topology::
-
-  +-------------------------------------------------+
-  | Host SoC                                        |
-  |           +-------------------------+           |
-  |           | DSA master for embedded |           |
-  |           |   switch (non-sja1105)  |           |
-  |  +--------+-------------------------+--------+  |
-  |  |   embedded L2 switch                      |  |
-  |  |                                           |  |
-  |  |   +--------------+     +--------------+   |  |
-  |  |   |DSA master for|     |DSA master for|   |  |
-  |  |   |  SJA1105 1   |     |  SJA1105 2   |   |  |
-  +--+---+--------------+-----+--------------+---+--+
-
-  +-----------------------+ +-----------------------+
-  |   SJA1105 switch 1    | |   SJA1105 switch 2    |
-  +-----+-----+-----+-----+ +-----+-----+-----+-----+
-  |sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3|
-  +-----+-----+-----+-----+ +-----+-----+-----+-----+
-
-To reach the CPU, SJA1105 switch 1 (spi/spi2.1) uses the same port as is uses
-to reach SJA1105 switch 2 (spi/spi2.2), which would be port 4 (not drawn).
-Similarly for SJA1105 switch 2.
-
-Also consider the following commands, that add VLAN 100 to every sja1105 user
-port::
-
-  devlink dev param set spi/spi2.1 name best_effort_vlan_filtering value true cmode runtime
-  devlink dev param set spi/spi2.2 name best_effort_vlan_filtering value true cmode runtime
-  ip link add dev br0 type bridge
-  for port in sw1p0 sw1p1 sw1p2 sw1p3 \
-              sw2p0 sw2p1 sw2p2 sw2p3; do
-      ip link set dev $port master br0
-  done
-  ip link set dev br0 type bridge vlan_filtering 1
-  for port in sw1p0 sw1p1 sw1p2 sw1p3 \
-              sw2p0 sw2p1 sw2p2; do
-      bridge vlan add dev $port vid 100
-  done
-  ip link add link br0 name br0.100 type vlan id 100 && ip link set dev br0.100 up
-  ip addr add 192.168.100.3/24 dev br0.100
-  bridge vlan add dev br0 vid 100 self
-
-  bridge vlan
-  port    vlan ids
-  sw1p0    1 PVID Egress Untagged
-           100
-
-  sw1p1    1 PVID Egress Untagged
-           100
-
-  sw1p2    1 PVID Egress Untagged
-           100
-
-  sw1p3    1 PVID Egress Untagged
-           100
-
-  sw2p0    1 PVID Egress Untagged
-           100
-
-  sw2p1    1 PVID Egress Untagged
-           100
-
-  sw2p2    1 PVID Egress Untagged
-           100
-
-  sw2p3    1 PVID Egress Untagged
-
-  br0      1 PVID Egress Untagged
-           100
-
-SJA1105 switch 1 consumes 1 retagging entry for each VLAN on each user port
-towards the CPU. It also consumes 1 retagging entry for each non-pvid VLAN that
-it is also interested in, which is configured on any port of any neighbor
-switch.
-
-In this case, SJA1105 switch 1 consumes a total of 11 retagging entries, as
-follows:
-
-- 8 retagging entries for VLANs 1 and 100 installed on its user ports
-  (``sw1p0`` - ``sw1p3``)
-- 3 retagging entries for VLAN 100 installed on the user ports of SJA1105
-  switch 2 (``sw2p0`` - ``sw2p2``), because it also has ports that are
-  interested in it. The VLAN 1 is a pvid on SJA1105 switch 2 and does not need
-  reverse retagging.
-
-SJA1105 switch 2 also consumes 11 retagging entries, but organized as follows:
-
-- 7 retagging entries for the bridge VLANs on its user ports (``sw2p0`` -
-  ``sw2p3``).
-- 4 retagging entries for VLAN 100 installed on the user ports of SJA1105
-  switch 1 (``sw1p0`` - ``sw1p3``).
-
 Switching features
 ==================
 
@@ -282,33 +89,10 @@ untagged), and therefore this mode is also supported.
 
 Segregating the switch ports in multiple bridges is supported (e.g. 2 + 2), but
 all bridges should have the same level of VLAN awareness (either both have
-``vlan_filtering`` 0, or both 1). Also an inevitable limitation of the fact
-that VLAN awareness is global at the switch level is that once a bridge with
-``vlan_filtering`` enslaves at least one switch port, the other un-bridged
-ports are no longer available for standalone traffic termination.
+``vlan_filtering`` 0, or both 1).
 
 Topology and loop detection through STP is supported.
 
-L2 FDB manipulation (add/delete/dump) is currently possible for the first
-generation devices. Aging time of FDB entries, as well as enabling fully static
-management (no address learning and no flooding of unknown traffic) is not yet
-configurable in the driver.
-
-A special comment about bridging with other netdevices (illustrated with an
-example):
-
-A board has eth0, eth1, swp0@eth1, swp1@eth1, swp2@eth1, swp3@eth1.
-The switch ports (swp0-3) are under br0.
-It is desired that eth0 is turned into another switched port that communicates
-with swp0-3.
-
-If br0 has vlan_filtering 0, then eth0 can simply be added to br0 with the
-intended results.
-If br0 has vlan_filtering 1, then a new br1 interface needs to be created that
-enslaves eth0 and eth1 (the DSA master of the switch ports). This is because in
-this mode, the switch ports beneath br0 are not capable of regular traffic, and
-are only used as a conduit for switchdev operations.
-
 Offloads
 ========
 
index c86628e..d9b55b7 100644 (file)
@@ -595,6 +595,14 @@ Link extended substates:
                                                                        that is not formally
                                                                        supported, which led to
                                                                        signal integrity issues
+
+  ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST``        The external clock signal for
+                                                                       SerDes is too weak or
+                                                                       unavailable.
+
+  ``ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS``                        The received signal for
+                                                                       SerDes is too weak because
+                                                                       analog loss of signal.
   =================================================================    =============================
 
   Cable issue substates:
@@ -939,12 +947,25 @@ Kernel response contents:
   ``ETHTOOL_A_COALESCE_TX_USECS_HIGH``         u32     delay (us), high Tx
   ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH``    u32     max packets, high Tx
   ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL``  u32     rate sampling interval
+  ``ETHTOOL_A_COALESCE_USE_CQE_TX``            bool    timer reset mode, Tx
+  ``ETHTOOL_A_COALESCE_USE_CQE_RX``            bool    timer reset mode, Rx
   ===========================================  ======  =======================
 
 Attributes are only included in reply if their value is not zero or the
 corresponding bit in ``ethtool_ops::supported_coalesce_params`` is set (i.e.
 they are declared as supported by driver).
 
+Timer reset mode (``ETHTOOL_A_COALESCE_USE_CQE_TX`` and
+``ETHTOOL_A_COALESCE_USE_CQE_RX``) controls the interaction between packet
+arrival and the various time based delay parameters. By default timers are
+expected to limit the max delay between any packet arrival/departure and a
+corresponding interrupt. In this mode timer should be started by packet
+arrival (sometimes delivery of previous interrupt) and reset when interrupt
+is delivered.
+Setting the appropriate attribute to 1 will enable ``CQE`` mode, where
+each packet event resets the timer. In this mode timer is used to force
+the interrupt if queue goes idle, while busy queues depend on the packet
+limit to trigger interrupts.
 
 COALESCE_SET
 ============
@@ -977,6 +998,8 @@ Request contents:
   ``ETHTOOL_A_COALESCE_TX_USECS_HIGH``         u32     delay (us), high Tx
   ``ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH``    u32     max packets, high Tx
   ``ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL``  u32     rate sampling interval
+  ``ETHTOOL_A_COALESCE_USE_CQE_TX``            bool    timer reset mode, Tx
+  ``ETHTOOL_A_COALESCE_USE_CQE_RX``            bool    timer reset mode, Rx
   ===========================================  ======  =======================
 
 Request is rejected if it attributes declared as unsupported by driver (i.e.
index 3e2221f..ce2b8e8 100644 (file)
@@ -320,13 +320,6 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
-**(Accelerated) VLAN w/ id 10**::
-
-  ld vlan_tci
-  jneq #10, drop
-  ret #-1
-  drop: ret #0
-
 **icmp random packet sampling, 1 in 4**::
 
   ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
   bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
+Examples for low-level BPF extension:
+
+**Packet for interface index 13**::
+
+  ld ifidx
+  jneq #13, drop
+  ret #-1
+  drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+  ld vlan_tci
+  jneq #10, drop
+  ret #-1
+  drop: ret #0
+
 The above example code can be placed into a file (here called "foo"), and
 then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
 and cls_bpf understands and can directly be loaded with. Example with above
@@ -629,8 +638,8 @@ extension, PTP dissector/classifier, and much more. They are all internally
 converted by the kernel into the new instruction set representation and run
 in the eBPF interpreter. For in-kernel handlers, this all works transparently
 by using bpf_prog_create() for setting up the filter, resp.
-bpf_prog_destroy() for destroying it. The macro
-BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+bpf_prog_destroy() for destroying it. The function
+bpf_prog_run(filter, ctx) transparently invokes eBPF interpreter or JITed
 code to run the filter. 'filter' is a pointer to struct bpf_prog that we
 got from bpf_prog_create(), and 'ctx' the given context (e.g.
 skb pointer). All constraints and restrictions from bpf_check_classic() apply
index e9ce559..58bc8cd 100644 (file)
@@ -57,6 +57,7 @@ Contents:
    gen_stats
    gtp
    ila
+   ioam6-sysctl
    ipddp
    ip_dynaddr
    ipsec
@@ -68,6 +69,7 @@ Contents:
    l2tp
    lapb-module
    mac80211-injection
+   mctp
    mpls-sysctl
    mptcp-sysctl
    multiqueue
diff --git a/Documentation/networking/ioam6-sysctl.rst b/Documentation/networking/ioam6-sysctl.rst
new file mode 100644 (file)
index 0000000..c18cab2
--- /dev/null
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+IOAM6 Sysfs variables
+=====================
+
+
+/proc/sys/net/conf/<iface>/ioam6_* variables:
+=============================================
+
+ioam6_enabled - BOOL
+        Accept (= enabled) or ignore (= disabled) IPv6 IOAM options on ingress
+        for this interface.
+
+        * 0 - disabled (default)
+        * 1 - enabled
+
+ioam6_id - SHORT INTEGER
+        Define the IOAM id of this interface.
+
+        Default is ~0.
+
+ioam6_id_wide - INTEGER
+        Define the wide IOAM id of this interface.
+
+        Default is ~0.
index 316c7df..d91ab28 100644 (file)
@@ -1926,6 +1926,23 @@ fib_notify_on_flag_change - INTEGER
         - 1 - Emit notifications.
         - 2 - Emit notifications only for RTM_F_OFFLOAD_FAILED flag change.
 
+ioam6_id - INTEGER
+        Define the IOAM id of this node. Uses only 24 bits out of 32 in total.
+
+        Min: 0
+        Max: 0xFFFFFF
+
+        Default: 0xFFFFFF
+
+ioam6_id_wide - LONG INTEGER
+        Define the wide IOAM id of this node. Uses only 56 bits out of 64 in
+        total. Can be different from ioam6_id.
+
+        Min: 0
+        Max: 0xFFFFFFFFFFFFFF
+
+        Default: 0xFFFFFFFFFFFFFF
+
 IPv6 Fragmentation:
 
 ip6frag_high_thresh - INTEGER
diff --git a/Documentation/networking/mctp.rst b/Documentation/networking/mctp.rst
new file mode 100644 (file)
index 0000000..6100cdc
--- /dev/null
@@ -0,0 +1,213 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================
+Management Component Transport Protocol (MCTP)
+==============================================
+
+net/mctp/ contains protocol support for MCTP, as defined by DMTF standard
+DSP0236. Physical interface drivers ("bindings" in the specification) are
+provided in drivers/net/mctp/.
+
+The core code provides a socket-based interface to send and receive MCTP
+messages, through an AF_MCTP, SOCK_DGRAM socket.
+
+Structure: interfaces & networks
+================================
+
+The kernel models the local MCTP topology through two items: interfaces and
+networks.
+
+An interface (or "link") is an instance of an MCTP physical transport binding
+(as defined by DSP0236, section 3.2.47), likely connected to a specific hardware
+device. This is represented as a ``struct netdevice``.
+
+A network defines a unique address space for MCTP endpoints by endpoint-ID
+(described by DSP0236, section 3.2.31). A network has a user-visible identifier
+to allow references from userspace. Route definitions are specific to one
+network.
+
+Interfaces are associated with one network. A network may be associated with one
+or more interfaces.
+
+If multiple networks are present, each may contain endpoint IDs (EIDs) that are
+also present on other networks.
+
+Sockets API
+===========
+
+Protocol definitions
+--------------------
+
+MCTP uses ``AF_MCTP`` / ``PF_MCTP`` for the address- and protocol- families.
+Since MCTP is message-based, only ``SOCK_DGRAM`` sockets are supported.
+
+.. code-block:: C
+
+    int sd = socket(AF_MCTP, SOCK_DGRAM, 0);
+
+The only (current) value for the ``protocol`` argument is 0.
+
+As with all socket address families, source and destination addresses are
+specified with a ``sockaddr`` type, with a single-byte endpoint address:
+
+.. code-block:: C
+
+    typedef __u8               mctp_eid_t;
+
+    struct mctp_addr {
+            mctp_eid_t         s_addr;
+    };
+
+    struct sockaddr_mctp {
+            unsigned short int smctp_family;
+            int                        smctp_network;
+            struct mctp_addr   smctp_addr;
+            __u8               smctp_type;
+            __u8               smctp_tag;
+    };
+
+    #define MCTP_NET_ANY       0x0
+    #define MCTP_ADDR_ANY      0xff
+
+
+Syscall behaviour
+-----------------
+
+The following sections describe the MCTP-specific behaviours of the standard
+socket system calls. These behaviours have been chosen to map closely to the
+existing sockets APIs.
+
+``bind()`` : set local socket address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sockets that receive incoming request packets will bind to a local address,
+using the ``bind()`` syscall.
+
+.. code-block:: C
+
+    struct sockaddr_mctp addr;
+
+    addr.smctp_family = AF_MCTP;
+    addr.smctp_network = MCTP_NET_ANY;
+    addr.smctp_addr.s_addr = MCTP_ADDR_ANY;
+    addr.smctp_type = MCTP_TYPE_PLDM;
+    addr.smctp_tag = MCTP_TAG_OWNER;
+
+    int rc = bind(sd, (struct sockaddr *)&addr, sizeof(addr));
+
+This establishes the local address of the socket. Incoming MCTP messages that
+match the network, address, and message type will be received by this socket.
+The reference to 'incoming' is important here; a bound socket will only receive
+messages with the TO bit set, to indicate an incoming request message, rather
+than a response.
+
+The ``smctp_tag`` value will configure the tags accepted from the remote side of
+this socket. Given the above, the only valid value is ``MCTP_TAG_OWNER``, which
+will result in remotely "owned" tags being routed to this socket. Since
+``MCTP_TAG_OWNER`` is set, the 3 least-significant bits of ``smctp_tag`` are not
+used; callers must set them to zero.
+
+A ``smctp_network`` value of ``MCTP_NET_ANY`` will configure the socket to
+receive incoming packets from any locally-connected network. A specific network
+value will cause the socket to only receive incoming messages from that network.
+
+The ``smctp_addr`` field specifies a local address to bind to. A value of
+``MCTP_ADDR_ANY`` configures the socket to receive messages addressed to any
+local destination EID.
+
+The ``smctp_type`` field specifies which message types to receive. Only the
+lower 7 bits of the type is matched on incoming messages (ie., the
+most-significant IC bit is not part of the match). This results in the socket
+receiving packets with and without a message integrity check footer.
+
+``sendto()``, ``sendmsg()``, ``send()`` : transmit an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message is transmitted using one of the ``sendto()``, ``sendmsg()`` or
+``send()`` syscalls. Using ``sendto()`` as the primary example:
+
+.. code-block:: C
+
+    struct sockaddr_mctp addr;
+    char buf[14];
+    ssize_t len;
+
+    /* set message destination */
+    addr.smctp_family = AF_MCTP;
+    addr.smctp_network = 0;
+    addr.smctp_addr.s_addr = 8;
+    addr.smctp_tag = MCTP_TAG_OWNER;
+    addr.smctp_type = MCTP_TYPE_ECHO;
+
+    /* arbitrary message to send, with message-type header */
+    buf[0] = MCTP_TYPE_ECHO;
+    memcpy(buf + 1, "hello, world!", sizeof(buf) - 1);
+
+    len = sendto(sd, buf, sizeof(buf), 0,
+                    (struct sockaddr_mctp *)&addr, sizeof(addr));
+
+The network and address fields of ``addr`` define the remote address to send to.
+If ``smctp_tag`` has the ``MCTP_TAG_OWNER``, the kernel will ignore any bits set
+in ``MCTP_TAG_VALUE``, and generate a tag value suitable for the destination
+EID. If ``MCTP_TAG_OWNER`` is not set, the message will be sent with the tag
+value as specified. If a tag value cannot be allocated, the system call will
+report an errno of ``EAGAIN``.
+
+The application must provide the message type byte as the first byte of the
+message buffer passed to ``sendto()``. If a message integrity check is to be
+included in the transmitted message, it must also be provided in the message
+buffer, and the most-significant bit of the message type byte must be 1.
+
+The ``sendmsg()`` system call allows a more compact argument interface, and the
+message buffer to be specified as a scatter-gather list. At present no ancillary
+message types (used for the ``msg_control`` data passed to ``sendmsg()``) are
+defined.
+
+Transmitting a message on an unconnected socket with ``MCTP_TAG_OWNER``
+specified will cause an allocation of a tag, if no valid tag is already
+allocated for that destination. The (destination-eid,tag) tuple acts as an
+implicit local socket address, to allow the socket to receive responses to this
+outgoing message. If any previous allocation has been performed (to for a
+different remote EID), that allocation is lost.
+
+Sockets will only receive responses to requests they have sent (with TO=1) and
+may only respond (with TO=0) to requests they have received.
+
+``recvfrom()``, ``recvmsg()``, ``recv()`` : receive an MCTP message
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An MCTP message can be received by an application using one of the
+``recvfrom()``, ``recvmsg()``, or ``recv()`` system calls. Using ``recvfrom()``
+as the primary example:
+
+.. code-block:: C
+
+    struct sockaddr_mctp addr;
+    socklen_t addrlen;
+    char buf[14];
+    ssize_t len;
+
+    addrlen = sizeof(addr);
+
+    len = recvfrom(sd, buf, sizeof(buf), 0,
+                    (struct sockaddr_mctp *)&addr, &addrlen);
+
+    /* We can expect addr to describe an MCTP address */
+    assert(addrlen >= sizeof(buf));
+    assert(addr.smctp_family == AF_MCTP);
+
+    printf("received %zd bytes from remote EID %d\n", rc, addr.smctp_addr);
+
+The address argument to ``recvfrom`` and ``recvmsg`` is populated with the
+remote address of the incoming message, including tag value (this will be needed
+in order to reply to the message).
+
+The first byte of the message buffer will contain the message type byte. If an
+integrity check follows the message, it will be included in the received buffer.
+
+The ``recv()`` system call behaves in a similar way, but does not provide a
+remote address to the application. Therefore, these are only useful if the
+remote address is already known, or the message does not require a reply.
+
+Like the send calls, sockets will only receive responses to requests they have
+sent (TO=1) and may only respond (TO=0) to requests they have received.
index 76d939e..b0d4da7 100644 (file)
@@ -45,3 +45,15 @@ allow_join_initial_addr_port - BOOLEAN
        This is a per-namespace sysctl.
 
        Default: 1
+
+stale_loss_cnt - INTEGER
+       The number of MPTCP-level retransmission intervals with no traffic and
+       pending outstanding data on a given subflow required to declare it stale.
+       The packet scheduler ignores stale subflows.
+       A low stale_loss_cnt  value allows for fast active-backup switch-over,
+       an high value maximize links utilization on edge scenarios e.g. lossy
+       link with high BER or peer pausing the data processing.
+
+       This is a per-namespace sysctl.
+
+       Default: 4
index 17bdcb7..9e4cccb 100644 (file)
@@ -222,6 +222,35 @@ ndo_do_ioctl:
        Synchronization: rtnl_lock() semaphore.
        Context: process
 
+        This is only called by network subsystems internally,
+        not by user space calling ioctl as it was in before
+        linux-5.14.
+
+ndo_siocbond:
+        Synchronization: rtnl_lock() semaphore.
+        Context: process
+
+        Used by the bonding driver for the SIOCBOND family of
+        ioctl commands.
+
+ndo_siocwandev:
+       Synchronization: rtnl_lock() semaphore.
+       Context: process
+
+       Used by the drivers/net/wan framework to handle
+       the SIOCWANDEV ioctl with the if_settings structure.
+
+ndo_siocdevprivate:
+       Synchronization: rtnl_lock() semaphore.
+       Context: process
+
+       This is used to implement SIOCDEVPRIVATE ioctl helpers.
+       These should not be added to new drivers, so don't use.
+
+ndo_eth_ioctl:
+       Synchronization: rtnl_lock() semaphore.
+       Context: process
+
 ndo_get_stats:
        Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU.
        Context: atomic (can't sleep under rwlock or RCU)
index 024d784..34ca762 100644 (file)
@@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
        This extended timeout will be used in case there is an GRE stream
        detected.
 
+nf_hooks_lwtunnel - BOOLEAN
+       - 0 - disabled (default)
+       - not 0 - enabled
+
+       If this option is enabled, the lightweight tunnel netfilter hooks are
+       enabled. This option cannot be disabled once it is enabled.
+
 nf_flowtable_tcp_timeout - INTEGER (seconds)
         default 30
 
index 7afa1c9..1225f0f 100644 (file)
@@ -248,26 +248,24 @@ Usage:::
 
   -i : ($DEV)       output interface/device (required)
   -s : ($PKT_SIZE)  packet size
-  -d : ($DEST_IP)   destination IP
+  -d : ($DEST_IP)   destination IP. CIDR (e.g. 198.18.0.0/15) is also allowed
   -m : ($DST_MAC)   destination MAC-addr
+  -p : ($DST_PORT)  destination PORT range (e.g. 433-444) is also allowed
   -t : ($THREADS)   threads to start
+  -f : ($F_THREAD)  index of first thread (zero indexed CPU number)
   -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+  -n : ($COUNT)     num messages to send per thread, 0 means indefinitely
   -b : ($BURST)     HW level bursting of SKBs
   -v : ($VERBOSE)   verbose
   -x : ($DEBUG)     debug
+  -6 : ($IP6)       IPv6
+  -w : ($DELAY)     Tx Delay value (ns)
+  -a : ($APPEND)    Script will not reset generator's state, but will append its config
 
 The global variables being set are also listed.  E.g. the required
 interface/device parameter "-i" sets variable $DEV.  Copy the
 pktgen_sampleXX scripts and modify them to fit your own needs.
 
-The old scripts::
-
-    pktgen.conf-1-2                  # 1 CPU 2 dev
-    pktgen.conf-1-1-rdos             # 1 CPU 1 dev w. route DoS
-    pktgen.conf-1-1-ip6              # 1 CPU 1 dev ipv6
-    pktgen.conf-1-1-ip6-rdos         # 1 CPU 1 dev ipv6  w. route DoS
-    pktgen.conf-1-1-flows            # 1 CPU 1 dev multiple flows.
-
 
 Interrupt affinity
 ===================
@@ -398,7 +396,7 @@ Current commands and configuration options
 References:
 
 - ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
-- tp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
+- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
 
 Paper from Linux-Kongress in Erlangen 2004.
 - ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
index 7db3985..a722eb3 100644 (file)
@@ -625,7 +625,7 @@ interfaces of a DSA switch to share the same PHC.
 By design, PTP timestamping with a DSA switch does not need any special
 handling in the driver for the host port it is attached to.  However, when the
 host port also supports PTP timestamping, DSA will take care of intercepting
-the ``.ndo_do_ioctl`` calls towards the host port, and block attempts to enable
+the ``.ndo_eth_ioctl`` calls towards the host port, and block attempts to enable
 hardware timestamping on it. This is because the SO_TIMESTAMPING API does not
 allow the delivery of multiple hardware timestamps for the same packet, so
 anybody else except for the DSA switch port must be prevented from doing so.
@@ -688,7 +688,7 @@ ethtool ioctl operations for them need to be mediated by their respective MAC
 driver.  Therefore, as opposed to DSA switches, modifications need to be done
 to each individual MAC driver for PHY timestamping support. This entails:
 
-- Checking, in ``.ndo_do_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)``
+- Checking, in ``.ndo_eth_ioctl``, whether ``phy_has_hwtstamp(netdev->phydev)``
   is true or not. If it is, then the MAC driver should not process this request
   but instead pass it on to the PHY using ``phy_mii_ioctl()``.
 
@@ -747,7 +747,7 @@ For example, a typical driver design for TX timestamping might be to split the
 transmission part into 2 portions:
 
 1. "TX": checks whether PTP timestamping has been previously enabled through
-   the ``.ndo_do_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the
+   the ``.ndo_eth_ioctl`` ("``priv->hwtstamp_tx_enabled == true``") and the
    current skb requires a TX timestamp ("``skb_shinfo(skb)->tx_flags &
    SKBTX_HW_TSTAMP``"). If this is true, it sets the
    "``skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS``" flag. Note: as
index 0dde145..0a9a6f9 100644 (file)
@@ -144,6 +144,19 @@ default VRF are only handled by a socket not bound to any VRF::
 netfilter rules on the VRF device can be used to limit access to services
 running in the default VRF context as well.
 
+Using VRF-aware applications (applications which simultaneously create sockets
+outside and inside VRFs) in conjunction with ``net.ipv4.tcp_l3mdev_accept=1``
+is possible but may lead to problems in some situations. With that sysctl
+value, it is unspecified which listening socket will be selected to handle
+connections for VRF traffic; ie. either a socket bound to the VRF or an unbound
+socket may be used to accept new connections from a VRF. This somewhat
+unexpected behavior can lead to problems if sockets are configured with extra
+options (ex. TCP MD5 keys) with the expectation that VRF traffic will
+exclusively be handled by sockets bound to VRFs, as would be the case with
+``net.ipv4.tcp_l3mdev_accept=0``. Finally and as a reminder, regardless of
+which listening socket is selected, established sockets will be created in the
+VRF based on the ingress interface, as documented earlier.
+
 --------------------------------------------------------------------------------
 
 Using iproute2 for VRFs
index d7b4f32..2f12abc 100644 (file)
@@ -3197,7 +3197,7 @@ S:        Maintained
 W:     https://www.open-mesh.org/
 Q:     https://patchwork.open-mesh.org/project/batman/list/
 B:     https://www.open-mesh.org/projects/batman-adv/issues
-C:     irc://chat.freenode.net/batman
+C:     ircs://irc.hackint.org/batadv
 T:     git https://git.open-mesh.org/linux-merge.git
 F:     Documentation/networking/batman-adv.rst
 F:     include/uapi/linux/batadv_packet.h
@@ -3409,7 +3409,6 @@ F:        drivers/net/ethernet/netronome/nfp/bpf/
 
 BPF JIT for POWERPC (32-BIT AND 64-BIT)
 M:     Naveen N. Rao <naveen.n.rao@linux.ibm.com>
-M:     Sandipan Das <sandipan@linux.ibm.com>
 L:     netdev@vger.kernel.org
 L:     bpf@vger.kernel.org
 S:     Maintained
@@ -5694,6 +5693,7 @@ DPAA2 ETHERNET SWITCH DRIVER
 M:     Ioana Ciornei <ioana.ciornei@nxp.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+F:     Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
 F:     drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
 F:     drivers/net/ethernet/freescale/dpaa2/dpsw*
 
@@ -6914,6 +6914,12 @@ M:       Mark Einon <mark.einon@gmail.com>
 S:     Odd Fixes
 F:     drivers/net/ethernet/agere/
 
+ETAS ES58X CAN/USB DRIVER
+M:     Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+L:     linux-can@vger.kernel.org
+S:     Maintained
+F:     drivers/net/can/usb/etas_es58x/
+
 ETHERNET BRIDGE
 M:     Roopa Prabhu <roopa@nvidia.com>
 M:     Nikolay Aleksandrov <nikolay@nvidia.com>
@@ -9759,11 +9765,6 @@ M:       David Sterba <dsterba@suse.com>
 S:     Odd Fixes
 F:     drivers/tty/ipwireless/
 
-IPX NETWORK LAYER
-L:     netdev@vger.kernel.org
-S:     Obsolete
-F:     include/uapi/linux/ipx.h
-
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:     Marc Zyngier <maz@kernel.org>
 S:     Maintained
@@ -10398,6 +10399,7 @@ F:      net/core/skmsg.c
 F:     net/core/sock_map.c
 F:     net/ipv4/tcp_bpf.c
 F:     net/ipv4/udp_bpf.c
+F:     net/unix/unix_bpf.c
 
 LANDLOCK SECURITY MODULE
 M:     Mickaël Salaün <mic@digikod.net>
@@ -11040,6 +11042,18 @@ F:     drivers/mailbox/arm_mhuv2.c
 F:     include/linux/mailbox/arm_mhuv2_message.h
 F:     Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml
 
+MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
+M:     Jeremy Kerr <jk@codeconstruct.com.au>
+M:     Matt Johnston <matt@codeconstruct.com.au>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     Documentation/networking/mctp.rst
+F:     drivers/net/mctp/
+F:     include/net/mctp.h
+F:     include/net/mctpdevice.h
+F:     include/net/netns/mctp.h
+F:     net/mctp/
+
 MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
 M:     Michael Kerrisk <mtk.manpages@gmail.com>
 L:     linux-man@vger.kernel.org
@@ -11337,6 +11351,12 @@ W:     https://linuxtv.org
 T:     git git://linuxtv.org/media_tree.git
 F:     drivers/media/radio/radio-maxiradio*
 
+MAXLINEAR ETHERNET PHY DRIVER
+M:     Xu Liang <lxu@maxlinear.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     drivers/net/phy/mxl-gpy.c
+
 MCBA MICROCHIP CAN BUS ANALYZER TOOL DRIVER
 R:     Yasushi SHOJI <yashi@spacecubics.com>
 L:     linux-can@vger.kernel.org
@@ -13880,6 +13900,12 @@ F:     Documentation/devicetree/
 F:     arch/*/boot/dts/
 F:     include/dt-bindings/
 
+OPENCOMPUTE PTP CLOCK DRIVER
+M:     Jonathan Lemon <jonathan.lemon@gmail.com>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/ptp/ptp_ocp.c
+
 OPENCORES I2C BUS DRIVER
 M:     Peter Korsgaard <peter@korsgaard.com>
 M:     Andrew Lunn <andrew@lunn.ch>
@@ -14943,13 +14969,6 @@ S:     Maintained
 F:     include/linux/printk.h
 F:     kernel/printk/
 
-PRISM54 WIRELESS DRIVER
-M:     Luis Chamberlain <mcgrof@kernel.org>
-L:     linux-wireless@vger.kernel.org
-S:     Obsolete
-W:     https://wireless.wiki.kernel.org/en/users/Drivers/p54
-F:     drivers/net/wireless/intersil/prism54/
-
 PROC FILESYSTEM
 L:     linux-kernel@vger.kernel.org
 L:     linux-fsdevel@vger.kernel.org
index 6b3daba..1dd9baf 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 98ccc81..8e41c8b 100644 (file)
                                status = "disabled";
                        };
 
-                       fec: fec@50038000 {
+                       fec: ethernet@50038000 {
                                compatible = "fsl,imx35-fec", "fsl,imx27-fec";
                                reg = <0x50038000 0x4000>;
                                clocks = <&clks 46>, <&clks 8>;
index 52e3567..225cf6b 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet_novena>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
-       rxc-skew-ps = <3000>;
-       rxdv-skew-ps = <0>;
-       txc-skew-ps = <3000>;
-       txen-skew-ps = <0>;
-       rxd0-skew-ps = <0>;
-       rxd1-skew-ps = <0>;
-       rxd2-skew-ps = <0>;
-       rxd3-skew-ps = <0>;
-       txd0-skew-ps = <3000>;
-       txd1-skew-ps = <3000>;
-       txd2-skew-ps = <3000>;
-       txd3-skew-ps = <3000>;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       rxc-skew-ps = <3000>;
+                       rxdv-skew-ps = <0>;
+                       txc-skew-ps = <3000>;
+                       txen-skew-ps = <0>;
+                       rxd0-skew-ps = <0>;
+                       rxd1-skew-ps = <0>;
+                       rxd2-skew-ps = <0>;
+                       rxd3-skew-ps = <0>;
+                       txd0-skew-ps = <3000>;
+                       txd1-skew-ps = <3000>;
+                       txd2-skew-ps = <3000>;
+                       txd3-skew-ps = <3000>;
+               };
+       };
 };
 
 &hdmi {
index ead7ba2..563bf9d 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio7 18 GPIO_ACTIVE_LOW>;
-       txd0-skew-ps = <0>;
-       txd1-skew-ps = <0>;
-       txd2-skew-ps = <0>;
-       txd3-skew-ps = <0>;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       txd0-skew-ps = <0>;
+                       txd1-skew-ps = <0>;
+                       txd2-skew-ps = <0>;
+                       txd3-skew-ps = <0>;
+               };
+       };
 };
 
 &gpmi {
index d526f01..ac34709 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
-       txen-skew-ps = <0>;
-       txc-skew-ps = <3000>;
-       rxdv-skew-ps = <0>;
-       rxc-skew-ps = <3000>;
-       rxd0-skew-ps = <0>;
-       rxd1-skew-ps = <0>;
-       rxd2-skew-ps = <0>;
-       rxd3-skew-ps = <0>;
-       txd0-skew-ps = <0>;
-       txd1-skew-ps = <0>;
-       txd2-skew-ps = <0>;
-       txd3-skew-ps = <0>;
        interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
                              <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
        fsl,err006687-workaround-present;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       txen-skew-ps = <0>;
+                       txc-skew-ps = <3000>;
+                       rxdv-skew-ps = <0>;
+                       rxc-skew-ps = <3000>;
+                       rxd0-skew-ps = <0>;
+                       rxd1-skew-ps = <0>;
+                       rxd2-skew-ps = <0>;
+                       rxd3-skew-ps = <0>;
+                       txd0-skew-ps = <0>;
+                       txd1-skew-ps = <0>;
+                       txd2-skew-ps = <0>;
+                       txd3-skew-ps = <0>;
+               };
+       };
 };
 
 &hdmi {
index a091782..c96f4d7 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
-       txen-skew-ps = <0>;
-       txc-skew-ps = <3000>;
-       rxdv-skew-ps = <0>;
-       rxc-skew-ps = <3000>;
-       rxd0-skew-ps = <0>;
-       rxd1-skew-ps = <0>;
-       rxd2-skew-ps = <0>;
-       rxd3-skew-ps = <0>;
-       txd0-skew-ps = <0>;
-       txd1-skew-ps = <0>;
-       txd2-skew-ps = <0>;
-       txd3-skew-ps = <0>;
        interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
                              <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
        fsl,err006687-workaround-present;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       txen-skew-ps = <0>;
+                       txc-skew-ps = <3000>;
+                       rxdv-skew-ps = <0>;
+                       rxc-skew-ps = <3000>;
+                       rxd0-skew-ps = <0>;
+                       rxd1-skew-ps = <0>;
+                       rxd2-skew-ps = <0>;
+                       rxd3-skew-ps = <0>;
+                       txd0-skew-ps = <0>;
+                       txd1-skew-ps = <0>;
+                       txd2-skew-ps = <0>;
+                       txd3-skew-ps = <0>;
+               };
+       };
 };
 
 &hdmi {
index 1243677..49da30d 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
-       txen-skew-ps = <0>;
-       txc-skew-ps = <3000>;
-       rxdv-skew-ps = <0>;
-       rxc-skew-ps = <3000>;
-       rxd0-skew-ps = <0>;
-       rxd1-skew-ps = <0>;
-       rxd2-skew-ps = <0>;
-       rxd3-skew-ps = <0>;
-       txd0-skew-ps = <0>;
-       txd1-skew-ps = <0>;
-       txd2-skew-ps = <0>;
-       txd3-skew-ps = <0>;
        interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_HIGH>,
                              <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>;
        fsl,err006687-workaround-present;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       txen-skew-ps = <0>;
+                       txc-skew-ps = <3000>;
+                       rxdv-skew-ps = <0>;
+                       rxc-skew-ps = <3000>;
+                       rxd0-skew-ps = <0>;
+                       rxd1-skew-ps = <0>;
+                       rxd2-skew-ps = <0>;
+                       rxd3-skew-ps = <0>;
+                       txd0-skew-ps = <0>;
+                       txd1-skew-ps = <0>;
+                       txd2-skew-ps = <0>;
+                       txd3-skew-ps = <0>;
+               };
+       };
 };
 
 &hdmi {
index fdc3aa9..eb9a0b1 100644 (file)
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_enet>;
        phy-mode = "rgmii";
+       phy-handle = <&ethphy>;
        phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>;
-       txen-skew-ps = <0>;
-       txc-skew-ps = <3000>;
-       rxdv-skew-ps = <0>;
-       rxc-skew-ps = <3000>;
-       rxd0-skew-ps = <0>;
-       rxd1-skew-ps = <0>;
-       rxd2-skew-ps = <0>;
-       rxd3-skew-ps = <0>;
-       txd0-skew-ps = <0>;
-       txd1-skew-ps = <0>;
-       txd2-skew-ps = <0>;
-       txd3-skew-ps = <0>;
        status = "okay";
+
+       mdio {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               ethphy: ethernet-phy {
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       txen-skew-ps = <0>;
+                       txc-skew-ps = <3000>;
+                       rxdv-skew-ps = <0>;
+                       rxc-skew-ps = <3000>;
+                       rxd0-skew-ps = <0>;
+                       rxd1-skew-ps = <0>;
+                       rxd2-skew-ps = <0>;
+                       rxd3-skew-ps = <0>;
+                       txd0-skew-ps = <0>;
+                       txd1-skew-ps = <0>;
+                       txd2-skew-ps = <0>;
+                       txd3-skew-ps = <0>;
+               };
+       };
 };
 
 &hdmi {
index c6d1c63..5e6bef2 100644 (file)
        phy-mode = "rgmii-id";
        phy-reset-gpios = <&gpio7 15 GPIO_ACTIVE_LOW>;
        phy-reset-duration = <1>;
-       phy-reset-delay = <1>;
        phy-supply = <&reg_fec1_pwdn>;
        phy-handle = <&ethphy1_0>;
        fsl,magic-packet;
index 23856a8..36ef6a3 100644 (file)
@@ -23,7 +23,6 @@
        phy-mode = "rgmii-id";
        phy-reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>;
        phy-reset-duration = <1>;
-       phy-reset-delay = <1>;
        phy-supply = <&reg_fec2_pwdn>;
        phy-handle = <&ethphy2_0>;
        fsl,magic-packet;
index b5eadd7..cdc720f 100644 (file)
@@ -268,9 +268,23 @@ static struct platform_device ixp46x_i2c_controller = {
        .resource       = ixp46x_i2c_resources
 };
 
+static struct resource ixp46x_ptp_resources[] = {
+       DEFINE_RES_MEM(IXP4XX_TIMESYNC_BASE_PHYS, SZ_4K),
+       DEFINE_RES_IRQ_NAMED(IRQ_IXP4XX_GPIO8, "master"),
+       DEFINE_RES_IRQ_NAMED(IRQ_IXP4XX_GPIO7, "slave"),
+};
+
+static struct platform_device ixp46x_ptp = {
+       .name           = "ptp-ixp46x",
+       .id             = -1,
+       .resource       = ixp46x_ptp_resources,
+       .num_resources  = ARRAY_SIZE(ixp46x_ptp_resources),
+};
+
 static struct platform_device *ixp46x_devices[] __initdata = {
        &ixp46x_hwrandom_device,
        &ixp46x_i2c_controller,
+       &ixp46x_ptp,
 };
 
 unsigned long ixp4xx_exp_bus_size;
index e7648c3..1608a48 100644 (file)
                        };
 
                        fec1: ethernet@30be0000 {
-                               compatible = "fsl,imx8mm-fec", "fsl,imx6sx-fec";
+                               compatible = "fsl,imx8mm-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec";
                                reg = <0x30be0000 0x10000>;
                                interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
                                             <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
index d4231e0..e6de293 100644 (file)
                        };
 
                        fec1: ethernet@30be0000 {
-                               compatible = "fsl,imx8mn-fec", "fsl,imx6sx-fec";
+                               compatible = "fsl,imx8mn-fec", "fsl,imx8mq-fec", "fsl,imx6sx-fec";
                                reg = <0x30be0000 0x10000>;
                                interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
                                             <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
index f5f5895..46da21a 100644 (file)
@@ -17,9 +17,9 @@
 };
 
 &fec1 {
-       compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec";
+       compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec";
 };
 
 &fec2 {
-       compatible = "fsl,imx8qxp-fec", "fsl,imx6sx-fec";
+       compatible = "fsl,imx8qxp-fec", "fsl,imx8qm-fec", "fsl,imx6sx-fec";
 };
index ad07fff..787ebce 100644 (file)
                                <0x6 0x10004000 0x7fc000>,
                                <0x6 0x11010000 0xaf0000>;
                        reg-names = "cpu", "dev", "gcb";
-                       interrupt-names = "xtr";
-                       interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
+                       interrupt-names = "xtr", "fdma";
+                       interrupts =    <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+                                       <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
                        resets = <&reset 0>;
                        reset-names = "switch";
                };
index 23a9fb7..79c1a75 100644 (file)
@@ -5,6 +5,9 @@
 #ifndef __ASM_COMPAT_H
 #define __ASM_COMPAT_H
 
+#define compat_mode_t compat_mode_t
+typedef u16            compat_mode_t;
+
 #include <asm-generic/compat.h>
 
 #ifdef CONFIG_COMPAT
@@ -27,13 +30,9 @@ typedef u16          __compat_uid_t;
 typedef u16            __compat_gid_t;
 typedef u16            __compat_uid16_t;
 typedef u16            __compat_gid16_t;
-typedef u32            __compat_uid32_t;
-typedef u32            __compat_gid32_t;
-typedef u16            compat_mode_t;
 typedef u32            compat_dev_t;
 typedef s32            compat_nlink_t;
 typedef u16            compat_ipc_pid_t;
-typedef u32            compat_caddr_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 struct compat_stat {
@@ -103,13 +102,6 @@ struct compat_statfs {
 
 #define COMPAT_RLIM_INFINITY           0xffffffff
 
-typedef u32            compat_old_sigset_t;
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
index 6597571..53f015a 100644 (file)
@@ -9,20 +9,25 @@
 #include <asm/page.h>
 #include <asm/ptrace.h>
 
+typedef s32            __compat_uid_t;
+typedef s32            __compat_gid_t;
+typedef __compat_uid_t __compat_uid32_t;
+typedef __compat_gid_t __compat_gid32_t;
+#define __compat_uid32_t __compat_uid32_t
+#define __compat_gid32_t __compat_gid32_t
+
+#define _COMPAT_NSIG           128             /* Don't ask !$@#% ...  */
+#define _COMPAT_NSIG_BPW       32
+typedef u32            compat_sigset_word;
+
 #include <asm-generic/compat.h>
 
 #define COMPAT_USER_HZ         100
 #define COMPAT_UTS_MACHINE     "mips\0\0\0"
 
-typedef s32            __compat_uid_t;
-typedef s32            __compat_gid_t;
-typedef __compat_uid_t __compat_uid32_t;
-typedef __compat_gid_t __compat_gid32_t;
-typedef u32            compat_mode_t;
 typedef u32            compat_dev_t;
 typedef u32            compat_nlink_t;
 typedef s32            compat_ipc_pid_t;
-typedef s32            compat_caddr_t;
 typedef struct {
        s32     val[2];
 } compat_fsid_t;
@@ -89,13 +94,6 @@ struct compat_statfs {
 
 #define COMPAT_RLIM_INFINITY   0x7fffffffUL
 
-typedef u32            compat_old_sigset_t;    /* at least 32 bits */
-
-#define _COMPAT_NSIG           128             /* Don't ask !$@#% ...  */
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 static inline void __user *arch_compat_alloc_user_space(long len)
index cdf404a..1eaf6a1 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 1a609d3..b5d90e8 100644 (file)
@@ -8,6 +8,9 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#define compat_mode_t compat_mode_t
+typedef u16    compat_mode_t;
+
 #include <asm-generic/compat.h>
 
 #define COMPAT_USER_HZ                 100
 
 typedef u32    __compat_uid_t;
 typedef u32    __compat_gid_t;
-typedef u32    __compat_uid32_t;
-typedef u32    __compat_gid32_t;
-typedef u16    compat_mode_t;
 typedef u32    compat_dev_t;
 typedef u16    compat_nlink_t;
 typedef u16    compat_ipc_pid_t;
-typedef u32    compat_caddr_t;
 
 struct compat_stat {
        compat_dev_t            st_dev; /* dev_t is 32 bits on parisc */
@@ -96,13 +95,6 @@ struct compat_sigcontext {
 
 #define COMPAT_RLIM_INFINITY 0xffffffff
 
-typedef u32            compat_old_sigset_t;    /* at least 32 bits */
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 struct compat_ipc64_perm {
index 5b5351c..8baaad5 100644 (file)
 
 #define SO_NETNS_COOKIE                0x4045
 
+#define SO_BUF_LOCK            0x4046
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index 9191fc2..e33dcf1 100644 (file)
 
 typedef u32            __compat_uid_t;
 typedef u32            __compat_gid_t;
-typedef u32            __compat_uid32_t;
-typedef u32            __compat_gid32_t;
-typedef u32            compat_mode_t;
 typedef u32            compat_dev_t;
 typedef s16            compat_nlink_t;
 typedef u16            compat_ipc_pid_t;
-typedef u32            compat_caddr_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 struct compat_stat {
@@ -85,13 +81,6 @@ struct compat_statfs {
 
 #define COMPAT_RLIM_INFINITY           0xffffffff
 
-typedef u32            compat_old_sigset_t;
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 static inline void __user *arch_compat_alloc_user_space(long len)
index 20f169b..36dbf50 100644 (file)
@@ -53,8 +53,6 @@ extern int  ccwgroup_driver_register   (struct ccwgroup_driver *cdriver);
 extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
 int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
                        int num_devices, const char *buf);
-struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
-                                                char *bus_id);
 
 extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
 extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
index ea5b9c3..8d49505 100644 (file)
@@ -9,6 +9,9 @@
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
 
+#define compat_mode_t  compat_mode_t
+typedef u16            compat_mode_t;
+
 #include <asm-generic/compat.h>
 
 #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
 
 typedef u16            __compat_uid_t;
 typedef u16            __compat_gid_t;
-typedef u32            __compat_uid32_t;
-typedef u32            __compat_gid32_t;
-typedef u16            compat_mode_t;
 typedef u16            compat_dev_t;
 typedef u16            compat_nlink_t;
 typedef u16            compat_ipc_pid_t;
-typedef u32            compat_caddr_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 typedef struct {
@@ -155,13 +154,6 @@ struct compat_statfs64 {
 
 #define COMPAT_RLIM_INFINITY           0xffffffff
 
-typedef u32            compat_old_sigset_t;    /* at least 32 bits */
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 /*
index b85842c..8b63410 100644 (file)
@@ -6,6 +6,9 @@
  */
 #include <linux/types.h>
 
+#define compat_mode_t  compat_mode_t
+typedef u16            compat_mode_t;
+
 #include <asm-generic/compat.h>
 
 #define COMPAT_USER_HZ         100
 
 typedef u16            __compat_uid_t;
 typedef u16            __compat_gid_t;
-typedef u32            __compat_uid32_t;
-typedef u32            __compat_gid32_t;
-typedef u16            compat_mode_t;
 typedef u16            compat_dev_t;
 typedef s16            compat_nlink_t;
 typedef u16            compat_ipc_pid_t;
-typedef u32            compat_caddr_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 struct compat_stat {
@@ -115,13 +114,6 @@ struct compat_statfs {
 
 #define COMPAT_RLIM_INFINITY 0x7fffffff
 
-typedef u32            compat_old_sigset_t;
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32            compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 #ifdef CONFIG_COMPAT
index 92675dc..e80ee86 100644 (file)
 
 #define SO_NETNS_COOKIE          0x0050
 
+#define SO_BUF_LOCK              0x0051
+
 #if !defined(__KERNEL__)
 
 
index d27a2a9..cde6db1 100644 (file)
@@ -1488,7 +1488,9 @@ static void vector_get_ethtool_stats(struct net_device *dev,
 }
 
 static int vector_get_coalesce(struct net_device *netdev,
-                                       struct ethtool_coalesce *ec)
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        struct vector_private *vp = netdev_priv(netdev);
 
@@ -1497,7 +1499,9 @@ static int vector_get_coalesce(struct net_device *netdev,
 }
 
 static int vector_set_coalesce(struct net_device *netdev,
-                                       struct ethtool_coalesce *ec)
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        struct vector_private *vp = netdev_priv(netdev);
 
index be09c7e..4ae01cd 100644 (file)
@@ -12,6 +12,9 @@
 #include <asm/user32.h>
 #include <asm/unistd.h>
 
+#define compat_mode_t  compat_mode_t
+typedef u16            compat_mode_t;
+
 #include <asm-generic/compat.h>
 
 #define COMPAT_USER_HZ         100
 
 typedef u16            __compat_uid_t;
 typedef u16            __compat_gid_t;
-typedef u32            __compat_uid32_t;
-typedef u32            __compat_gid32_t;
-typedef u16            compat_mode_t;
 typedef u16            compat_dev_t;
 typedef u16            compat_nlink_t;
 typedef u16            compat_ipc_pid_t;
-typedef u32            compat_caddr_t;
 typedef __kernel_fsid_t        compat_fsid_t;
 
 struct compat_stat {
@@ -92,13 +91,6 @@ struct compat_statfs {
 
 #define COMPAT_RLIM_INFINITY           0xffffffff
 
-typedef u32            compat_old_sigset_t;    /* at least 32 bits */
-
-#define _COMPAT_NSIG           64
-#define _COMPAT_NSIG_BPW       32
-
-typedef u32               compat_sigset_word;
-
 #define COMPAT_OFF_T_MAX       0x7fffffff
 
 struct compat_ipc64_perm {
index 6fd8410..2dfb5fe 100644 (file)
@@ -29,6 +29,7 @@ typedef struct {
 #define SA_X32_ABI     0x01000000u
 
 #ifndef CONFIG_COMPAT
+#define compat_sigset_t compat_sigset_t
 typedef sigset_t compat_sigset_t;
 #endif
 
index 16d76f8..0fe6aac 100644 (file)
@@ -1961,6 +1961,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
        if (flags & BPF_TRAMP_F_CALL_ORIG)
                stack_size += 8; /* room for return value of orig_call */
 
+       if (flags & BPF_TRAMP_F_IP_ARG)
+               stack_size += 8; /* room for IP address argument */
+
        if (flags & BPF_TRAMP_F_SKIP_FRAME)
                /* skip patched call instruction and point orig_call to actual
                 * body of the kernel function.
@@ -1974,6 +1977,22 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
        EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
        EMIT1(0x53);             /* push rbx */
 
+       if (flags & BPF_TRAMP_F_IP_ARG) {
+               /* Store IP address of the traced function:
+                * mov rax, QWORD PTR [rbp + 8]
+                * sub rax, X86_PATCH_SIZE
+                * mov QWORD PTR [rbp - stack_size], rax
+                */
+               emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
+               EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE);
+               emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size);
+
+               /* Continue with stack_size for regs storage, stack will
+                * be correctly restored with 'leave' instruction.
+                */
+               stack_size -= 8;
+       }
+
        save_regs(m, &prog, nr_args, stack_size);
 
        if (flags & BPF_TRAMP_F_CALL_ORIG) {
index 4f2951c..d0e67ec 100644 (file)
@@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc)
   
   // Part of the job is done by atm_pcr_goal which gives us a PCR
   // specification which says: EITHER grab the maximum available PCR
-  // (and perhaps a lower bound which we musn't pass), OR grab this
+  // (and perhaps a lower bound which we must not pass), OR grab this
   // amount, rounding down if you have to (and perhaps a lower bound
-  // which we musn't pass) OR grab this amount, rounding up if you
-  // have to (and perhaps an upper bound which we musn't pass). If any
+  // which we must not pass) OR grab this amount, rounding up if you
+  // have to (and perhaps an upper bound which we must not pass). If any
   // bounds ARE passed we fail. Note that rounding is only rounding to
   // match device limitations, we do not round down to satisfy
   // bandwidth availability even if this would not violate any given
index 9e4bd75..81ce81a 100644 (file)
@@ -3536,7 +3536,7 @@ static int idt77252_preset(struct idt77252_dev *card)
                return -1;
        }
        if (!(pci_command & PCI_COMMAND_IO)) {
-               printk("%s: PCI_COMMAND: %04x (???)\n",
+               printk("%s: PCI_COMMAND: %04x (?)\n",
                       card->name, pci_command);
                deinit_card(card);
                return (-1);
index 6535614..1df2b58 100644 (file)
@@ -236,6 +236,7 @@ EXPORT_SYMBOL(bcma_core_irq);
 
 void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core)
 {
+       device_initialize(&core->dev);
        core->dev.release = bcma_release_core_dev;
        core->dev.bus = &bcma_bus_type;
        dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index);
@@ -277,11 +278,10 @@ static void bcma_register_core(struct bcma_bus *bus, struct bcma_device *core)
 {
        int err;
 
-       err = device_register(&core->dev);
+       err = device_add(&core->dev);
        if (err) {
                bcma_err(bus, "Could not register dev for core 0x%03X\n",
                         core->id.id);
-               put_device(&core->dev);
                return;
        }
        core->dev_registered = true;
@@ -372,7 +372,7 @@ void bcma_unregister_cores(struct bcma_bus *bus)
        /* Now noone uses internally-handled cores, we can free them */
        list_for_each_entry_safe(core, tmp, &bus->cores, list) {
                list_del(&core->list);
-               kfree(core);
+               put_device(&core->dev);
        }
 }
 
index d49e7c0..26d12a7 100644 (file)
@@ -141,8 +141,7 @@ static const char *bcma_device_name(const struct bcma_device_id *id)
        return "UNKNOWN";
 }
 
-static u32 bcma_scan_read32(struct bcma_bus *bus, u8 current_coreidx,
-                      u16 offset)
+static u32 bcma_scan_read32(struct bcma_bus *bus, u16 offset)
 {
        return readl(bus->mmio + offset);
 }
@@ -443,7 +442,7 @@ void bcma_detect_chip(struct bcma_bus *bus)
 
        bcma_scan_switch_core(bus, BCMA_ADDR_BASE);
 
-       tmp = bcma_scan_read32(bus, 0, BCMA_CC_ID);
+       tmp = bcma_scan_read32(bus, BCMA_CC_ID);
        chipinfo->id = (tmp & BCMA_CC_ID_ID) >> BCMA_CC_ID_ID_SHIFT;
        chipinfo->rev = (tmp & BCMA_CC_ID_REV) >> BCMA_CC_ID_REV_SHIFT;
        chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT;
@@ -465,7 +464,7 @@ int bcma_bus_scan(struct bcma_bus *bus)
        if (bus->nr_cores)
                return 0;
 
-       erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM);
+       erombase = bcma_scan_read32(bus, BCMA_CC_EROM);
        if (bus->hosttype == BCMA_HOSTTYPE_SOC) {
                eromptr = ioremap(erombase, BCMA_CORE_SIZE);
                if (!eromptr)
index e5d706e..e4182ac 100644 (file)
@@ -387,6 +387,7 @@ struct bcm_subver_table {
 };
 
 static const struct bcm_subver_table bcm_uart_subver_table[] = {
+       { 0x1111, "BCM4362A2"   },      /* 000.017.017 */
        { 0x4103, "BCM4330B1"   },      /* 002.001.003 */
        { 0x410d, "BCM4334B0"   },      /* 002.001.013 */
        { 0x410e, "BCM43341B0"  },      /* 002.001.014 */
index e44b699..f1705b4 100644 (file)
@@ -131,6 +131,26 @@ int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 }
 EXPORT_SYMBOL_GPL(btintel_set_bdaddr);
 
+static int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
+{
+       u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+       struct sk_buff *skb;
+       int err;
+
+       if (debug)
+               mask[1] |= 0x62;
+
+       skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               err = PTR_ERR(skb);
+               bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err);
+               return err;
+       }
+       kfree_skb(skb);
+
+       return 0;
+}
+
 int btintel_set_diag(struct hci_dev *hdev, bool enable)
 {
        struct sk_buff *skb;
@@ -164,7 +184,7 @@ done:
 }
 EXPORT_SYMBOL_GPL(btintel_set_diag);
 
-int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
+static int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
 {
        int err, ret;
 
@@ -180,9 +200,25 @@ int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(btintel_set_diag_mfg);
 
-void btintel_hw_error(struct hci_dev *hdev, u8 code)
+static int btintel_set_diag_combined(struct hci_dev *hdev, bool enable)
+{
+       int ret;
+
+       /* Legacy ROM device needs to be in the manufacturer mode to apply
+        * diagnostic setting
+        *
+        * This flag is set after reading the Intel version.
+        */
+       if (btintel_test_flag(hdev, INTEL_ROM_LEGACY))
+               ret = btintel_set_diag_mfg(hdev, enable);
+       else
+               ret = btintel_set_diag(hdev, enable);
+
+       return ret;
+}
+
+static void btintel_hw_error(struct hci_dev *hdev, u8 code)
 {
        struct sk_buff *skb;
        u8 type = 0x00;
@@ -214,7 +250,6 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
 
        kfree_skb(skb);
 }
-EXPORT_SYMBOL_GPL(btintel_hw_error);
 
 int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
 {
@@ -236,6 +271,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
         * compatibility options when newer hardware variants come along.
         */
        switch (ver->hw_variant) {
+       case 0x07:      /* WP - Legacy ROM */
+       case 0x08:      /* StP - Legacy ROM */
        case 0x0b:      /* SfP */
        case 0x0c:      /* WsP */
        case 0x11:      /* JfP */
@@ -250,9 +287,15 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
        }
 
        switch (ver->fw_variant) {
+       case 0x01:
+               variant = "Legacy ROM 2.5";
+               break;
        case 0x06:
                variant = "Bootloader";
                break;
+       case 0x22:
+               variant = "Legacy ROM 2.x";
+               break;
        case 0x23:
                variant = "Firmware";
                break;
@@ -270,8 +313,8 @@ int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
 }
 EXPORT_SYMBOL_GPL(btintel_version_info);
 
-int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
-                       const void *param)
+static int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
+                              const void *param)
 {
        while (plen > 0) {
                struct sk_buff *skb;
@@ -293,7 +336,6 @@ int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_secure_send);
 
 int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name)
 {
@@ -340,27 +382,6 @@ int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name)
 }
 EXPORT_SYMBOL_GPL(btintel_load_ddc_config);
 
-int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
-{
-       u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
-       struct sk_buff *skb;
-       int err;
-
-       if (debug)
-               mask[1] |= 0x62;
-
-       skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               err = PTR_ERR(skb);
-               bt_dev_err(hdev, "Setting Intel event mask failed (%d)", err);
-               return err;
-       }
-       kfree_skb(skb);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(btintel_set_event_mask);
-
 int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug)
 {
        int err, ret;
@@ -404,7 +425,8 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver)
 }
 EXPORT_SYMBOL_GPL(btintel_read_version);
 
-int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
+static int btintel_version_info_tlv(struct hci_dev *hdev,
+                                   struct intel_version_tlv *version)
 {
        const char *variant;
 
@@ -481,30 +503,11 @@ int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_version_info_tlv);
 
-int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
+static int btintel_parse_version_tlv(struct hci_dev *hdev,
+                                    struct intel_version_tlv *version,
+                                    struct sk_buff *skb)
 {
-       struct sk_buff *skb;
-       const u8 param[1] = { 0xFF };
-
-       if (!version)
-               return -EINVAL;
-
-       skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
-       if (IS_ERR(skb)) {
-               bt_dev_err(hdev, "Reading Intel version information failed (%ld)",
-                          PTR_ERR(skb));
-               return PTR_ERR(skb);
-       }
-
-       if (skb->data[0]) {
-               bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
-                          skb->data[0]);
-               kfree_skb(skb);
-               return -EIO;
-       }
-
        /* Consume Command Complete Status field */
        skb_pull(skb, 1);
 
@@ -516,7 +519,16 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
        while (skb->len) {
                struct intel_tlv *tlv;
 
+               /* Make sure skb has a minimum length of the header */
+               if (skb->len < sizeof(*tlv))
+                       return -EINVAL;
+
                tlv = (struct intel_tlv *)skb->data;
+
+               /* Make sure skb has a enough data */
+               if (skb->len < tlv->len + sizeof(*tlv))
+                       return -EINVAL;
+
                switch (tlv->type) {
                case INTEL_TLV_CNVI_TOP:
                        version->cnvi_top = get_unaligned_le32(tlv->val);
@@ -580,7 +592,8 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
                        version->sbe_type = tlv->val[0];
                        break;
                case INTEL_TLV_OTP_BDADDR:
-                       memcpy(&version->otp_bd_addr, tlv->val, tlv->len);
+                       memcpy(&version->otp_bd_addr, tlv->val,
+                                                       sizeof(bdaddr_t));
                        break;
                default:
                        /* Ignore rest of information */
@@ -590,10 +603,37 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
                skb_pull(skb, tlv->len + sizeof(*tlv));
        }
 
+       return 0;
+}
+
+static int btintel_read_version_tlv(struct hci_dev *hdev,
+                                   struct intel_version_tlv *version)
+{
+       struct sk_buff *skb;
+       const u8 param[1] = { 0xFF };
+
+       if (!version)
+               return -EINVAL;
+
+       skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
+       if (IS_ERR(skb)) {
+               bt_dev_err(hdev, "Reading Intel version information failed (%ld)",
+                          PTR_ERR(skb));
+               return PTR_ERR(skb);
+       }
+
+       if (skb->data[0]) {
+               bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
+                          skb->data[0]);
+               kfree_skb(skb);
+               return -EIO;
+       }
+
+       btintel_parse_version_tlv(hdev, version, skb);
+
        kfree_skb(skb);
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_read_version_tlv);
 
 /* ------- REGMAP IBT SUPPORT ------- */
 
@@ -1066,10 +1106,10 @@ int btintel_download_firmware(struct hci_dev *hdev,
 }
 EXPORT_SYMBOL_GPL(btintel_download_firmware);
 
-int btintel_download_firmware_newgen(struct hci_dev *hdev,
-                                    struct intel_version_tlv *ver,
-                                    const struct firmware *fw, u32 *boot_param,
-                                    u8 hw_variant, u8 sbe_type)
+static int btintel_download_fw_tlv(struct hci_dev *hdev,
+                                  struct intel_version_tlv *ver,
+                                  const struct firmware *fw, u32 *boot_param,
+                                  u8 hw_variant, u8 sbe_type)
 {
        int err;
        u32 css_header_ver;
@@ -1166,9 +1206,8 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
        }
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_download_firmware_newgen);
 
-void btintel_reset_to_bootloader(struct hci_dev *hdev)
+static void btintel_reset_to_bootloader(struct hci_dev *hdev)
 {
        struct intel_reset params;
        struct sk_buff *skb;
@@ -1211,10 +1250,9 @@ void btintel_reset_to_bootloader(struct hci_dev *hdev)
         */
        msleep(150);
 }
-EXPORT_SYMBOL_GPL(btintel_reset_to_bootloader);
 
-int btintel_read_debug_features(struct hci_dev *hdev,
-                               struct intel_debug_features *features)
+static int btintel_read_debug_features(struct hci_dev *hdev,
+                                      struct intel_debug_features *features)
 {
        struct sk_buff *skb;
        u8 page_no = 1;
@@ -1243,9 +1281,8 @@ int btintel_read_debug_features(struct hci_dev *hdev,
        kfree_skb(skb);
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_read_debug_features);
 
-int btintel_set_debug_features(struct hci_dev *hdev,
+static int btintel_set_debug_features(struct hci_dev *hdev,
                               const struct intel_debug_features *features)
 {
        u8 mask[11] = { 0x0a, 0x92, 0x02, 0x07, 0x00, 0x00, 0x00, 0x00,
@@ -1270,7 +1307,1154 @@ int btintel_set_debug_features(struct hci_dev *hdev,
        kfree_skb(skb);
        return 0;
 }
-EXPORT_SYMBOL_GPL(btintel_set_debug_features);
+
+static const struct firmware *btintel_legacy_rom_get_fw(struct hci_dev *hdev,
+                                              struct intel_version *ver)
+{
+       const struct firmware *fw;
+       char fwname[64];
+       int ret;
+
+       snprintf(fwname, sizeof(fwname),
+                "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq",
+                ver->hw_platform, ver->hw_variant, ver->hw_revision,
+                ver->fw_variant,  ver->fw_revision, ver->fw_build_num,
+                ver->fw_build_ww, ver->fw_build_yy);
+
+       ret = request_firmware(&fw, fwname, &hdev->dev);
+       if (ret < 0) {
+               if (ret == -EINVAL) {
+                       bt_dev_err(hdev, "Intel firmware file request failed (%d)",
+                                  ret);
+                       return NULL;
+               }
+
+               bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)",
+                          fwname, ret);
+
+               /* If the correct firmware patch file is not found, use the
+                * default firmware patch file instead
+                */
+               snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq",
+                        ver->hw_platform, ver->hw_variant);
+               if (request_firmware(&fw, fwname, &hdev->dev) < 0) {
+                       bt_dev_err(hdev, "failed to open default fw file: %s",
+                                  fwname);
+                       return NULL;
+               }
+       }
+
+       bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname);
+
+       return fw;
+}
+
+static int btintel_legacy_rom_patching(struct hci_dev *hdev,
+                                     const struct firmware *fw,
+                                     const u8 **fw_ptr, int *disable_patch)
+{
+       struct sk_buff *skb;
+       struct hci_command_hdr *cmd;
+       const u8 *cmd_param;
+       struct hci_event_hdr *evt = NULL;
+       const u8 *evt_param = NULL;
+       int remain = fw->size - (*fw_ptr - fw->data);
+
+       /* The first byte indicates the types of the patch command or event.
+        * 0x01 means HCI command and 0x02 is HCI event. If the first bytes
+        * in the current firmware buffer doesn't start with 0x01 or
+        * the size of remain buffer is smaller than HCI command header,
+        * the firmware file is corrupted and it should stop the patching
+        * process.
+        */
+       if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) {
+               bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read");
+               return -EINVAL;
+       }
+       (*fw_ptr)++;
+       remain--;
+
+       cmd = (struct hci_command_hdr *)(*fw_ptr);
+       *fw_ptr += sizeof(*cmd);
+       remain -= sizeof(*cmd);
+
+       /* Ensure that the remain firmware data is long enough than the length
+        * of command parameter. If not, the firmware file is corrupted.
+        */
+       if (remain < cmd->plen) {
+               bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len");
+               return -EFAULT;
+       }
+
+       /* If there is a command that loads a patch in the firmware
+        * file, then enable the patch upon success, otherwise just
+        * disable the manufacturer mode, for example patch activation
+        * is not required when the default firmware patch file is used
+        * because there are no patch data to load.
+        */
+       if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e)
+               *disable_patch = 0;
+
+       cmd_param = *fw_ptr;
+       *fw_ptr += cmd->plen;
+       remain -= cmd->plen;
+
+       /* This reads the expected events when the above command is sent to the
+        * device. Some vendor commands expects more than one events, for
+        * example command status event followed by vendor specific event.
+        * For this case, it only keeps the last expected event. so the command
+        * can be sent with __hci_cmd_sync_ev() which returns the sk_buff of
+        * last expected event.
+        */
+       while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) {
+               (*fw_ptr)++;
+               remain--;
+
+               evt = (struct hci_event_hdr *)(*fw_ptr);
+               *fw_ptr += sizeof(*evt);
+               remain -= sizeof(*evt);
+
+               if (remain < evt->plen) {
+                       bt_dev_err(hdev, "Intel fw corrupted: invalid evt len");
+                       return -EFAULT;
+               }
+
+               evt_param = *fw_ptr;
+               *fw_ptr += evt->plen;
+               remain -= evt->plen;
+       }
+
+       /* Every HCI commands in the firmware file has its correspond event.
+        * If event is not found or remain is smaller than zero, the firmware
+        * file is corrupted.
+        */
+       if (!evt || !evt_param || remain < 0) {
+               bt_dev_err(hdev, "Intel fw corrupted: invalid evt read");
+               return -EFAULT;
+       }
+
+       skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen,
+                               cmd_param, evt->evt, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)",
+                          cmd->opcode, PTR_ERR(skb));
+               return PTR_ERR(skb);
+       }
+
+       /* It ensures that the returned event matches the event data read from
+        * the firmware file. At fist, it checks the length and then
+        * the contents of the event.
+        */
+       if (skb->len != evt->plen) {
+               bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)",
+                          le16_to_cpu(cmd->opcode));
+               kfree_skb(skb);
+               return -EFAULT;
+       }
+
+       if (memcmp(skb->data, evt_param, evt->plen)) {
+               bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)",
+                          le16_to_cpu(cmd->opcode));
+               kfree_skb(skb);
+               return -EFAULT;
+       }
+       kfree_skb(skb);
+
+       return 0;
+}
+
+static int btintel_legacy_rom_setup(struct hci_dev *hdev,
+                                   struct intel_version *ver)
+{
+       const struct firmware *fw;
+       const u8 *fw_ptr;
+       int disable_patch, err;
+       struct intel_version new_ver;
+
+       BT_DBG("%s", hdev->name);
+
+       /* fw_patch_num indicates the version of patch the device currently
+        * have. If there is no patch data in the device, it is always 0x00.
+        * So, if it is other than 0x00, no need to patch the device again.
+        */
+       if (ver->fw_patch_num) {
+               bt_dev_info(hdev,
+                           "Intel device is already patched. patch num: %02x",
+                           ver->fw_patch_num);
+               goto complete;
+       }
+
+       /* Opens the firmware patch file based on the firmware version read
+        * from the controller. If it fails to open the matching firmware
+        * patch file, it tries to open the default firmware patch file.
+        * If no patch file is found, allow the device to operate without
+        * a patch.
+        */
+       fw = btintel_legacy_rom_get_fw(hdev, ver);
+       if (!fw)
+               goto complete;
+       fw_ptr = fw->data;
+
+       /* Enable the manufacturer mode of the controller.
+        * Only while this mode is enabled, the driver can download the
+        * firmware patch data and configuration parameters.
+        */
+       err = btintel_enter_mfg(hdev);
+       if (err) {
+               release_firmware(fw);
+               return err;
+       }
+
+       disable_patch = 1;
+
+       /* The firmware data file consists of list of Intel specific HCI
+        * commands and its expected events. The first byte indicates the
+        * type of the message, either HCI command or HCI event.
+        *
+        * It reads the command and its expected event from the firmware file,
+        * and send to the controller. Once __hci_cmd_sync_ev() returns,
+        * the returned event is compared with the event read from the firmware
+        * file and it will continue until all the messages are downloaded to
+        * the controller.
+        *
+        * Once the firmware patching is completed successfully,
+        * the manufacturer mode is disabled with reset and activating the
+        * downloaded patch.
+        *
+        * If the firmware patching fails, the manufacturer mode is
+        * disabled with reset and deactivating the patch.
+        *
+        * If the default patch file is used, no reset is done when disabling
+        * the manufacturer.
+        */
+       while (fw->size > fw_ptr - fw->data) {
+               int ret;
+
+               ret = btintel_legacy_rom_patching(hdev, fw, &fw_ptr,
+                                                &disable_patch);
+               if (ret < 0)
+                       goto exit_mfg_deactivate;
+       }
+
+       release_firmware(fw);
+
+       if (disable_patch)
+               goto exit_mfg_disable;
+
+       /* Patching completed successfully and disable the manufacturer mode
+        * with reset and activate the downloaded firmware patches.
+        */
+       err = btintel_exit_mfg(hdev, true, true);
+       if (err)
+               return err;
+
+       /* Need build number for downloaded fw patches in
+        * every power-on boot
+        */
+       err = btintel_read_version(hdev, &new_ver);
+       if (err)
+               return err;
+
+       bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated",
+                   new_ver.fw_patch_num);
+
+       goto complete;
+
+exit_mfg_disable:
+       /* Disable the manufacturer mode without reset */
+       err = btintel_exit_mfg(hdev, false, false);
+       if (err)
+               return err;
+
+       bt_dev_info(hdev, "Intel firmware patch completed");
+
+       goto complete;
+
+exit_mfg_deactivate:
+       release_firmware(fw);
+
+       /* Patching failed. Disable the manufacturer mode with reset and
+        * deactivate the downloaded firmware patches.
+        */
+       err = btintel_exit_mfg(hdev, true, false);
+       if (err)
+               return err;
+
+       bt_dev_info(hdev, "Intel firmware patch completed and deactivated");
+
+complete:
+       /* Set the event mask for Intel specific vendor events. This enables
+        * a few extra events that are useful during general operation.
+        */
+       btintel_set_event_mask_mfg(hdev, false);
+
+       btintel_check_bdaddr(hdev);
+
+       return 0;
+}
+
+static int btintel_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+       ktime_t delta, rettime;
+       unsigned long long duration;
+       int err;
+
+       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+
+       bt_dev_info(hdev, "Waiting for firmware download to complete");
+
+       err = btintel_wait_on_flag_timeout(hdev, INTEL_DOWNLOADING,
+                                          TASK_INTERRUPTIBLE,
+                                          msecs_to_jiffies(msec));
+       if (err == -EINTR) {
+               bt_dev_err(hdev, "Firmware loading interrupted");
+               return err;
+       }
+
+       if (err) {
+               bt_dev_err(hdev, "Firmware loading timeout");
+               return -ETIMEDOUT;
+       }
+
+       if (btintel_test_flag(hdev, INTEL_FIRMWARE_FAILED)) {
+               bt_dev_err(hdev, "Firmware loading failed");
+               return -ENOEXEC;
+       }
+
+       rettime = ktime_get();
+       delta = ktime_sub(rettime, calltime);
+       duration = (unsigned long long)ktime_to_ns(delta) >> 10;
+
+       bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
+
+       return 0;
+}
+
+static int btintel_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+       ktime_t delta, rettime;
+       unsigned long long duration;
+       int err;
+
+       bt_dev_info(hdev, "Waiting for device to boot");
+
+       err = btintel_wait_on_flag_timeout(hdev, INTEL_BOOTING,
+                                          TASK_INTERRUPTIBLE,
+                                          msecs_to_jiffies(msec));
+       if (err == -EINTR) {
+               bt_dev_err(hdev, "Device boot interrupted");
+               return -EINTR;
+       }
+
+       if (err) {
+               bt_dev_err(hdev, "Device boot timeout");
+               return -ETIMEDOUT;
+       }
+
+       rettime = ktime_get();
+       delta = ktime_sub(rettime, calltime);
+       duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+
+       bt_dev_info(hdev, "Device booted in %llu usecs", duration);
+
+       return 0;
+}
+
+static int btintel_boot(struct hci_dev *hdev, u32 boot_addr)
+{
+       ktime_t calltime;
+       int err;
+
+       calltime = ktime_get();
+
+       btintel_set_flag(hdev, INTEL_BOOTING);
+
+       err = btintel_send_intel_reset(hdev, boot_addr);
+       if (err) {
+               bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
+               btintel_reset_to_bootloader(hdev);
+               return err;
+       }
+
+       /* The bootloader will not indicate when the device is ready. This
+        * is done by the operational firmware sending bootup notification.
+        *
+        * Booting into operational firmware should not take longer than
+        * 1 second. However if that happens, then just fail the setup
+        * since something went wrong.
+        */
+       err = btintel_boot_wait(hdev, calltime, 1000);
+       if (err == -ETIMEDOUT)
+               btintel_reset_to_bootloader(hdev);
+
+       return err;
+}
+
+static int btintel_get_fw_name(struct intel_version *ver,
+                                            struct intel_boot_params *params,
+                                            char *fw_name, size_t len,
+                                            const char *suffix)
+{
+       switch (ver->hw_variant) {
+       case 0x0b:      /* SfP */
+       case 0x0c:      /* WsP */
+               snprintf(fw_name, len, "intel/ibt-%u-%u.%s",
+                       le16_to_cpu(ver->hw_variant),
+                       le16_to_cpu(params->dev_revid),
+                       suffix);
+               break;
+       case 0x11:      /* JfP */
+       case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* CcP */
+               snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s",
+                       le16_to_cpu(ver->hw_variant),
+                       le16_to_cpu(ver->hw_revision),
+                       le16_to_cpu(ver->fw_revision),
+                       suffix);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int btintel_download_fw(struct hci_dev *hdev,
+                                        struct intel_version *ver,
+                                        struct intel_boot_params *params,
+                                        u32 *boot_param)
+{
+       const struct firmware *fw;
+       char fwname[64];
+       int err;
+       ktime_t calltime;
+
+       if (!ver || !params)
+               return -EINVAL;
+
+       /* The firmware variant determines if the device is in bootloader
+        * mode or is running operational firmware. The value 0x06 identifies
+        * the bootloader and the value 0x23 identifies the operational
+        * firmware.
+        *
+        * When the operational firmware is already present, then only
+        * the check for valid Bluetooth device address is needed. This
+        * determines if the device will be added as configured or
+        * unconfigured controller.
+        *
+        * It is not possible to use the Secure Boot Parameters in this
+        * case since that command is only available in bootloader mode.
+        */
+       if (ver->fw_variant == 0x23) {
+               btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+               btintel_check_bdaddr(hdev);
+
+               /* SfP and WsP don't seem to update the firmware version on file
+                * so version checking is currently possible.
+                */
+               switch (ver->hw_variant) {
+               case 0x0b:      /* SfP */
+               case 0x0c:      /* WsP */
+                       return 0;
+               }
+
+               /* Proceed to download to check if the version matches */
+               goto download;
+       }
+
+       /* Read the secure boot parameters to identify the operating
+        * details of the bootloader.
+        */
+       err = btintel_read_boot_params(hdev, params);
+       if (err)
+               return err;
+
+       /* It is required that every single firmware fragment is acknowledged
+        * with a command complete event. If the boot parameters indicate
+        * that this bootloader does not send them, then abort the setup.
+        */
+       if (params->limited_cce != 0x00) {
+               bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
+                          params->limited_cce);
+               return -EINVAL;
+       }
+
+       /* If the OTP has no valid Bluetooth device address, then there will
+        * also be no valid address for the operational firmware.
+        */
+       if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+               bt_dev_info(hdev, "No device address configured");
+               set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+       }
+
+download:
+       /* With this Intel bootloader only the hardware variant and device
+        * revision information are used to select the right firmware for SfP
+        * and WsP.
+        *
+        * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
+        *
+        * Currently the supported hardware variants are:
+        *   11 (0x0b) for iBT3.0 (LnP/SfP)
+        *   12 (0x0c) for iBT3.5 (WsP)
+        *
+        * For ThP/JfP and for future SKU's, the FW name varies based on HW
+        * variant, HW revision and FW revision, as these are dependent on CNVi
+        * and RF Combination.
+        *
+        *   17 (0x11) for iBT3.5 (JfP)
+        *   18 (0x12) for iBT3.5 (ThP)
+        *
+        * The firmware file name for these will be
+        * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
+        *
+        */
+       err = btintel_get_fw_name(ver, params, fwname, sizeof(fwname), "sfi");
+       if (err < 0) {
+               if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+                       /* Firmware has already been loaded */
+                       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+                       return 0;
+               }
+
+               bt_dev_err(hdev, "Unsupported Intel firmware naming");
+               return -EINVAL;
+       }
+
+       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
+       if (err < 0) {
+               if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+                       /* Firmware has already been loaded */
+                       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+                       return 0;
+               }
+
+               bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+                          fwname, err);
+               return err;
+       }
+
+       bt_dev_info(hdev, "Found device firmware: %s", fwname);
+
+       if (fw->size < 644) {
+               bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
+                          fw->size);
+               err = -EBADF;
+               goto done;
+       }
+
+       calltime = ktime_get();
+
+       btintel_set_flag(hdev, INTEL_DOWNLOADING);
+
+       /* Start firmware downloading and get boot parameter */
+       err = btintel_download_firmware(hdev, ver, fw, boot_param);
+       if (err < 0) {
+               if (err == -EALREADY) {
+                       /* Firmware has already been loaded */
+                       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+                       err = 0;
+                       goto done;
+               }
+
+               /* When FW download fails, send Intel Reset to retry
+                * FW download.
+                */
+               btintel_reset_to_bootloader(hdev);
+               goto done;
+       }
+
+       /* Before switching the device into operational mode and with that
+        * booting the loaded firmware, wait for the bootloader notification
+        * that all fragments have been successfully received.
+        *
+        * When the event processing receives the notification, then the
+        * INTEL_DOWNLOADING flag will be cleared.
+        *
+        * The firmware loading should not take longer than 5 seconds
+        * and thus just timeout if that happens and fail the setup
+        * of this device.
+        */
+       err = btintel_download_wait(hdev, calltime, 5000);
+       if (err == -ETIMEDOUT)
+               btintel_reset_to_bootloader(hdev);
+
+done:
+       release_firmware(fw);
+       return err;
+}
+
+static int btintel_bootloader_setup(struct hci_dev *hdev,
+                                   struct intel_version *ver)
+{
+       struct intel_version new_ver;
+       struct intel_boot_params params;
+       u32 boot_param;
+       char ddcname[64];
+       int err;
+       struct intel_debug_features features;
+
+       BT_DBG("%s", hdev->name);
+
+       /* Set the default boot parameter to 0x0 and it is updated to
+        * SKU specific boot parameter after reading Intel_Write_Boot_Params
+        * command while downloading the firmware.
+        */
+       boot_param = 0x00000000;
+
+       btintel_set_flag(hdev, INTEL_BOOTLOADER);
+
+       err = btintel_download_fw(hdev, ver, &params, &boot_param);
+       if (err)
+               return err;
+
+       /* controller is already having an operational firmware */
+       if (ver->fw_variant == 0x23)
+               goto finish;
+
+       err = btintel_boot(hdev, boot_param);
+       if (err)
+               return err;
+
+       btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+
+       err = btintel_get_fw_name(ver, &params, ddcname,
+                                               sizeof(ddcname), "ddc");
+
+       if (err < 0) {
+               bt_dev_err(hdev, "Unsupported Intel firmware naming");
+       } else {
+               /* Once the device is running in operational mode, it needs to
+                * apply the device configuration (DDC) parameters.
+                *
+                * The device can work without DDC parameters, so even if it
+                * fails to load the file, no need to fail the setup.
+                */
+               btintel_load_ddc_config(hdev, ddcname);
+       }
+
+       /* Read the Intel supported features and if new exception formats
+        * supported, need to load the additional DDC config to enable.
+        */
+       err = btintel_read_debug_features(hdev, &features);
+       if (!err) {
+               /* Set DDC mask for available debug features */
+               btintel_set_debug_features(hdev, &features);
+       }
+
+       /* Read the Intel version information after loading the FW  */
+       err = btintel_read_version(hdev, &new_ver);
+       if (err)
+               return err;
+
+       btintel_version_info(hdev, &new_ver);
+
+finish:
+       /* Set the event mask for Intel specific vendor events. This enables
+        * a few extra events that are useful during general operation. It
+        * does not enable any debugging related events.
+        *
+        * The device will function correctly without these events enabled
+        * and thus no need to fail the setup.
+        */
+       btintel_set_event_mask(hdev, false);
+
+       return 0;
+}
+
+static void btintel_get_fw_name_tlv(const struct intel_version_tlv *ver,
+                                   char *fw_name, size_t len,
+                                   const char *suffix)
+{
+       /* The firmware file name for new generation controllers will be
+        * ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step>
+        */
+       snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s",
+                INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvi_top),
+                                         INTEL_CNVX_TOP_STEP(ver->cnvi_top)),
+                INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver->cnvr_top),
+                                         INTEL_CNVX_TOP_STEP(ver->cnvr_top)),
+                suffix);
+}
+
+static int btintel_prepare_fw_download_tlv(struct hci_dev *hdev,
+                                          struct intel_version_tlv *ver,
+                                          u32 *boot_param)
+{
+       const struct firmware *fw;
+       char fwname[64];
+       int err;
+       ktime_t calltime;
+
+       if (!ver || !boot_param)
+               return -EINVAL;
+
+       /* The firmware variant determines if the device is in bootloader
+        * mode or is running operational firmware. The value 0x03 identifies
+        * the bootloader and the value 0x23 identifies the operational
+        * firmware.
+        *
+        * When the operational firmware is already present, then only
+        * the check for valid Bluetooth device address is needed. This
+        * determines if the device will be added as configured or
+        * unconfigured controller.
+        *
+        * It is not possible to use the Secure Boot Parameters in this
+        * case since that command is only available in bootloader mode.
+        */
+       if (ver->img_type == 0x03) {
+               btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+               btintel_check_bdaddr(hdev);
+       }
+
+       /* If the OTP has no valid Bluetooth device address, then there will
+        * also be no valid address for the operational firmware.
+        */
+       if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) {
+               bt_dev_info(hdev, "No device address configured");
+               set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+       }
+
+       btintel_get_fw_name_tlv(ver, fwname, sizeof(fwname), "sfi");
+       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
+       if (err < 0) {
+               if (!btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
+                       /* Firmware has already been loaded */
+                       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+                       return 0;
+               }
+
+               bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+                          fwname, err);
+
+               return err;
+       }
+
+       bt_dev_info(hdev, "Found device firmware: %s", fwname);
+
+       if (fw->size < 644) {
+               bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
+                          fw->size);
+               err = -EBADF;
+               goto done;
+       }
+
+       calltime = ktime_get();
+
+       btintel_set_flag(hdev, INTEL_DOWNLOADING);
+
+       /* Start firmware downloading and get boot parameter */
+       err = btintel_download_fw_tlv(hdev, ver, fw, boot_param,
+                                              INTEL_HW_VARIANT(ver->cnvi_bt),
+                                              ver->sbe_type);
+       if (err < 0) {
+               if (err == -EALREADY) {
+                       /* Firmware has already been loaded */
+                       btintel_set_flag(hdev, INTEL_FIRMWARE_LOADED);
+                       err = 0;
+                       goto done;
+               }
+
+               /* When FW download fails, send Intel Reset to retry
+                * FW download.
+                */
+               btintel_reset_to_bootloader(hdev);
+               goto done;
+       }
+
+       /* Before switching the device into operational mode and with that
+        * booting the loaded firmware, wait for the bootloader notification
+        * that all fragments have been successfully received.
+        *
+        * When the event processing receives the notification, then the
+        * BTUSB_DOWNLOADING flag will be cleared.
+        *
+        * The firmware loading should not take longer than 5 seconds
+        * and thus just timeout if that happens and fail the setup
+        * of this device.
+        */
+       err = btintel_download_wait(hdev, calltime, 5000);
+       if (err == -ETIMEDOUT)
+               btintel_reset_to_bootloader(hdev);
+
+done:
+       release_firmware(fw);
+       return err;
+}
+
+static int btintel_bootloader_setup_tlv(struct hci_dev *hdev,
+                                       struct intel_version_tlv *ver)
+{
+       u32 boot_param;
+       char ddcname[64];
+       int err;
+       struct intel_debug_features features;
+       struct intel_version_tlv new_ver;
+
+       bt_dev_dbg(hdev, "");
+
+       /* Set the default boot parameter to 0x0 and it is updated to
+        * SKU specific boot parameter after reading Intel_Write_Boot_Params
+        * command while downloading the firmware.
+        */
+       boot_param = 0x00000000;
+
+       btintel_set_flag(hdev, INTEL_BOOTLOADER);
+
+       err = btintel_prepare_fw_download_tlv(hdev, ver, &boot_param);
+       if (err)
+               return err;
+
+       /* check if controller is already having an operational firmware */
+       if (ver->img_type == 0x03)
+               goto finish;
+
+       err = btintel_boot(hdev, boot_param);
+       if (err)
+               return err;
+
+       btintel_clear_flag(hdev, INTEL_BOOTLOADER);
+
+       btintel_get_fw_name_tlv(ver, ddcname, sizeof(ddcname), "ddc");
+       /* Once the device is running in operational mode, it needs to
+        * apply the device configuration (DDC) parameters.
+        *
+        * The device can work without DDC parameters, so even if it
+        * fails to load the file, no need to fail the setup.
+        */
+       btintel_load_ddc_config(hdev, ddcname);
+
+       /* Read the Intel supported features and if new exception formats
+        * supported, need to load the additional DDC config to enable.
+        */
+       err = btintel_read_debug_features(hdev, &features);
+       if (!err) {
+               /* Set DDC mask for available debug features */
+               btintel_set_debug_features(hdev, &features);
+       }
+
+       /* Read the Intel version information after loading the FW  */
+       err = btintel_read_version_tlv(hdev, &new_ver);
+       if (err)
+               return err;
+
+       btintel_version_info_tlv(hdev, &new_ver);
+
+finish:
+       /* Set the event mask for Intel specific vendor events. This enables
+        * a few extra events that are useful during general operation. It
+        * does not enable any debugging related events.
+        *
+        * The device will function correctly without these events enabled
+        * and thus no need to fail the setup.
+        */
+       btintel_set_event_mask(hdev, false);
+
+       return 0;
+}
+
+static void btintel_set_msft_opcode(struct hci_dev *hdev, u8 hw_variant)
+{
+       switch (hw_variant) {
+       /* Legacy bootloader devices that supports MSFT Extension */
+       case 0x11:      /* JfP */
+       case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* CcP */
+       /* All Intel new genration controllers support the Microsoft vendor
+        * extension are using 0xFC1E for VsMsftOpCode.
+        */
+       case 0x17:
+       case 0x18:
+       case 0x19:
+               hci_set_msft_opcode(hdev, 0xFC1E);
+               break;
+       default:
+               /* Not supported */
+               break;
+       }
+}
+
+static int btintel_setup_combined(struct hci_dev *hdev)
+{
+       const u8 param[1] = { 0xFF };
+       struct intel_version ver;
+       struct intel_version_tlv ver_tlv;
+       struct sk_buff *skb;
+       int err;
+
+       BT_DBG("%s", hdev->name);
+
+       /* The some controllers have a bug with the first HCI command sent to it
+        * returning number of completed commands as zero. This would stall the
+        * command processing in the Bluetooth core.
+        *
+        * As a workaround, send HCI Reset command first which will reset the
+        * number of completed commands and allow normal command processing
+        * from now on.
+        */
+       if (btintel_test_flag(hdev, INTEL_BROKEN_INITIAL_NCMD)) {
+               skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL,
+                                    HCI_INIT_TIMEOUT);
+               if (IS_ERR(skb)) {
+                       bt_dev_err(hdev,
+                                  "sending initial HCI reset failed (%ld)",
+                                  PTR_ERR(skb));
+                       return PTR_ERR(skb);
+               }
+               kfree_skb(skb);
+       }
+
+       /* Starting from TyP device, the command parameter and response are
+        * changed even though the OCF for HCI_Intel_Read_Version command
+        * remains same. The legacy devices can handle even if the
+        * command has a parameter and returns a correct version information.
+        * So, it uses new format to support both legacy and new format.
+        */
+       skb = __hci_cmd_sync(hdev, 0xfc05, 1, param, HCI_CMD_TIMEOUT);
+       if (IS_ERR(skb)) {
+               bt_dev_err(hdev, "Reading Intel version command failed (%ld)",
+                          PTR_ERR(skb));
+               return PTR_ERR(skb);
+       }
+
+       /* Check the status */
+       if (skb->data[0]) {
+               bt_dev_err(hdev, "Intel Read Version command failed (%02x)",
+                          skb->data[0]);
+               err = -EIO;
+               goto exit_error;
+       }
+
+       /* Apply the common HCI quirks for Intel device */
+       set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+       set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+       set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
+
+       /* For Legacy device, check the HW platform value and size */
+       if (skb->len == sizeof(ver) && skb->data[1] == 0x37) {
+               bt_dev_dbg(hdev, "Read the legacy Intel version information");
+
+               memcpy(&ver, skb->data, sizeof(ver));
+
+               /* Display version information */
+               btintel_version_info(hdev, &ver);
+
+               /* Check for supported iBT hardware variants of this firmware
+                * loading method.
+                *
+                * This check has been put in place to ensure correct forward
+                * compatibility options when newer hardware variants come
+                * along.
+                */
+               switch (ver.hw_variant) {
+               case 0x07:      /* WP */
+               case 0x08:      /* StP */
+                       /* Legacy ROM product */
+                       btintel_set_flag(hdev, INTEL_ROM_LEGACY);
+
+                       /* Apply the device specific HCI quirks
+                        *
+                        * WBS for SdP - SdP and Stp have a same hw_varaint but
+                        * different fw_variant
+                        */
+                       if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22)
+                               set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+                                       &hdev->quirks);
+
+                       /* These devices have an issue with LED which doesn't
+                        * go off immediately during shutdown. Set the flag
+                        * here to send the LED OFF command during shutdown.
+                        */
+                       btintel_set_flag(hdev, INTEL_BROKEN_LED);
+
+                       err = btintel_legacy_rom_setup(hdev, &ver);
+                       break;
+               case 0x0b:      /* SfP */
+               case 0x0c:      /* WsP */
+               case 0x11:      /* JfP */
+               case 0x12:      /* ThP */
+               case 0x13:      /* HrP */
+               case 0x14:      /* CcP */
+                       /* Apply the device specific HCI quirks
+                        *
+                        * All Legacy bootloader devices support WBS
+                        */
+                       set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+                               &hdev->quirks);
+
+                       /* Valid LE States quirk for JfP/ThP familiy */
+                       if (ver.hw_variant == 0x11 || ver.hw_variant == 0x12)
+                               set_bit(HCI_QUIRK_VALID_LE_STATES,
+                                       &hdev->quirks);
+
+                       /* Setup MSFT Extension support */
+                       btintel_set_msft_opcode(hdev, ver.hw_variant);
+
+                       err = btintel_bootloader_setup(hdev, &ver);
+                       break;
+               default:
+                       bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
+                                  ver.hw_variant);
+                       err = -EINVAL;
+               }
+
+               goto exit_error;
+       }
+
+       /* For TLV type device, parse the tlv data */
+       err = btintel_parse_version_tlv(hdev, &ver_tlv, skb);
+       if (err) {
+               bt_dev_err(hdev, "Failed to parse TLV version information");
+               goto exit_error;
+       }
+
+       if (INTEL_HW_PLATFORM(ver_tlv.cnvi_bt) != 0x37) {
+               bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)",
+                          INTEL_HW_PLATFORM(ver_tlv.cnvi_bt));
+               err = -EINVAL;
+               goto exit_error;
+       }
+
+       /* Check for supported iBT hardware variants of this firmware
+        * loading method.
+        *
+        * This check has been put in place to ensure correct forward
+        * compatibility options when newer hardware variants come
+        * along.
+        */
+       switch (INTEL_HW_VARIANT(ver_tlv.cnvi_bt)) {
+       case 0x11:      /* JfP */
+       case 0x12:      /* ThP */
+       case 0x13:      /* HrP */
+       case 0x14:      /* CcP */
+               /* Some legacy bootloader devices from JfP supports both old
+                * and TLV based HCI_Intel_Read_Version command. But we don't
+                * want to use the TLV based setup routines for those legacy
+                * bootloader device.
+                *
+                * Also, it is not easy to convert TLV based version from the
+                * legacy version format.
+                *
+                * So, as a workaround for those devices, use the legacy
+                * HCI_Intel_Read_Version to get the version information and
+                * run the legacy bootloader setup.
+                */
+               err = btintel_read_version(hdev, &ver);
+               if (err)
+                       return err;
+               err = btintel_bootloader_setup(hdev, &ver);
+               break;
+       case 0x17:
+       case 0x18:
+       case 0x19:
+               /* Display version information of TLV type */
+               btintel_version_info_tlv(hdev, &ver_tlv);
+
+               /* Apply the device specific HCI quirks for TLV based devices
+                *
+                * All TLV based devices support WBS
+                */
+               set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+
+               /* Valid LE States quirk for GfP */
+               if (INTEL_HW_VARIANT(ver_tlv.cnvi_bt) == 0x18)
+                       set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
+
+               /* Setup MSFT Extension support */
+               btintel_set_msft_opcode(hdev,
+                                       INTEL_HW_VARIANT(ver_tlv.cnvi_bt));
+
+               err = btintel_bootloader_setup_tlv(hdev, &ver_tlv);
+               break;
+       default:
+               bt_dev_err(hdev, "Unsupported Intel hw variant (%u)",
+                          INTEL_HW_VARIANT(ver_tlv.cnvi_bt));
+               return -EINVAL;
+       }
+
+exit_error:
+       kfree_skb(skb);
+
+       return err;
+}
+
+static int btintel_shutdown_combined(struct hci_dev *hdev)
+{
+       struct sk_buff *skb;
+       int ret;
+
+       /* Send HCI Reset to the controller to stop any BT activity which
+        * were triggered. This will help to save power and maintain the
+        * sync b/w Host and controller
+        */
+       skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+       if (IS_ERR(skb)) {
+               bt_dev_err(hdev, "HCI reset during shutdown failed");
+               return PTR_ERR(skb);
+       }
+       kfree_skb(skb);
+
+
+       /* Some platforms have an issue with BT LED when the interface is
+        * down or BT radio is turned off, which takes 5 seconds to BT LED
+        * goes off. This command turns off the BT LED immediately.
+        */
+       if (btintel_test_flag(hdev, INTEL_BROKEN_LED)) {
+               skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT);
+               if (IS_ERR(skb)) {
+                       ret = PTR_ERR(skb);
+                       bt_dev_err(hdev, "turning off Intel device LED failed");
+                       return ret;
+               }
+               kfree_skb(skb);
+       }
+
+       return 0;
+}
+
+int btintel_configure_setup(struct hci_dev *hdev)
+{
+       hdev->manufacturer = 2;
+       hdev->setup = btintel_setup_combined;
+       hdev->shutdown = btintel_shutdown_combined;
+       hdev->hw_error = btintel_hw_error;
+       hdev->set_diag = btintel_set_diag_combined;
+       hdev->set_bdaddr = btintel_set_bdaddr;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_configure_setup);
+
+void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len)
+{
+       const struct intel_bootup *evt = ptr;
+
+       if (len != sizeof(*evt))
+               return;
+
+       if (btintel_test_and_clear_flag(hdev, INTEL_BOOTING))
+               btintel_wake_up_flag(hdev, INTEL_BOOTING);
+}
+EXPORT_SYMBOL_GPL(btintel_bootup);
+
+void btintel_secure_send_result(struct hci_dev *hdev,
+                               const void *ptr, unsigned int len)
+{
+       const struct intel_secure_send_result *evt = ptr;
+
+       if (len != sizeof(*evt))
+               return;
+
+       if (evt->result)
+               btintel_set_flag(hdev, INTEL_FIRMWARE_FAILED);
+
+       if (btintel_test_and_clear_flag(hdev, INTEL_DOWNLOADING) &&
+           btintel_test_flag(hdev, INTEL_FIRMWARE_LOADED))
+               btintel_wake_up_flag(hdev, INTEL_DOWNLOADING);
+}
+EXPORT_SYMBOL_GPL(btintel_secure_send_result);
 
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
index d184064..aa64072 100644 (file)
@@ -138,6 +138,49 @@ struct intel_debug_features {
 #define INTEL_CNVX_TOP_STEP(cnvx_top)  (((cnvx_top) & 0x0f000000) >> 24)
 #define INTEL_CNVX_TOP_PACK_SWAB(t, s) __swab16(((__u16)(((t) << 4) | (s))))
 
+enum {
+       INTEL_BOOTLOADER,
+       INTEL_DOWNLOADING,
+       INTEL_FIRMWARE_LOADED,
+       INTEL_FIRMWARE_FAILED,
+       INTEL_BOOTING,
+       INTEL_BROKEN_INITIAL_NCMD,
+       INTEL_BROKEN_LED,
+       INTEL_ROM_LEGACY,
+
+       __INTEL_NUM_FLAGS,
+};
+
+struct btintel_data {
+       DECLARE_BITMAP(flags, __INTEL_NUM_FLAGS);
+};
+
+#define btintel_set_flag(hdev, nr)                                     \
+       do {                                                            \
+               struct btintel_data *intel = hci_get_priv((hdev));      \
+               set_bit((nr), intel->flags);                            \
+       } while (0)
+
+#define btintel_clear_flag(hdev, nr)                                   \
+       do {                                                            \
+               struct btintel_data *intel = hci_get_priv((hdev));      \
+               clear_bit((nr), intel->flags);                          \
+       } while (0)
+
+#define btintel_wake_up_flag(hdev, nr)                                 \
+       do {                                                            \
+               struct btintel_data *intel = hci_get_priv((hdev));      \
+               wake_up_bit(intel->flags, (nr));                        \
+       } while (0)
+
+#define btintel_get_flag(hdev)                                         \
+       (((struct btintel_data *)hci_get_priv(hdev))->flags)
+
+#define btintel_test_flag(hdev, nr)    test_bit((nr), btintel_get_flag(hdev))
+#define btintel_test_and_clear_flag(hdev, nr) test_and_clear_bit((nr), btintel_get_flag(hdev))
+#define btintel_wait_on_flag_timeout(hdev, nr, m, to)                  \
+               wait_on_bit_timeout(btintel_get_flag(hdev), (nr), m, to)
+
 #if IS_ENABLED(CONFIG_BT_INTEL)
 
 int btintel_check_bdaddr(struct hci_dev *hdev);
@@ -145,19 +188,11 @@ int btintel_enter_mfg(struct hci_dev *hdev);
 int btintel_exit_mfg(struct hci_dev *hdev, bool reset, bool patched);
 int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
 int btintel_set_diag(struct hci_dev *hdev, bool enable);
-int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable);
-void btintel_hw_error(struct hci_dev *hdev, u8 code);
 
 int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver);
-int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version);
-int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
-                       const void *param);
 int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name);
-int btintel_set_event_mask(struct hci_dev *hdev, bool debug);
 int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug);
 int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver);
-int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver);
-
 struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
                                   u16 opcode_write);
 int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param);
@@ -165,16 +200,10 @@ int btintel_read_boot_params(struct hci_dev *hdev,
                             struct intel_boot_params *params);
 int btintel_download_firmware(struct hci_dev *dev, struct intel_version *ver,
                              const struct firmware *fw, u32 *boot_param);
-int btintel_download_firmware_newgen(struct hci_dev *hdev,
-                                    struct intel_version_tlv *ver,
-                                    const struct firmware *fw,
-                                    u32 *boot_param, u8 hw_variant,
-                                    u8 sbe_type);
-void btintel_reset_to_bootloader(struct hci_dev *hdev);
-int btintel_read_debug_features(struct hci_dev *hdev,
-                               struct intel_debug_features *features);
-int btintel_set_debug_features(struct hci_dev *hdev,
-                              const struct intel_debug_features *features);
+int btintel_configure_setup(struct hci_dev *hdev);
+void btintel_bootup(struct hci_dev *hdev, const void *ptr, unsigned int len);
+void btintel_secure_send_result(struct hci_dev *hdev,
+                               const void *ptr, unsigned int len);
 #else
 
 static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -202,44 +231,18 @@ static inline int btintel_set_diag(struct hci_dev *hdev, bool enable)
        return -EOPNOTSUPP;
 }
 
-static inline int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
-{
-       return -EOPNOTSUPP;
-}
-
-static inline void btintel_hw_error(struct hci_dev *hdev, u8 code)
-{
-}
-
 static inline int btintel_version_info(struct hci_dev *hdev,
                                       struct intel_version *ver)
 {
        return -EOPNOTSUPP;
 }
 
-static inline int btintel_version_info_tlv(struct hci_dev *hdev,
-                                          struct intel_version_tlv *version)
-{
-       return -EOPNOTSUPP;
-}
-
-static inline int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type,
-                                     u32 plen, const void *param)
-{
-       return -EOPNOTSUPP;
-}
-
 static inline int btintel_load_ddc_config(struct hci_dev *hdev,
                                          const char *ddc_name)
 {
        return -EOPNOTSUPP;
 }
 
-static inline int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
-{
-       return -EOPNOTSUPP;
-}
-
 static inline int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug)
 {
        return -EOPNOTSUPP;
@@ -251,12 +254,6 @@ static inline int btintel_read_version(struct hci_dev *hdev,
        return -EOPNOTSUPP;
 }
 
-static inline int btintel_read_version_tlv(struct hci_dev *hdev,
-                                          struct intel_version_tlv *ver)
-{
-       return -EOPNOTSUPP;
-}
-
 static inline struct regmap *btintel_regmap_init(struct hci_dev *hdev,
                                                 u16 opcode_read,
                                                 u16 opcode_write)
@@ -283,28 +280,18 @@ static inline int btintel_download_firmware(struct hci_dev *dev,
        return -EOPNOTSUPP;
 }
 
-static inline int btintel_download_firmware_newgen(struct hci_dev *hdev,
-                                                  const struct firmware *fw,
-                                                  u32 *boot_param,
-                                                  u8 hw_variant, u8 sbe_type)
-{
-       return -EOPNOTSUPP;
-}
-
-static inline void btintel_reset_to_bootloader(struct hci_dev *hdev)
+static inline int btintel_configure_setup(struct hci_dev *hdev)
 {
+       return -ENODEV;
 }
 
-static inline int btintel_read_debug_features(struct hci_dev *hdev,
-                                             struct intel_debug_features *features)
+static inline void btintel_bootup(struct hci_dev *hdev,
+                                 const void *ptr, unsigned int len)
 {
-       return -EOPNOTSUPP;
 }
 
-static inline int btintel_set_debug_features(struct hci_dev *hdev,
-                                            const struct intel_debug_features *features)
+static inline void btintel_secure_send_result(struct hci_dev *hdev,
+                               const void *ptr, unsigned int len)
 {
-       return -EOPNOTSUPP;
 }
-
 #endif
index cddd350..68378b4 100644 (file)
@@ -1350,6 +1350,7 @@ static void btmrvl_sdio_coredump(struct device *dev)
        u8 *dbg_ptr, *end_ptr, *fw_dump_data, *fw_dump_ptr;
        u8 dump_num = 0, idx, i, read_reg, doneflag = 0;
        u32 memory_size, fw_dump_len = 0;
+       int size = 0;
 
        card = sdio_get_drvdata(func);
        priv = card->priv;
@@ -1478,7 +1479,7 @@ done:
        if (fw_dump_len == 0)
                return;
 
-       fw_dump_data = vzalloc(fw_dump_len+1);
+       fw_dump_data = vzalloc(fw_dump_len + 1);
        if (!fw_dump_data) {
                BT_ERR("Vzalloc fw_dump_data fail!");
                return;
@@ -1493,20 +1494,18 @@ done:
                struct memory_type_mapping *entry = &mem_type_mapping_tbl[idx];
 
                if (entry->mem_ptr) {
-                       strcpy(fw_dump_ptr, "========Start dump ");
-                       fw_dump_ptr += strlen("========Start dump ");
-
-                       strcpy(fw_dump_ptr, entry->mem_name);
-                       fw_dump_ptr += strlen(entry->mem_name);
-
-                       strcpy(fw_dump_ptr, "========\n");
-                       fw_dump_ptr += strlen("========\n");
-
-                       memcpy(fw_dump_ptr, entry->mem_ptr, entry->mem_size);
-                       fw_dump_ptr += entry->mem_size;
-
-                       strcpy(fw_dump_ptr, "\n========End dump========\n");
-                       fw_dump_ptr += strlen("\n========End dump========\n");
+                       size += scnprintf(fw_dump_ptr + size,
+                                         fw_dump_len + 1 - size,
+                                         "========Start dump %s========\n",
+                                         entry->mem_name);
+
+                       memcpy(fw_dump_ptr + size, entry->mem_ptr,
+                              entry->mem_size);
+                       size += entry->mem_size;
+
+                       size += scnprintf(fw_dump_ptr + size,
+                                         fw_dump_len + 1 - size,
+                                         "\n========End dump========\n");
 
                        vfree(mem_type_mapping_tbl[idx].mem_ptr);
                        mem_type_mapping_tbl[idx].mem_ptr = NULL;
index bea1595..8646b6d 100644 (file)
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2017 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
index cce0125..1f8afa0 100644 (file)
@@ -681,11 +681,15 @@ out_free:
                }
        }
 
-       /* RTL8822CE supports the Microsoft vendor extension and uses 0xFCF0
-        * for VsMsftOpCode.
+       /* The following chips supports the Microsoft vendor extension,
+        * therefore set the corresponding VsMsftOpCode.
         */
-       if (lmp_subver == RTL_ROM_LMP_8822B)
+       switch (lmp_subver) {
+       case RTL_ROM_LMP_8822B:
+       case RTL_ROM_LMP_8852A:
                hci_set_msft_opcode(hdev, 0xFCF0);
+               break;
+       }
 
        return btrtl_dev;
 
index a9855a2..60d2fce 100644 (file)
@@ -43,12 +43,11 @@ static struct usb_driver btusb_driver;
 #define BTUSB_BROKEN_ISOC      0x20
 #define BTUSB_WRONG_SCO_MTU    0x40
 #define BTUSB_ATH3012          0x80
-#define BTUSB_INTEL            0x100
+#define BTUSB_INTEL_COMBINED   0x100
 #define BTUSB_INTEL_BOOT       0x200
 #define BTUSB_BCM_PATCHRAM     0x400
 #define BTUSB_MARVELL          0x800
 #define BTUSB_SWAVE            0x1000
-#define BTUSB_INTEL_NEW                0x2000
 #define BTUSB_AMP              0x4000
 #define BTUSB_QCA_ROME         0x8000
 #define BTUSB_BCM_APPLE                0x10000
@@ -60,7 +59,7 @@ static struct usb_driver btusb_driver;
 #define BTUSB_WIDEBAND_SPEECH  0x400000
 #define BTUSB_VALID_LE_STATES   0x800000
 #define BTUSB_QCA_WCN6855      0x1000000
-#define BTUSB_INTEL_NEWGEN     0x2000000
+#define BTUSB_INTEL_BROKEN_INITIAL_NCMD 0x4000000
 
 static const struct usb_device_id btusb_table[] = {
        /* Generic Bluetooth USB device */
@@ -119,9 +118,6 @@ static const struct usb_device_id btusb_table[] = {
        /* Canyon CN-BTU1 with HID interfaces */
        { USB_DEVICE(0x0c10, 0x0000) },
 
-       /* Broadcom BCM20702A0 */
-       { USB_DEVICE(0x413c, 0x8197) },
-
        /* Broadcom BCM20702B0 (Dynex/Insignia) */
        { USB_DEVICE(0x19ff, 0x0239), .driver_info = BTUSB_BCM_PATCHRAM },
 
@@ -297,7 +293,8 @@ static const struct usb_device_id blacklist_table[] = {
 
        /* QCA WCN6855 chipset */
        { USB_DEVICE(0x0cf3, 0xe600), .driver_info = BTUSB_QCA_WCN6855 |
-                                                    BTUSB_WIDEBAND_SPEECH },
+                                                    BTUSB_WIDEBAND_SPEECH |
+                                                    BTUSB_VALID_LE_STATES },
 
        /* Broadcom BCM2035 */
        { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -361,27 +358,18 @@ static const struct usb_device_id blacklist_table[] = {
        { USB_DEVICE(0x1286, 0x204e), .driver_info = BTUSB_MARVELL },
 
        /* Intel Bluetooth devices */
-       { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW |
-                                                    BTUSB_WIDEBAND_SPEECH |
-                                                    BTUSB_VALID_LE_STATES },
-       { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW |
-                                                    BTUSB_WIDEBAND_SPEECH },
-       { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW |
-                                                    BTUSB_WIDEBAND_SPEECH },
-       { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_NEWGEN |
-                                                    BTUSB_WIDEBAND_SPEECH},
-       { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_NEWGEN |
-                                                    BTUSB_WIDEBAND_SPEECH},
+       { USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0033), .driver_info = BTUSB_INTEL_COMBINED },
        { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
-       { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
-       { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
-       { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW |
-                                                    BTUSB_WIDEBAND_SPEECH },
-       { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL |
-                                                    BTUSB_WIDEBAND_SPEECH },
-       { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW |
-                                                    BTUSB_WIDEBAND_SPEECH |
-                                                    BTUSB_VALID_LE_STATES },
+       { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL_COMBINED |
+                                                    BTUSB_INTEL_BROKEN_INITIAL_NCMD },
+       { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL_COMBINED },
+       { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_COMBINED },
 
        /* Other Intel Bluetooth devices */
        { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
@@ -410,10 +398,21 @@ static const struct usb_device_id blacklist_table[] = {
        /* Additional MediaTek MT7615E Bluetooth devices */
        { USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK},
 
+       /* Additional MediaTek MT7668 Bluetooth devices */
+       { USB_DEVICE(0x043e, 0x3109), .driver_info = BTUSB_MEDIATEK |
+                                                    BTUSB_WIDEBAND_SPEECH |
+                                                    BTUSB_VALID_LE_STATES },
+
        /* Additional MediaTek MT7921 Bluetooth devices */
        { USB_DEVICE(0x04ca, 0x3802), .driver_info = BTUSB_MEDIATEK |
                                                     BTUSB_WIDEBAND_SPEECH |
                                                     BTUSB_VALID_LE_STATES },
+       { USB_DEVICE(0x13d3, 0x3563), .driver_info = BTUSB_MEDIATEK |
+                                                    BTUSB_WIDEBAND_SPEECH |
+                                                    BTUSB_VALID_LE_STATES },
+       { USB_DEVICE(0x0489, 0xe0cd), .driver_info = BTUSB_MEDIATEK |
+                                                    BTUSB_WIDEBAND_SPEECH |
+                                                    BTUSB_VALID_LE_STATES },
 
        /* Additional Realtek 8723AE Bluetooth devices */
        { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK },
@@ -452,6 +451,10 @@ static const struct usb_device_id blacklist_table[] = {
        /* Additional Realtek 8822CE Bluetooth devices */
        { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK |
                                                     BTUSB_WIDEBAND_SPEECH },
+       /* Bluetooth component of Realtek 8852AE device */
+       { USB_DEVICE(0x04ca, 0x4006), .driver_info = BTUSB_REALTEK |
+                                                    BTUSB_WIDEBAND_SPEECH },
+
        { USB_DEVICE(0x04c5, 0x161f), .driver_info = BTUSB_REALTEK |
                                                     BTUSB_WIDEBAND_SPEECH },
        { USB_DEVICE(0x0b05, 0x18ef), .driver_info = BTUSB_REALTEK |
@@ -524,7 +527,8 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = {
 #define BTUSB_OOB_WAKE_ENABLED 11
 #define BTUSB_HW_RESET_ACTIVE  12
 #define BTUSB_TX_WAIT_VND_EVT  13
-#define BTUSB_WAKEUP_DISABLE   14
+#define BTUSB_WAKEUP_AUTOSUSPEND       14
+#define BTUSB_USE_ALT3_FOR_WBS 15
 
 struct btusb_data {
        struct hci_dev       *hdev;
@@ -575,6 +579,7 @@ struct btusb_data {
        int suspend_count;
 
        int (*recv_event)(struct hci_dev *hdev, struct sk_buff *skb);
+       int (*recv_acl)(struct hci_dev *hdev, struct sk_buff *skb);
        int (*recv_bulk)(struct btusb_data *data, void *buffer, int count);
 
        int (*setup_on_usb)(struct hci_dev *hdev);
@@ -782,7 +787,7 @@ static int btusb_recv_bulk(struct btusb_data *data, void *buffer, int count)
 
                if (!hci_skb_expect(skb)) {
                        /* Complete frame */
-                       hci_recv_frame(data->hdev, skb);
+                       data->recv_acl(data->hdev, skb);
                        skb = NULL;
                }
        }
@@ -1345,13 +1350,6 @@ static int btusb_open(struct hci_dev *hdev)
 
        data->intf->needs_remote_wakeup = 1;
 
-       /* Disable device remote wakeup when host is suspended
-        * For Realtek chips, global suspend without
-        * SET_FEATURE (DEVICE_REMOTE_WAKEUP) can save more power in device.
-        */
-       if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
-               device_wakeup_disable(&data->udev->dev);
-
        if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags))
                goto done;
 
@@ -1418,7 +1416,7 @@ static int btusb_close(struct hci_dev *hdev)
        data->intf->needs_remote_wakeup = 0;
 
        /* Enable remote wake up for auto-suspend */
-       if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
+       if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags))
                data->intf->needs_remote_wakeup = 1;
 
        usb_autopm_put_interface(data->intf);
@@ -1757,16 +1755,20 @@ static void btusb_work(struct work_struct *work)
                        /* Bluetooth USB spec recommends alt 6 (63 bytes), but
                         * many adapters do not support it.  Alt 1 appears to
                         * work for all adapters that do not have alt 6, and
-                        * which work with WBS at all.
+                        * which work with WBS at all.  Some devices prefer
+                        * alt 3 (HCI payload >= 60 Bytes let air packet
+                        * data satisfy 60 bytes), requiring
+                        * MTU >= 3 (packets) * 25 (size) - 3 (headers) = 72
+                        * see also Core spec 5, vol 4, B 2.1.1 & Table 2.1.
                         */
-                       new_alts = btusb_find_altsetting(data, 6) ? 6 : 1;
-                       /* Because mSBC frames do not need to be aligned to the
-                        * SCO packet boundary. If support the Alt 3, use the
-                        * Alt 3 for HCI payload >= 60 Bytes let air packet
-                        * data satisfy 60 bytes.
-                        */
-                       if (new_alts == 1 && btusb_find_altsetting(data, 3))
+                       if (btusb_find_altsetting(data, 6))
+                               new_alts = 6;
+                       else if (btusb_find_altsetting(data, 3) &&
+                                hdev->sco_mtu >= 72 &&
+                                test_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags))
                                new_alts = 3;
+                       else
+                               new_alts = 1;
                }
 
                if (btusb_switch_alt_setting(hdev, new_alts) < 0)
@@ -1890,7 +1892,7 @@ static int btusb_setup_csr(struct hci_dev *hdev)
                is_fake = true;
 
        if (is_fake) {
-               bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds...");
+               bt_dev_warn(hdev, "CSR: Unbranded CSR clone detected; adding workarounds and force-suspending once...");
 
                /* Generally these clones have big discrepancies between
                 * advertised features and what's actually supported.
@@ -1907,358 +1909,50 @@ static int btusb_setup_csr(struct hci_dev *hdev)
                clear_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
 
                /*
-                * Special workaround for clones with a Barrot 8041a02 chip,
-                * these clones are really messed-up:
-                * 1. Their bulk rx endpoint will never report any data unless
-                * the device was suspended at least once (yes really).
+                * Special workaround for these BT 4.0 chip clones, and potentially more:
+                *
+                * - 0x0134: a Barrot 8041a02                 (HCI rev: 0x1012 sub: 0x0810)
+                * - 0x7558: IC markings FR3191AHAL 749H15143 (HCI rev/sub-version: 0x0709)
+                *
+                * These controllers are really messed-up.
+                *
+                * 1. Their bulk RX endpoint will never report any data unless
+                * the device was suspended at least once (yes, really).
                 * 2. They will not wakeup when autosuspended and receiving data
-                * on their bulk rx endpoint from e.g. a keyboard or mouse
+                * on their bulk RX endpoint from e.g. a keyboard or mouse
                 * (IOW remote-wakeup support is broken for the bulk endpoint).
                 *
                 * To fix 1. enable runtime-suspend, force-suspend the
-                * hci and then wake-it up by disabling runtime-suspend.
+                * HCI and then wake-it up by disabling runtime-suspend.
                 *
-                * To fix 2. clear the hci's can_wake flag, this way the hci
+                * To fix 2. clear the HCI's can_wake flag, this way the HCI
                 * will still be autosuspended when it is not open.
+                *
+                * --
+                *
+                * Because these are widespread problems we prefer generic solutions; so
+                * apply this initialization quirk to every controller that gets here,
+                * it should be harmless. The alternative is to not work at all.
                 */
-               if (bcdDevice == 0x8891 &&
-                   le16_to_cpu(rp->lmp_subver) == 0x1012 &&
-                   le16_to_cpu(rp->hci_rev) == 0x0810 &&
-                   le16_to_cpu(rp->hci_ver) == BLUETOOTH_VER_4_0) {
-                       bt_dev_warn(hdev, "CSR: detected a fake CSR dongle using a Barrot 8041a02 chip, this chip is very buggy and may have issues");
-
-                       pm_runtime_allow(&data->udev->dev);
-
-                       ret = pm_runtime_suspend(&data->udev->dev);
-                       if (ret >= 0)
-                               msleep(200);
-                       else
-                               bt_dev_err(hdev, "Failed to suspend the device for Barrot 8041a02 receive-issue workaround");
-
-                       pm_runtime_forbid(&data->udev->dev);
-
-                       device_set_wakeup_capable(&data->udev->dev, false);
-                       /* Re-enable autosuspend if this was requested */
-                       if (enable_autosuspend)
-                               usb_enable_autosuspend(data->udev);
-               }
-       }
-
-       kfree_skb(skb);
-
-       return 0;
-}
-
-static const struct firmware *btusb_setup_intel_get_fw(struct hci_dev *hdev,
-                                                      struct intel_version *ver)
-{
-       const struct firmware *fw;
-       char fwname[64];
-       int ret;
-
-       snprintf(fwname, sizeof(fwname),
-                "intel/ibt-hw-%x.%x.%x-fw-%x.%x.%x.%x.%x.bseq",
-                ver->hw_platform, ver->hw_variant, ver->hw_revision,
-                ver->fw_variant,  ver->fw_revision, ver->fw_build_num,
-                ver->fw_build_ww, ver->fw_build_yy);
-
-       ret = request_firmware(&fw, fwname, &hdev->dev);
-       if (ret < 0) {
-               if (ret == -EINVAL) {
-                       bt_dev_err(hdev, "Intel firmware file request failed (%d)",
-                                  ret);
-                       return NULL;
-               }
-
-               bt_dev_err(hdev, "failed to open Intel firmware file: %s (%d)",
-                          fwname, ret);
-
-               /* If the correct firmware patch file is not found, use the
-                * default firmware patch file instead
-                */
-               snprintf(fwname, sizeof(fwname), "intel/ibt-hw-%x.%x.bseq",
-                        ver->hw_platform, ver->hw_variant);
-               if (request_firmware(&fw, fwname, &hdev->dev) < 0) {
-                       bt_dev_err(hdev, "failed to open default fw file: %s",
-                                  fwname);
-                       return NULL;
-               }
-       }
-
-       bt_dev_info(hdev, "Intel Bluetooth firmware file: %s", fwname);
-
-       return fw;
-}
-
-static int btusb_setup_intel_patching(struct hci_dev *hdev,
-                                     const struct firmware *fw,
-                                     const u8 **fw_ptr, int *disable_patch)
-{
-       struct sk_buff *skb;
-       struct hci_command_hdr *cmd;
-       const u8 *cmd_param;
-       struct hci_event_hdr *evt = NULL;
-       const u8 *evt_param = NULL;
-       int remain = fw->size - (*fw_ptr - fw->data);
-
-       /* The first byte indicates the types of the patch command or event.
-        * 0x01 means HCI command and 0x02 is HCI event. If the first bytes
-        * in the current firmware buffer doesn't start with 0x01 or
-        * the size of remain buffer is smaller than HCI command header,
-        * the firmware file is corrupted and it should stop the patching
-        * process.
-        */
-       if (remain > HCI_COMMAND_HDR_SIZE && *fw_ptr[0] != 0x01) {
-               bt_dev_err(hdev, "Intel fw corrupted: invalid cmd read");
-               return -EINVAL;
-       }
-       (*fw_ptr)++;
-       remain--;
+               pm_runtime_allow(&data->udev->dev);
 
-       cmd = (struct hci_command_hdr *)(*fw_ptr);
-       *fw_ptr += sizeof(*cmd);
-       remain -= sizeof(*cmd);
-
-       /* Ensure that the remain firmware data is long enough than the length
-        * of command parameter. If not, the firmware file is corrupted.
-        */
-       if (remain < cmd->plen) {
-               bt_dev_err(hdev, "Intel fw corrupted: invalid cmd len");
-               return -EFAULT;
-       }
-
-       /* If there is a command that loads a patch in the firmware
-        * file, then enable the patch upon success, otherwise just
-        * disable the manufacturer mode, for example patch activation
-        * is not required when the default firmware patch file is used
-        * because there are no patch data to load.
-        */
-       if (*disable_patch && le16_to_cpu(cmd->opcode) == 0xfc8e)
-               *disable_patch = 0;
-
-       cmd_param = *fw_ptr;
-       *fw_ptr += cmd->plen;
-       remain -= cmd->plen;
-
-       /* This reads the expected events when the above command is sent to the
-        * device. Some vendor commands expects more than one events, for
-        * example command status event followed by vendor specific event.
-        * For this case, it only keeps the last expected event. so the command
-        * can be sent with __hci_cmd_sync_ev() which returns the sk_buff of
-        * last expected event.
-        */
-       while (remain > HCI_EVENT_HDR_SIZE && *fw_ptr[0] == 0x02) {
-               (*fw_ptr)++;
-               remain--;
-
-               evt = (struct hci_event_hdr *)(*fw_ptr);
-               *fw_ptr += sizeof(*evt);
-               remain -= sizeof(*evt);
-
-               if (remain < evt->plen) {
-                       bt_dev_err(hdev, "Intel fw corrupted: invalid evt len");
-                       return -EFAULT;
-               }
-
-               evt_param = *fw_ptr;
-               *fw_ptr += evt->plen;
-               remain -= evt->plen;
-       }
-
-       /* Every HCI commands in the firmware file has its correspond event.
-        * If event is not found or remain is smaller than zero, the firmware
-        * file is corrupted.
-        */
-       if (!evt || !evt_param || remain < 0) {
-               bt_dev_err(hdev, "Intel fw corrupted: invalid evt read");
-               return -EFAULT;
-       }
+               ret = pm_runtime_suspend(&data->udev->dev);
+               if (ret >= 0)
+                       msleep(200);
+               else
+                       bt_dev_err(hdev, "CSR: Failed to suspend the device for our Barrot 8041a02 receive-issue workaround");
 
-       skb = __hci_cmd_sync_ev(hdev, le16_to_cpu(cmd->opcode), cmd->plen,
-                               cmd_param, evt->evt, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               bt_dev_err(hdev, "sending Intel patch command (0x%4.4x) failed (%ld)",
-                          cmd->opcode, PTR_ERR(skb));
-               return PTR_ERR(skb);
-       }
+               pm_runtime_forbid(&data->udev->dev);
 
-       /* It ensures that the returned event matches the event data read from
-        * the firmware file. At fist, it checks the length and then
-        * the contents of the event.
-        */
-       if (skb->len != evt->plen) {
-               bt_dev_err(hdev, "mismatch event length (opcode 0x%4.4x)",
-                          le16_to_cpu(cmd->opcode));
-               kfree_skb(skb);
-               return -EFAULT;
-       }
+               device_set_wakeup_capable(&data->udev->dev, false);
 
-       if (memcmp(skb->data, evt_param, evt->plen)) {
-               bt_dev_err(hdev, "mismatch event parameter (opcode 0x%4.4x)",
-                          le16_to_cpu(cmd->opcode));
-               kfree_skb(skb);
-               return -EFAULT;
+               /* Re-enable autosuspend if this was requested */
+               if (enable_autosuspend)
+                       usb_enable_autosuspend(data->udev);
        }
-       kfree_skb(skb);
-
-       return 0;
-}
-
-static int btusb_setup_intel(struct hci_dev *hdev)
-{
-       struct sk_buff *skb;
-       const struct firmware *fw;
-       const u8 *fw_ptr;
-       int disable_patch, err;
-       struct intel_version ver;
-
-       BT_DBG("%s", hdev->name);
 
-       /* The controller has a bug with the first HCI command sent to it
-        * returning number of completed commands as zero. This would stall the
-        * command processing in the Bluetooth core.
-        *
-        * As a workaround, send HCI Reset command first which will reset the
-        * number of completed commands and allow normal command processing
-        * from now on.
-        */
-       skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               bt_dev_err(hdev, "sending initial HCI reset command failed (%ld)",
-                          PTR_ERR(skb));
-               return PTR_ERR(skb);
-       }
        kfree_skb(skb);
 
-       /* Read Intel specific controller version first to allow selection of
-        * which firmware file to load.
-        *
-        * The returned information are hardware variant and revision plus
-        * firmware variant, revision and build number.
-        */
-       err = btintel_read_version(hdev, &ver);
-       if (err)
-               return err;
-
-       bt_dev_info(hdev, "read Intel version: %02x%02x%02x%02x%02x%02x%02x%02x%02x",
-                   ver.hw_platform, ver.hw_variant, ver.hw_revision,
-                   ver.fw_variant,  ver.fw_revision, ver.fw_build_num,
-                   ver.fw_build_ww, ver.fw_build_yy, ver.fw_patch_num);
-
-       /* fw_patch_num indicates the version of patch the device currently
-        * have. If there is no patch data in the device, it is always 0x00.
-        * So, if it is other than 0x00, no need to patch the device again.
-        */
-       if (ver.fw_patch_num) {
-               bt_dev_info(hdev, "Intel device is already patched. "
-                           "patch num: %02x", ver.fw_patch_num);
-               goto complete;
-       }
-
-       /* Opens the firmware patch file based on the firmware version read
-        * from the controller. If it fails to open the matching firmware
-        * patch file, it tries to open the default firmware patch file.
-        * If no patch file is found, allow the device to operate without
-        * a patch.
-        */
-       fw = btusb_setup_intel_get_fw(hdev, &ver);
-       if (!fw)
-               goto complete;
-       fw_ptr = fw->data;
-
-       /* Enable the manufacturer mode of the controller.
-        * Only while this mode is enabled, the driver can download the
-        * firmware patch data and configuration parameters.
-        */
-       err = btintel_enter_mfg(hdev);
-       if (err) {
-               release_firmware(fw);
-               return err;
-       }
-
-       disable_patch = 1;
-
-       /* The firmware data file consists of list of Intel specific HCI
-        * commands and its expected events. The first byte indicates the
-        * type of the message, either HCI command or HCI event.
-        *
-        * It reads the command and its expected event from the firmware file,
-        * and send to the controller. Once __hci_cmd_sync_ev() returns,
-        * the returned event is compared with the event read from the firmware
-        * file and it will continue until all the messages are downloaded to
-        * the controller.
-        *
-        * Once the firmware patching is completed successfully,
-        * the manufacturer mode is disabled with reset and activating the
-        * downloaded patch.
-        *
-        * If the firmware patching fails, the manufacturer mode is
-        * disabled with reset and deactivating the patch.
-        *
-        * If the default patch file is used, no reset is done when disabling
-        * the manufacturer.
-        */
-       while (fw->size > fw_ptr - fw->data) {
-               int ret;
-
-               ret = btusb_setup_intel_patching(hdev, fw, &fw_ptr,
-                                                &disable_patch);
-               if (ret < 0)
-                       goto exit_mfg_deactivate;
-       }
-
-       release_firmware(fw);
-
-       if (disable_patch)
-               goto exit_mfg_disable;
-
-       /* Patching completed successfully and disable the manufacturer mode
-        * with reset and activate the downloaded firmware patches.
-        */
-       err = btintel_exit_mfg(hdev, true, true);
-       if (err)
-               return err;
-
-       /* Need build number for downloaded fw patches in
-        * every power-on boot
-        */
-       err = btintel_read_version(hdev, &ver);
-       if (err)
-               return err;
-       bt_dev_info(hdev, "Intel BT fw patch 0x%02x completed & activated",
-                  ver.fw_patch_num);
-
-       goto complete;
-
-exit_mfg_disable:
-       /* Disable the manufacturer mode without reset */
-       err = btintel_exit_mfg(hdev, false, false);
-       if (err)
-               return err;
-
-       bt_dev_info(hdev, "Intel firmware patch completed");
-
-       goto complete;
-
-exit_mfg_deactivate:
-       release_firmware(fw);
-
-       /* Patching failed. Disable the manufacturer mode with reset and
-        * deactivate the downloaded firmware patches.
-        */
-       err = btintel_exit_mfg(hdev, true, false);
-       if (err)
-               return err;
-
-       bt_dev_info(hdev, "Intel firmware patch completed and deactivated");
-
-complete:
-       /* Set the event mask for Intel specific vendor events. This enables
-        * a few extra events that are useful during general operation.
-        */
-       btintel_set_event_mask_mfg(hdev, false);
-
-       btintel_check_bdaddr(hdev);
        return 0;
 }
 
@@ -2290,49 +1984,21 @@ static int inject_cmd_complete(struct hci_dev *hdev, __u16 opcode)
 static int btusb_recv_bulk_intel(struct btusb_data *data, void *buffer,
                                 int count)
 {
+       struct hci_dev *hdev = data->hdev;
+
        /* When the device is in bootloader mode, then it can send
         * events via the bulk endpoint. These events are treated the
         * same way as the ones received from the interrupt endpoint.
         */
-       if (test_bit(BTUSB_BOOTLOADER, &data->flags))
+       if (btintel_test_flag(hdev, INTEL_BOOTLOADER))
                return btusb_recv_intr(data, buffer, count);
 
        return btusb_recv_bulk(data, buffer, count);
 }
 
-static void btusb_intel_bootup(struct btusb_data *data, const void *ptr,
-                              unsigned int len)
-{
-       const struct intel_bootup *evt = ptr;
-
-       if (len != sizeof(*evt))
-               return;
-
-       if (test_and_clear_bit(BTUSB_BOOTING, &data->flags))
-               wake_up_bit(&data->flags, BTUSB_BOOTING);
-}
-
-static void btusb_intel_secure_send_result(struct btusb_data *data,
-                                          const void *ptr, unsigned int len)
-{
-       const struct intel_secure_send_result *evt = ptr;
-
-       if (len != sizeof(*evt))
-               return;
-
-       if (evt->result)
-               set_bit(BTUSB_FIRMWARE_FAILED, &data->flags);
-
-       if (test_and_clear_bit(BTUSB_DOWNLOADING, &data->flags) &&
-           test_bit(BTUSB_FIRMWARE_LOADED, &data->flags))
-               wake_up_bit(&data->flags, BTUSB_DOWNLOADING);
-}
-
 static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
 {
-       struct btusb_data *data = hci_get_drvdata(hdev);
-
-       if (test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+       if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
                struct hci_event_hdr *hdr = (void *)skb->data;
 
                if (skb->len > HCI_EVENT_HDR_SIZE && hdr->evt == 0xff &&
@@ -2346,7 +2012,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
                                 * the device sends a vendor specific event
                                 * indicating that the bootup completed.
                                 */
-                               btusb_intel_bootup(data, ptr, len);
+                               btintel_bootup(hdev, ptr, len);
                                break;
                        case 0x06:
                                /* When the firmware loading completes the
@@ -2354,7 +2020,7 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
                                 * indicating the result of the firmware
                                 * loading.
                                 */
-                               btusb_intel_secure_send_result(data, ptr, len);
+                               btintel_secure_send_result(hdev, ptr, len);
                                break;
                        }
                }
@@ -2365,14 +2031,13 @@ static int btusb_recv_event_intel(struct hci_dev *hdev, struct sk_buff *skb)
 
 static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
 {
-       struct btusb_data *data = hci_get_drvdata(hdev);
        struct urb *urb;
 
        BT_DBG("%s", hdev->name);
 
        switch (hci_skb_pkt_type(skb)) {
        case HCI_COMMAND_PKT:
-               if (test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+               if (btintel_test_flag(hdev, INTEL_BOOTLOADER)) {
                        struct hci_command_hdr *cmd = (void *)skb->data;
                        __u16 opcode = le16_to_cpu(cmd->opcode);
 
@@ -2424,728 +2089,82 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
        return -EILSEQ;
 }
 
-static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
-                                            struct intel_boot_params *params,
-                                            char *fw_name, size_t len,
-                                            const char *suffix)
-{
-       switch (ver->hw_variant) {
-       case 0x0b:      /* SfP */
-       case 0x0c:      /* WsP */
-               snprintf(fw_name, len, "intel/ibt-%u-%u.%s",
-                       le16_to_cpu(ver->hw_variant),
-                       le16_to_cpu(params->dev_revid),
-                       suffix);
-               break;
-       case 0x11:      /* JfP */
-       case 0x12:      /* ThP */
-       case 0x13:      /* HrP */
-       case 0x14:      /* CcP */
-               snprintf(fw_name, len, "intel/ibt-%u-%u-%u.%s",
-                       le16_to_cpu(ver->hw_variant),
-                       le16_to_cpu(ver->hw_revision),
-                       le16_to_cpu(ver->fw_revision),
-                       suffix);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv,
-                                                char *fw_name, size_t len,
-                                                const char *suffix)
-{
-       /* The firmware file name for new generation controllers will be
-        * ibt-<cnvi_top type+cnvi_top step>-<cnvr_top type+cnvr_top step>
-        */
-       snprintf(fw_name, len, "intel/ibt-%04x-%04x.%s",
-                INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvi_top),
-                                         INTEL_CNVX_TOP_STEP(ver_tlv->cnvi_top)),
-                INTEL_CNVX_TOP_PACK_SWAB(INTEL_CNVX_TOP_TYPE(ver_tlv->cnvr_top),
-                                         INTEL_CNVX_TOP_STEP(ver_tlv->cnvr_top)),
-                suffix);
-}
+/* UHW CR mapping */
+#define MTK_BT_MISC            0x70002510
+#define MTK_BT_SUBSYS_RST      0x70002610
+#define MTK_UDMA_INT_STA_BT    0x74000024
+#define MTK_UDMA_INT_STA_BT1   0x74000308
+#define MTK_BT_WDT_STATUS      0x740003A0
+#define MTK_EP_RST_OPT         0x74011890
+#define MTK_EP_RST_IN_OUT_OPT  0x00010001
+#define MTK_BT_RST_DONE                0x00000100
+#define MTK_BT_RESET_WAIT_MS   100
+#define MTK_BT_RESET_NUM_TRIES 10
+#define FIRMWARE_MT7663                "mediatek/mt7663pr2h.bin"
+#define FIRMWARE_MT7668                "mediatek/mt7668pr2h.bin"
 
-static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
-{
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       ktime_t delta, rettime;
-       unsigned long long duration;
-       int err;
+#define HCI_WMT_MAX_EVENT_SIZE         64
+/* It is for mt79xx download rom patch*/
+#define MTK_FW_ROM_PATCH_HEADER_SIZE   32
+#define MTK_FW_ROM_PATCH_GD_SIZE       64
+#define MTK_FW_ROM_PATCH_SEC_MAP_SIZE  64
+#define MTK_SEC_MAP_COMMON_SIZE        12
+#define MTK_SEC_MAP_NEED_SEND_SIZE     52
 
-       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+enum {
+       BTMTK_WMT_PATCH_DWNLD = 0x1,
+       BTMTK_WMT_FUNC_CTRL = 0x6,
+       BTMTK_WMT_RST = 0x7,
+       BTMTK_WMT_SEMAPHORE = 0x17,
+};
 
-       bt_dev_info(hdev, "Waiting for firmware download to complete");
+enum {
+       BTMTK_WMT_INVALID,
+       BTMTK_WMT_PATCH_UNDONE,
+       BTMTK_WMT_PATCH_PROGRESS,
+       BTMTK_WMT_PATCH_DONE,
+       BTMTK_WMT_ON_UNDONE,
+       BTMTK_WMT_ON_DONE,
+       BTMTK_WMT_ON_PROGRESS,
+};
 
-       err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
-                                 TASK_INTERRUPTIBLE,
-                                 msecs_to_jiffies(msec));
-       if (err == -EINTR) {
-               bt_dev_err(hdev, "Firmware loading interrupted");
-               return err;
-       }
+struct btmtk_wmt_hdr {
+       u8      dir;
+       u8      op;
+       __le16  dlen;
+       u8      flag;
+} __packed;
 
-       if (err) {
-               bt_dev_err(hdev, "Firmware loading timeout");
-               return -ETIMEDOUT;
-       }
+struct btmtk_hci_wmt_cmd {
+       struct btmtk_wmt_hdr hdr;
+       u8 data[];
+} __packed;
 
-       if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
-               bt_dev_err(hdev, "Firmware loading failed");
-               return -ENOEXEC;
-       }
+struct btmtk_hci_wmt_evt {
+       struct hci_event_hdr hhdr;
+       struct btmtk_wmt_hdr whdr;
+} __packed;
 
-       rettime = ktime_get();
-       delta = ktime_sub(rettime, calltime);
-       duration = (unsigned long long)ktime_to_ns(delta) >> 10;
+struct btmtk_hci_wmt_evt_funcc {
+       struct btmtk_hci_wmt_evt hwhdr;
+       __be16 status;
+} __packed;
 
-       bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
+struct btmtk_tci_sleep {
+       u8 mode;
+       __le16 duration;
+       __le16 host_duration;
+       u8 host_wakeup_pin;
+       u8 time_compensation;
+} __packed;
 
-       return 0;
-}
-
-static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
-                                               struct intel_version_tlv *ver,
-                                               u32 *boot_param)
-{
-       const struct firmware *fw;
-       char fwname[64];
-       int err;
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       ktime_t calltime;
-
-       if (!ver || !boot_param)
-               return -EINVAL;
-
-       /* The firmware variant determines if the device is in bootloader
-        * mode or is running operational firmware. The value 0x03 identifies
-        * the bootloader and the value 0x23 identifies the operational
-        * firmware.
-        *
-        * When the operational firmware is already present, then only
-        * the check for valid Bluetooth device address is needed. This
-        * determines if the device will be added as configured or
-        * unconfigured controller.
-        *
-        * It is not possible to use the Secure Boot Parameters in this
-        * case since that command is only available in bootloader mode.
-        */
-       if (ver->img_type == 0x03) {
-               clear_bit(BTUSB_BOOTLOADER, &data->flags);
-               btintel_check_bdaddr(hdev);
-       }
-
-       /* If the OTP has no valid Bluetooth device address, then there will
-        * also be no valid address for the operational firmware.
-        */
-       if (!bacmp(&ver->otp_bd_addr, BDADDR_ANY)) {
-               bt_dev_info(hdev, "No device address configured");
-               set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
-       }
-
-       btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
-       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
-       if (err < 0) {
-               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
-                       /* Firmware has already been loaded */
-                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-                       return 0;
-               }
-
-               bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
-                          fwname, err);
-
-               return err;
-       }
-
-       bt_dev_info(hdev, "Found device firmware: %s", fwname);
-
-       if (fw->size < 644) {
-               bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
-                          fw->size);
-               err = -EBADF;
-               goto done;
-       }
-
-       calltime = ktime_get();
-
-       set_bit(BTUSB_DOWNLOADING, &data->flags);
-
-       /* Start firmware downloading and get boot parameter */
-       err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param,
-                                              INTEL_HW_VARIANT(ver->cnvi_bt),
-                                              ver->sbe_type);
-       if (err < 0) {
-               if (err == -EALREADY) {
-                       /* Firmware has already been loaded */
-                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-                       err = 0;
-                       goto done;
-               }
-
-               /* When FW download fails, send Intel Reset to retry
-                * FW download.
-                */
-               btintel_reset_to_bootloader(hdev);
-               goto done;
-       }
-
-       /* Before switching the device into operational mode and with that
-        * booting the loaded firmware, wait for the bootloader notification
-        * that all fragments have been successfully received.
-        *
-        * When the event processing receives the notification, then the
-        * BTUSB_DOWNLOADING flag will be cleared.
-        *
-        * The firmware loading should not take longer than 5 seconds
-        * and thus just timeout if that happens and fail the setup
-        * of this device.
-        */
-       err = btusb_download_wait(hdev, calltime, 5000);
-       if (err == -ETIMEDOUT)
-               btintel_reset_to_bootloader(hdev);
-
-done:
-       release_firmware(fw);
-       return err;
-}
-
-static int btusb_intel_download_firmware(struct hci_dev *hdev,
-                                        struct intel_version *ver,
-                                        struct intel_boot_params *params,
-                                        u32 *boot_param)
-{
-       const struct firmware *fw;
-       char fwname[64];
-       int err;
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       ktime_t calltime;
-
-       if (!ver || !params)
-               return -EINVAL;
-
-       /* The firmware variant determines if the device is in bootloader
-        * mode or is running operational firmware. The value 0x06 identifies
-        * the bootloader and the value 0x23 identifies the operational
-        * firmware.
-        *
-        * When the operational firmware is already present, then only
-        * the check for valid Bluetooth device address is needed. This
-        * determines if the device will be added as configured or
-        * unconfigured controller.
-        *
-        * It is not possible to use the Secure Boot Parameters in this
-        * case since that command is only available in bootloader mode.
-        */
-       if (ver->fw_variant == 0x23) {
-               clear_bit(BTUSB_BOOTLOADER, &data->flags);
-               btintel_check_bdaddr(hdev);
-
-               /* SfP and WsP don't seem to update the firmware version on file
-                * so version checking is currently possible.
-                */
-               switch (ver->hw_variant) {
-               case 0x0b:      /* SfP */
-               case 0x0c:      /* WsP */
-                       return 0;
-               }
-
-               /* Proceed to download to check if the version matches */
-               goto download;
-       }
-
-       /* Read the secure boot parameters to identify the operating
-        * details of the bootloader.
-        */
-       err = btintel_read_boot_params(hdev, params);
-       if (err)
-               return err;
-
-       /* It is required that every single firmware fragment is acknowledged
-        * with a command complete event. If the boot parameters indicate
-        * that this bootloader does not send them, then abort the setup.
-        */
-       if (params->limited_cce != 0x00) {
-               bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
-                          params->limited_cce);
-               return -EINVAL;
-       }
-
-       /* If the OTP has no valid Bluetooth device address, then there will
-        * also be no valid address for the operational firmware.
-        */
-       if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
-               bt_dev_info(hdev, "No device address configured");
-               set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
-       }
-
-download:
-       /* With this Intel bootloader only the hardware variant and device
-        * revision information are used to select the right firmware for SfP
-        * and WsP.
-        *
-        * The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
-        *
-        * Currently the supported hardware variants are:
-        *   11 (0x0b) for iBT3.0 (LnP/SfP)
-        *   12 (0x0c) for iBT3.5 (WsP)
-        *
-        * For ThP/JfP and for future SKU's, the FW name varies based on HW
-        * variant, HW revision and FW revision, as these are dependent on CNVi
-        * and RF Combination.
-        *
-        *   17 (0x11) for iBT3.5 (JfP)
-        *   18 (0x12) for iBT3.5 (ThP)
-        *
-        * The firmware file name for these will be
-        * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
-        *
-        */
-       err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
-                                               sizeof(fwname), "sfi");
-       if (err < 0) {
-               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
-                       /* Firmware has already been loaded */
-                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-                       return 0;
-               }
-
-               bt_dev_err(hdev, "Unsupported Intel firmware naming");
-               return -EINVAL;
-       }
-
-       err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
-       if (err < 0) {
-               if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
-                       /* Firmware has already been loaded */
-                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-                       return 0;
-               }
-
-               bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
-                          fwname, err);
-               return err;
-       }
-
-       bt_dev_info(hdev, "Found device firmware: %s", fwname);
-
-       if (fw->size < 644) {
-               bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
-                          fw->size);
-               err = -EBADF;
-               goto done;
-       }
-
-       calltime = ktime_get();
-
-       set_bit(BTUSB_DOWNLOADING, &data->flags);
-
-       /* Start firmware downloading and get boot parameter */
-       err = btintel_download_firmware(hdev, ver, fw, boot_param);
-       if (err < 0) {
-               if (err == -EALREADY) {
-                       /* Firmware has already been loaded */
-                       set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-                       err = 0;
-                       goto done;
-               }
-
-               /* When FW download fails, send Intel Reset to retry
-                * FW download.
-                */
-               btintel_reset_to_bootloader(hdev);
-               goto done;
-       }
-
-       /* Before switching the device into operational mode and with that
-        * booting the loaded firmware, wait for the bootloader notification
-        * that all fragments have been successfully received.
-        *
-        * When the event processing receives the notification, then the
-        * BTUSB_DOWNLOADING flag will be cleared.
-        *
-        * The firmware loading should not take longer than 5 seconds
-        * and thus just timeout if that happens and fail the setup
-        * of this device.
-        */
-       err = btusb_download_wait(hdev, calltime, 5000);
-       if (err == -ETIMEDOUT)
-               btintel_reset_to_bootloader(hdev);
-
-done:
-       release_firmware(fw);
-       return err;
-}
-
-static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
-{
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       ktime_t delta, rettime;
-       unsigned long long duration;
-       int err;
-
-       bt_dev_info(hdev, "Waiting for device to boot");
-
-       err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
-                                 TASK_INTERRUPTIBLE,
-                                 msecs_to_jiffies(msec));
-       if (err == -EINTR) {
-               bt_dev_err(hdev, "Device boot interrupted");
-               return -EINTR;
-       }
-
-       if (err) {
-               bt_dev_err(hdev, "Device boot timeout");
-               return -ETIMEDOUT;
-       }
-
-       rettime = ktime_get();
-       delta = ktime_sub(rettime, calltime);
-       duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-
-       bt_dev_info(hdev, "Device booted in %llu usecs", duration);
-
-       return 0;
-}
-
-static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr)
-{
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       ktime_t calltime;
-       int err;
-
-       calltime = ktime_get();
-
-       set_bit(BTUSB_BOOTING, &data->flags);
-
-       err = btintel_send_intel_reset(hdev, boot_addr);
-       if (err) {
-               bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
-               btintel_reset_to_bootloader(hdev);
-               return err;
-       }
-
-       /* The bootloader will not indicate when the device is ready. This
-        * is done by the operational firmware sending bootup notification.
-        *
-        * Booting into operational firmware should not take longer than
-        * 1 second. However if that happens, then just fail the setup
-        * since something went wrong.
-        */
-       err = btusb_boot_wait(hdev, calltime, 1000);
-       if (err == -ETIMEDOUT)
-               btintel_reset_to_bootloader(hdev);
-
-       return err;
-}
-
-static int btusb_setup_intel_new(struct hci_dev *hdev)
-{
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       struct intel_version ver;
-       struct intel_boot_params params;
-       u32 boot_param;
-       char ddcname[64];
-       int err;
-       struct intel_debug_features features;
-
-       BT_DBG("%s", hdev->name);
-
-       /* Set the default boot parameter to 0x0 and it is updated to
-        * SKU specific boot parameter after reading Intel_Write_Boot_Params
-        * command while downloading the firmware.
-        */
-       boot_param = 0x00000000;
-
-       /* Read the Intel version information to determine if the device
-        * is in bootloader mode or if it already has operational firmware
-        * loaded.
-        */
-       err = btintel_read_version(hdev, &ver);
-       if (err) {
-               bt_dev_err(hdev, "Intel Read version failed (%d)", err);
-               btintel_reset_to_bootloader(hdev);
-               return err;
-       }
-
-       err = btintel_version_info(hdev, &ver);
-       if (err)
-               return err;
-
-       err = btusb_intel_download_firmware(hdev, &ver, &params, &boot_param);
-       if (err)
-               return err;
-
-       /* controller is already having an operational firmware */
-       if (ver.fw_variant == 0x23)
-               goto finish;
-
-       err = btusb_intel_boot(hdev, boot_param);
-       if (err)
-               return err;
-
-       clear_bit(BTUSB_BOOTLOADER, &data->flags);
-
-       err = btusb_setup_intel_new_get_fw_name(&ver, &params, ddcname,
-                                               sizeof(ddcname), "ddc");
-
-       if (err < 0) {
-               bt_dev_err(hdev, "Unsupported Intel firmware naming");
-       } else {
-               /* Once the device is running in operational mode, it needs to
-                * apply the device configuration (DDC) parameters.
-                *
-                * The device can work without DDC parameters, so even if it
-                * fails to load the file, no need to fail the setup.
-                */
-               btintel_load_ddc_config(hdev, ddcname);
-       }
-
-       /* Read the Intel supported features and if new exception formats
-        * supported, need to load the additional DDC config to enable.
-        */
-       btintel_read_debug_features(hdev, &features);
-
-       /* Set DDC mask for available debug features */
-       btintel_set_debug_features(hdev, &features);
-
-       /* Read the Intel version information after loading the FW  */
-       err = btintel_read_version(hdev, &ver);
-       if (err)
-               return err;
-
-       btintel_version_info(hdev, &ver);
-
-finish:
-       /* All Intel controllers that support the Microsoft vendor
-        * extension are using 0xFC1E for VsMsftOpCode.
-        */
-       switch (ver.hw_variant) {
-       case 0x11:      /* JfP */
-       case 0x12:      /* ThP */
-       case 0x13:      /* HrP */
-       case 0x14:      /* CcP */
-               hci_set_msft_opcode(hdev, 0xFC1E);
-               break;
-       }
-
-       /* Set the event mask for Intel specific vendor events. This enables
-        * a few extra events that are useful during general operation. It
-        * does not enable any debugging related events.
-        *
-        * The device will function correctly without these events enabled
-        * and thus no need to fail the setup.
-        */
-       btintel_set_event_mask(hdev, false);
-
-       return 0;
-}
-
-static int btusb_setup_intel_newgen(struct hci_dev *hdev)
-{
-       struct btusb_data *data = hci_get_drvdata(hdev);
-       u32 boot_param;
-       char ddcname[64];
-       int err;
-       struct intel_debug_features features;
-       struct intel_version_tlv version;
-
-       bt_dev_dbg(hdev, "");
-
-       /* Set the default boot parameter to 0x0 and it is updated to
-        * SKU specific boot parameter after reading Intel_Write_Boot_Params
-        * command while downloading the firmware.
-        */
-       boot_param = 0x00000000;
-
-       /* Read the Intel version information to determine if the device
-        * is in bootloader mode or if it already has operational firmware
-        * loaded.
-        */
-       err = btintel_read_version_tlv(hdev, &version);
-       if (err) {
-               bt_dev_err(hdev, "Intel Read version failed (%d)", err);
-               btintel_reset_to_bootloader(hdev);
-               return err;
-       }
-
-       err = btintel_version_info_tlv(hdev, &version);
-       if (err)
-               return err;
-
-       err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param);
-       if (err)
-               return err;
-
-       /* check if controller is already having an operational firmware */
-       if (version.img_type == 0x03)
-               goto finish;
-
-       err = btusb_intel_boot(hdev, boot_param);
-       if (err)
-               return err;
-
-       clear_bit(BTUSB_BOOTLOADER, &data->flags);
-
-       btusb_setup_intel_newgen_get_fw_name(&version, ddcname, sizeof(ddcname),
-                                            "ddc");
-       /* Once the device is running in operational mode, it needs to
-        * apply the device configuration (DDC) parameters.
-        *
-        * The device can work without DDC parameters, so even if it
-        * fails to load the file, no need to fail the setup.
-        */
-       btintel_load_ddc_config(hdev, ddcname);
-
-       /* Read the Intel supported features and if new exception formats
-        * supported, need to load the additional DDC config to enable.
-        */
-       btintel_read_debug_features(hdev, &features);
-
-       /* Set DDC mask for available debug features */
-       btintel_set_debug_features(hdev, &features);
-
-       /* Read the Intel version information after loading the FW  */
-       err = btintel_read_version_tlv(hdev, &version);
-       if (err)
-               return err;
-
-       btintel_version_info_tlv(hdev, &version);
-
-finish:
-       /* Set the event mask for Intel specific vendor events. This enables
-        * a few extra events that are useful during general operation. It
-        * does not enable any debugging related events.
-        *
-        * The device will function correctly without these events enabled
-        * and thus no need to fail the setup.
-        */
-       btintel_set_event_mask(hdev, false);
-
-       return 0;
-}
-static int btusb_shutdown_intel(struct hci_dev *hdev)
-{
-       struct sk_buff *skb;
-       long ret;
-
-       /* In the shutdown sequence where Bluetooth is turned off followed
-        * by WiFi being turned off, turning WiFi back on causes issue with
-        * the RF calibration.
-        *
-        * To ensure that any RF activity has been stopped, issue HCI Reset
-        * command to clear all ongoing activity including advertising,
-        * scanning etc.
-        */
-       skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               ret = PTR_ERR(skb);
-               bt_dev_err(hdev, "HCI reset during shutdown failed");
-               return ret;
-       }
-       kfree_skb(skb);
-
-       /* Some platforms have an issue with BT LED when the interface is
-        * down or BT radio is turned off, which takes 5 seconds to BT LED
-        * goes off. This command turns off the BT LED immediately.
-        */
-       skb = __hci_cmd_sync(hdev, 0xfc3f, 0, NULL, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               ret = PTR_ERR(skb);
-               bt_dev_err(hdev, "turning off Intel device LED failed");
-               return ret;
-       }
-       kfree_skb(skb);
-
-       return 0;
-}
-
-static int btusb_shutdown_intel_new(struct hci_dev *hdev)
-{
-       struct sk_buff *skb;
-
-       /* Send HCI Reset to the controller to stop any BT activity which
-        * were triggered. This will help to save power and maintain the
-        * sync b/w Host and controller
-        */
-       skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
-       if (IS_ERR(skb)) {
-               bt_dev_err(hdev, "HCI reset during shutdown failed");
-               return PTR_ERR(skb);
-       }
-       kfree_skb(skb);
-
-       return 0;
-}
-
-#define FIRMWARE_MT7663                "mediatek/mt7663pr2h.bin"
-#define FIRMWARE_MT7668                "mediatek/mt7668pr2h.bin"
-
-#define HCI_WMT_MAX_EVENT_SIZE         64
-/* It is for mt79xx download rom patch*/
-#define MTK_FW_ROM_PATCH_HEADER_SIZE   32
-#define MTK_FW_ROM_PATCH_GD_SIZE       64
-#define MTK_FW_ROM_PATCH_SEC_MAP_SIZE  64
-#define MTK_SEC_MAP_COMMON_SIZE        12
-#define MTK_SEC_MAP_NEED_SEND_SIZE     52
-
-enum {
-       BTMTK_WMT_PATCH_DWNLD = 0x1,
-       BTMTK_WMT_FUNC_CTRL = 0x6,
-       BTMTK_WMT_RST = 0x7,
-       BTMTK_WMT_SEMAPHORE = 0x17,
-};
-
-enum {
-       BTMTK_WMT_INVALID,
-       BTMTK_WMT_PATCH_UNDONE,
-       BTMTK_WMT_PATCH_PROGRESS,
-       BTMTK_WMT_PATCH_DONE,
-       BTMTK_WMT_ON_UNDONE,
-       BTMTK_WMT_ON_DONE,
-       BTMTK_WMT_ON_PROGRESS,
-};
-
-struct btmtk_wmt_hdr {
-       u8      dir;
-       u8      op;
-       __le16  dlen;
-       u8      flag;
-} __packed;
-
-struct btmtk_hci_wmt_cmd {
-       struct btmtk_wmt_hdr hdr;
-       u8 data[];
-} __packed;
-
-struct btmtk_hci_wmt_evt {
-       struct hci_event_hdr hhdr;
-       struct btmtk_wmt_hdr whdr;
-} __packed;
-
-struct btmtk_hci_wmt_evt_funcc {
-       struct btmtk_hci_wmt_evt hwhdr;
-       __be16 status;
-} __packed;
-
-struct btmtk_tci_sleep {
-       u8 mode;
-       __le16 duration;
-       __le16 host_duration;
-       u8 host_wakeup_pin;
-       u8 time_compensation;
-} __packed;
-
-struct btmtk_hci_wmt_params {
-       u8 op;
-       u8 flag;
-       u16 dlen;
-       const void *data;
-       u32 *status;
-};
+struct btmtk_hci_wmt_params {
+       u8 op;
+       u8 flag;
+       u16 dlen;
+       const void *data;
+       u32 *status;
+};
 
 struct btmtk_patch_header {
        u8 datetime[16];
@@ -3655,6 +2674,63 @@ static int btusb_mtk_func_query(struct hci_dev *hdev)
        return status;
 }
 
+static int btusb_mtk_uhw_reg_write(struct btusb_data *data, u32 reg, u32 val)
+{
+       struct hci_dev *hdev = data->hdev;
+       int pipe, err;
+       void *buf;
+
+       buf = kzalloc(4, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       put_unaligned_le32(val, buf);
+
+       pipe = usb_sndctrlpipe(data->udev, 0);
+       err = usb_control_msg(data->udev, pipe, 0x02,
+                             0x5E,
+                             reg >> 16, reg & 0xffff,
+                             buf, 4, USB_CTRL_SET_TIMEOUT);
+       if (err < 0) {
+               bt_dev_err(hdev, "Failed to write uhw reg(%d)", err);
+               goto err_free_buf;
+       }
+
+err_free_buf:
+       kfree(buf);
+
+       return err;
+}
+
+static int btusb_mtk_uhw_reg_read(struct btusb_data *data, u32 reg, u32 *val)
+{
+       struct hci_dev *hdev = data->hdev;
+       int pipe, err;
+       void *buf;
+
+       buf = kzalloc(4, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       pipe = usb_rcvctrlpipe(data->udev, 0);
+       err = usb_control_msg(data->udev, pipe, 0x01,
+                             0xDE,
+                             reg >> 16, reg & 0xffff,
+                             buf, 4, USB_CTRL_SET_TIMEOUT);
+       if (err < 0) {
+               bt_dev_err(hdev, "Failed to read uhw reg(%d)", err);
+               goto err_free_buf;
+       }
+
+       *val = get_unaligned_le32(buf);
+       bt_dev_dbg(hdev, "reg=%x, value=0x%08x", reg, *val);
+
+err_free_buf:
+       kfree(buf);
+
+       return err;
+}
+
 static int btusb_mtk_reg_read(struct btusb_data *data, u32 reg, u32 *val)
 {
        int pipe, err, size = sizeof(u32);
@@ -3734,6 +2810,9 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
                         dev_id & 0xffff, (fw_version & 0xff) + 1);
                err = btusb_mtk_setup_firmware_79xx(hdev, fw_bin_name);
 
+               /* It's Device EndPoint Reset Option Register */
+               btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT);
+
                /* Enable Bluetooth protocol */
                param = 1;
                wmt_params.op = BTMTK_WMT_FUNC_CTRL;
@@ -3747,6 +2826,8 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
                        bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err);
                        return err;
                }
+
+               hci_set_msft_opcode(hdev, 0xFD30);
                goto done;
        default:
                bt_dev_err(hdev, "Unsupported hardware variant (%08x)",
@@ -3857,6 +2938,83 @@ static int btusb_mtk_shutdown(struct hci_dev *hdev)
        return 0;
 }
 
+static void btusb_mtk_cmd_timeout(struct hci_dev *hdev)
+{
+       struct btusb_data *data = hci_get_drvdata(hdev);
+       u32 val;
+       int err, retry = 0;
+
+       /* It's MediaTek specific bluetooth reset mechanism via USB */
+       if (test_and_set_bit(BTUSB_HW_RESET_ACTIVE, &data->flags)) {
+               bt_dev_err(hdev, "last reset failed? Not resetting again");
+               return;
+       }
+
+       err = usb_autopm_get_interface(data->intf);
+       if (err < 0)
+               return;
+
+       btusb_stop_traffic(data);
+       usb_kill_anchored_urbs(&data->tx_anchor);
+
+       /* It's Device EndPoint Reset Option Register */
+       bt_dev_dbg(hdev, "Initiating reset mechanism via uhw");
+       btusb_mtk_uhw_reg_write(data, MTK_EP_RST_OPT, MTK_EP_RST_IN_OUT_OPT);
+       btusb_mtk_uhw_reg_read(data, MTK_BT_WDT_STATUS, &val);
+
+       /* Reset the bluetooth chip via USB interface. */
+       btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 1);
+       btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT, 0x000000FF);
+       btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT, &val);
+       btusb_mtk_uhw_reg_write(data, MTK_UDMA_INT_STA_BT1, 0x000000FF);
+       btusb_mtk_uhw_reg_read(data, MTK_UDMA_INT_STA_BT1, &val);
+       /* MT7921 need to delay 20ms between toggle reset bit */
+       msleep(20);
+       btusb_mtk_uhw_reg_write(data, MTK_BT_SUBSYS_RST, 0);
+       btusb_mtk_uhw_reg_read(data, MTK_BT_SUBSYS_RST, &val);
+
+       /* Poll the register until reset is completed */
+       do {
+               btusb_mtk_uhw_reg_read(data, MTK_BT_MISC, &val);
+               if (val & MTK_BT_RST_DONE) {
+                       bt_dev_dbg(hdev, "Bluetooth Reset Successfully");
+                       break;
+               }
+
+               bt_dev_dbg(hdev, "Polling Bluetooth Reset CR");
+               retry++;
+               msleep(MTK_BT_RESET_WAIT_MS);
+       } while (retry < MTK_BT_RESET_NUM_TRIES);
+
+       btusb_mtk_id_get(data, 0x70010200, &val);
+       if (!val)
+               bt_dev_err(hdev, "Can't get device id, subsys reset fail.");
+
+       usb_queue_reset_device(data->intf);
+
+       clear_bit(BTUSB_HW_RESET_ACTIVE, &data->flags);
+}
+
+static int btusb_recv_acl_mtk(struct hci_dev *hdev, struct sk_buff *skb)
+{
+       struct btusb_data *data = hci_get_drvdata(hdev);
+       u16 handle = le16_to_cpu(hci_acl_hdr(skb)->handle);
+
+       switch (handle) {
+       case 0xfc6f:            /* Firmware dump from device */
+               /* When the firmware hangs, the device can no longer
+                * suspend and thus disable auto-suspend.
+                */
+               usb_disable_autosuspend(data->udev);
+               fallthrough;
+       case 0x05ff:            /* Firmware debug logging 1 */
+       case 0x05fe:            /* Firmware debug logging 2 */
+               return hci_recv_diag(hdev, skb);
+       }
+
+       return hci_recv_frame(hdev, skb);
+}
+
 MODULE_FIRMWARE(FIRMWARE_MT7663);
 MODULE_FIRMWARE(FIRMWARE_MT7668);
 
@@ -4437,9 +3595,6 @@ static bool btusb_prevent_wake(struct hci_dev *hdev)
 {
        struct btusb_data *data = hci_get_drvdata(hdev);
 
-       if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags))
-               return true;
-
        return !device_may_wakeup(&data->udev->dev);
 }
 
@@ -4465,7 +3620,7 @@ static int btusb_probe(struct usb_interface *intf,
        struct btusb_data *data;
        struct hci_dev *hdev;
        unsigned ifnum_base;
-       int i, err;
+       int i, err, priv_size;
 
        BT_DBG("intf %p id %p", intf, id);
 
@@ -4551,16 +3706,23 @@ static int btusb_probe(struct usb_interface *intf,
        init_usb_anchor(&data->ctrl_anchor);
        spin_lock_init(&data->rxlock);
 
-       if (id->driver_info & BTUSB_INTEL_NEW) {
+       priv_size = 0;
+
+       data->recv_event = hci_recv_frame;
+       data->recv_bulk = btusb_recv_bulk;
+
+       if (id->driver_info & BTUSB_INTEL_COMBINED) {
+               /* Allocate extra space for Intel device */
+               priv_size += sizeof(struct btintel_data);
+
+               /* Override the rx handlers */
                data->recv_event = btusb_recv_event_intel;
                data->recv_bulk = btusb_recv_bulk_intel;
-               set_bit(BTUSB_BOOTLOADER, &data->flags);
-       } else {
-               data->recv_event = hci_recv_frame;
-               data->recv_bulk = btusb_recv_bulk;
        }
 
-       hdev = hci_alloc_dev();
+       data->recv_acl = hci_recv_frame;
+
+       hdev = hci_alloc_dev_priv(priv_size);
        if (!hdev)
                return -ENOMEM;
 
@@ -4634,48 +3796,18 @@ static int btusb_probe(struct usb_interface *intf,
                data->diag = usb_ifnum_to_if(data->udev, ifnum_base + 2);
        }
 
-       if (id->driver_info & BTUSB_INTEL) {
-               hdev->manufacturer = 2;
-               hdev->setup = btusb_setup_intel;
-               hdev->shutdown = btusb_shutdown_intel;
-               hdev->set_diag = btintel_set_diag_mfg;
-               hdev->set_bdaddr = btintel_set_bdaddr;
-               hdev->cmd_timeout = btusb_intel_cmd_timeout;
-               set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
-               set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
-               set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
-       }
-
-       if (id->driver_info & BTUSB_INTEL_NEW) {
-               hdev->manufacturer = 2;
-               hdev->send = btusb_send_frame_intel;
-               hdev->setup = btusb_setup_intel_new;
-               hdev->shutdown = btusb_shutdown_intel_new;
-               hdev->hw_error = btintel_hw_error;
-               hdev->set_diag = btintel_set_diag;
-               hdev->set_bdaddr = btintel_set_bdaddr;
-               hdev->cmd_timeout = btusb_intel_cmd_timeout;
-               set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
-               set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
-               set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
-       }
+       /* Combined Intel Device setup to support multiple setup routine */
+       if (id->driver_info & BTUSB_INTEL_COMBINED) {
+               err = btintel_configure_setup(hdev);
+               if (err)
+                       goto out_free_dev;
 
-       if (id->driver_info & BTUSB_INTEL_NEWGEN) {
-               hdev->manufacturer = 2;
+               /* Transport specific configuration */
                hdev->send = btusb_send_frame_intel;
-               hdev->setup = btusb_setup_intel_newgen;
-               hdev->shutdown = btusb_shutdown_intel_new;
-               hdev->hw_error = btintel_hw_error;
-               hdev->set_diag = btintel_set_diag;
-               hdev->set_bdaddr = btintel_set_bdaddr;
                hdev->cmd_timeout = btusb_intel_cmd_timeout;
-               set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
-               set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
-               set_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks);
 
-               data->recv_event = btusb_recv_event_intel;
-               data->recv_bulk = btusb_recv_bulk_intel;
-               set_bit(BTUSB_BOOTLOADER, &data->flags);
+               if (id->driver_info & BTUSB_INTEL_BROKEN_INITIAL_NCMD)
+                       btintel_set_flag(hdev, INTEL_BROKEN_INITIAL_NCMD);
        }
 
        if (id->driver_info & BTUSB_MARVELL)
@@ -4686,7 +3818,9 @@ static int btusb_probe(struct usb_interface *intf,
                hdev->setup = btusb_mtk_setup;
                hdev->shutdown = btusb_mtk_shutdown;
                hdev->manufacturer = 70;
+               hdev->cmd_timeout = btusb_mtk_cmd_timeout;
                set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
+               data->recv_acl = btusb_recv_acl_mtk;
        }
 
        if (id->driver_info & BTUSB_SWAVE) {
@@ -4720,6 +3854,7 @@ static int btusb_probe(struct usb_interface *intf,
                hdev->set_bdaddr = btusb_set_bdaddr_wcn6855;
                hdev->cmd_timeout = btusb_qca_cmd_timeout;
                set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+               hci_set_msft_opcode(hdev, 0xFD70);
        }
 
        if (id->driver_info & BTUSB_AMP) {
@@ -4737,11 +3872,9 @@ static int btusb_probe(struct usb_interface *intf,
                hdev->shutdown = btrtl_shutdown_realtek;
                hdev->cmd_timeout = btusb_rtl_cmd_timeout;
 
-               /* Realtek devices lose their updated firmware over global
-                * suspend that means host doesn't send SET_FEATURE
-                * (DEVICE_REMOTE_WAKEUP)
-                */
-               set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
+               /* Realtek devices need to set remote wakeup on auto-suspend */
+               set_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags);
+               set_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags);
        }
 
        if (!reset)
@@ -4916,12 +4049,15 @@ static int btusb_suspend(struct usb_interface *intf, pm_message_t message)
         * Actually, it depends on whether the usb host sends
         * set feature (enable wakeup) or not.
         */
-       if (test_bit(BTUSB_WAKEUP_DISABLE, &data->flags)) {
+       if (test_bit(BTUSB_WAKEUP_AUTOSUSPEND, &data->flags)) {
                if (PMSG_IS_AUTO(message) &&
                    device_can_wakeup(&data->udev->dev))
                        data->udev->do_remote_wakeup = 1;
-               else if (!PMSG_IS_AUTO(message))
+               else if (!PMSG_IS_AUTO(message) &&
+                        !device_may_wakeup(&data->udev->dev)) {
+                       data->udev->do_remote_wakeup = 0;
                        data->udev->reset_resume = 1;
+               }
        }
 
        return 0;
index 3cd57fc..ef54afa 100644 (file)
@@ -51,6 +51,7 @@
 /**
  * struct bcm_device_data - device specific data
  * @no_early_set_baudrate: Disallow set baudrate before driver setup()
+ * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it
  */
 struct bcm_device_data {
        bool    no_early_set_baudrate;
@@ -77,6 +78,8 @@ struct bcm_device_data {
  * @btlp: Apple ACPI method to toggle BT_WAKE pin ("Bluetooth Low Power")
  * @btpu: Apple ACPI method to drive BT_REG_ON pin high ("Bluetooth Power Up")
  * @btpd: Apple ACPI method to drive BT_REG_ON pin low ("Bluetooth Power Down")
+ * @gpio_count: internal counter for GPIO resources associated with ACPI device
+ * @gpio_int_idx: index in _CRS for GpioInt() resource
  * @txco_clk: external reference frequency clock used by Bluetooth device
  * @lpo_clk: external LPO clock used by Bluetooth device
  * @supplies: VBAT and VDDIO supplies used by Bluetooth device
@@ -88,10 +91,13 @@ struct bcm_device_data {
  *     set to 0 if @init_speed is already the preferred baudrate
  * @irq: interrupt triggered by HOST_WAKE_BT pin
  * @irq_active_low: whether @irq is active low
+ * @irq_acquired: flag to show if IRQ handler has been assigned
  * @hu: pointer to HCI UART controller struct,
  *     used to disable flow control during runtime suspend and system sleep
  * @is_suspended: whether flow control is currently disabled
  * @no_early_set_baudrate: don't set_baudrate before setup()
+ * @drive_rts_on_open: drive RTS signal on ->open() when platform requires it
+ * @pcm_int_params: keep the initial PCM configuration
  */
 struct bcm_device {
        /* Must be the first member, hci_serdev.c expects this. */
index e052063..0c0dede 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/mod_devicetable.h>
 #include <linux/of_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/serdev.h>
 #include <linux/skbuff.h>
 
@@ -21,6 +22,8 @@
 #include "btrtl.h"
 #include "hci_uart.h"
 
+#define SUSPEND_TIMEOUT_MS     6000
+
 #define HCI_3WIRE_ACK_PKT      0
 #define HCI_3WIRE_LINK_PKT     15
 
 
 /* H5 state flags */
 enum {
-       H5_RX_ESC,      /* SLIP escape mode */
-       H5_TX_ACK_REQ,  /* Pending ack to send */
+       H5_RX_ESC,              /* SLIP escape mode */
+       H5_TX_ACK_REQ,          /* Pending ack to send */
+       H5_WAKEUP_DISABLE,      /* Device cannot wake host */
+       H5_HW_FLOW_CONTROL,     /* Use HW flow control */
 };
 
 struct h5 {
@@ -97,6 +102,10 @@ struct h5 {
        struct gpio_desc *device_wake_gpio;
 };
 
+enum h5_driver_info {
+       H5_INFO_WAKEUP_DISABLE = BIT(0),
+};
+
 struct h5_vnd {
        int (*setup)(struct h5 *h5);
        void (*open)(struct h5 *h5);
@@ -106,6 +115,11 @@ struct h5_vnd {
        const struct acpi_gpio_mapping *acpi_gpio_map;
 };
 
+struct h5_device_data {
+       uint32_t driver_info;
+       struct h5_vnd *vnd;
+};
+
 static void h5_reset_rx(struct h5 *h5);
 
 static void h5_link_control(struct hci_uart *hu, const void *data, size_t len)
@@ -573,6 +587,10 @@ static int h5_recv(struct hci_uart *hu, const void *data, int count)
                count -= processed;
        }
 
+       pm_runtime_get(&hu->serdev->dev);
+       pm_runtime_mark_last_busy(&hu->serdev->dev);
+       pm_runtime_put_autosuspend(&hu->serdev->dev);
+
        return 0;
 }
 
@@ -609,6 +627,10 @@ static int h5_enqueue(struct hci_uart *hu, struct sk_buff *skb)
                break;
        }
 
+       pm_runtime_get_sync(&hu->serdev->dev);
+       pm_runtime_mark_last_busy(&hu->serdev->dev);
+       pm_runtime_put_autosuspend(&hu->serdev->dev);
+
        return 0;
 }
 
@@ -791,6 +813,8 @@ static int h5_serdev_probe(struct serdev_device *serdev)
 {
        struct device *dev = &serdev->dev;
        struct h5 *h5;
+       const struct h5_device_data *data;
+       int err;
 
        h5 = devm_kzalloc(dev, sizeof(*h5), GFP_KERNEL);
        if (!h5)
@@ -807,20 +831,19 @@ static int h5_serdev_probe(struct serdev_device *serdev)
                if (!match)
                        return -ENODEV;
 
-               h5->vnd = (const struct h5_vnd *)match->driver_data;
+               data = (const struct h5_device_data *)match->driver_data;
+               h5->vnd = data->vnd;
                h5->id  = (char *)match->id;
 
                if (h5->vnd->acpi_gpio_map)
                        devm_acpi_dev_add_driver_gpios(dev,
                                                       h5->vnd->acpi_gpio_map);
        } else {
-               const void *data;
-
                data = of_device_get_match_data(dev);
                if (!data)
                        return -ENODEV;
 
-               h5->vnd = (const struct h5_vnd *)data;
+               h5->vnd = data->vnd;
        }
 
 
@@ -833,7 +856,14 @@ static int h5_serdev_probe(struct serdev_device *serdev)
        if (IS_ERR(h5->device_wake_gpio))
                return PTR_ERR(h5->device_wake_gpio);
 
-       return hci_uart_register_device(&h5->serdev_hu, &h5p);
+       err = hci_uart_register_device(&h5->serdev_hu, &h5p);
+       if (err)
+               return err;
+
+       if (data->driver_info & H5_INFO_WAKEUP_DISABLE)
+               set_bit(H5_WAKEUP_DISABLE, &h5->flags);
+
+       return 0;
 }
 
 static void h5_serdev_remove(struct serdev_device *serdev)
@@ -902,6 +932,9 @@ static int h5_btrtl_setup(struct h5 *h5)
        serdev_device_set_baudrate(h5->hu->serdev, controller_baudrate);
        serdev_device_set_flow_control(h5->hu->serdev, flow_control);
 
+       if (flow_control)
+               set_bit(H5_HW_FLOW_CONTROL, &h5->flags);
+
        err = btrtl_download_firmware(h5->hu->hdev, btrtl_dev);
        /* Give the device some time before the hci-core sends it a reset */
        usleep_range(10000, 20000);
@@ -916,11 +949,25 @@ out_free:
 
 static void h5_btrtl_open(struct h5 *h5)
 {
+       /*
+        * Since h5_btrtl_resume() does a device_reprobe() the suspend handling
+        * done by the hci_suspend_notifier is not necessary; it actually causes
+        * delays and a bunch of errors to get logged, so disable it.
+        */
+       if (test_bit(H5_WAKEUP_DISABLE, &h5->flags))
+               set_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &h5->hu->flags);
+
        /* Devices always start with these fixed parameters */
        serdev_device_set_flow_control(h5->hu->serdev, false);
        serdev_device_set_parity(h5->hu->serdev, SERDEV_PARITY_EVEN);
        serdev_device_set_baudrate(h5->hu->serdev, 115200);
 
+       pm_runtime_set_active(&h5->hu->serdev->dev);
+       pm_runtime_use_autosuspend(&h5->hu->serdev->dev);
+       pm_runtime_set_autosuspend_delay(&h5->hu->serdev->dev,
+                                        SUSPEND_TIMEOUT_MS);
+       pm_runtime_enable(&h5->hu->serdev->dev);
+
        /* The controller needs up to 500ms to wakeup */
        gpiod_set_value_cansleep(h5->enable_gpio, 1);
        gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
@@ -929,21 +976,26 @@ static void h5_btrtl_open(struct h5 *h5)
 
 static void h5_btrtl_close(struct h5 *h5)
 {
+       pm_runtime_disable(&h5->hu->serdev->dev);
+
        gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
        gpiod_set_value_cansleep(h5->enable_gpio, 0);
 }
 
 /* Suspend/resume support. On many devices the RTL BT device loses power during
  * suspend/resume, causing it to lose its firmware and all state. So we simply
- * turn it off on suspend and reprobe on resume.  This mirrors how RTL devices
- * are handled in the USB driver, where the USB_QUIRK_RESET_RESUME is used which
+ * turn it off on suspend and reprobe on resume. This mirrors how RTL devices
+ * are handled in the USB driver, where the BTUSB_WAKEUP_DISABLE is used which
  * also causes a reprobe on resume.
  */
 static int h5_btrtl_suspend(struct h5 *h5)
 {
        serdev_device_set_flow_control(h5->hu->serdev, false);
        gpiod_set_value_cansleep(h5->device_wake_gpio, 0);
-       gpiod_set_value_cansleep(h5->enable_gpio, 0);
+
+       if (test_bit(H5_WAKEUP_DISABLE, &h5->flags))
+               gpiod_set_value_cansleep(h5->enable_gpio, 0);
+
        return 0;
 }
 
@@ -969,17 +1021,25 @@ static void h5_btrtl_reprobe_worker(struct work_struct *work)
 
 static int h5_btrtl_resume(struct h5 *h5)
 {
-       struct h5_btrtl_reprobe *reprobe;
+       if (test_bit(H5_WAKEUP_DISABLE, &h5->flags)) {
+               struct h5_btrtl_reprobe *reprobe;
 
-       reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL);
-       if (!reprobe)
-               return -ENOMEM;
+               reprobe = kzalloc(sizeof(*reprobe), GFP_KERNEL);
+               if (!reprobe)
+                       return -ENOMEM;
 
-       __module_get(THIS_MODULE);
+               __module_get(THIS_MODULE);
+
+               INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker);
+               reprobe->dev = get_device(&h5->hu->serdev->dev);
+               queue_work(system_long_wq, &reprobe->work);
+       } else {
+               gpiod_set_value_cansleep(h5->device_wake_gpio, 1);
+
+               if (test_bit(H5_HW_FLOW_CONTROL, &h5->flags))
+                       serdev_device_set_flow_control(h5->hu->serdev, true);
+       }
 
-       INIT_WORK(&reprobe->work, h5_btrtl_reprobe_worker);
-       reprobe->dev = get_device(&h5->hu->serdev->dev);
-       queue_work(system_long_wq, &reprobe->work);
        return 0;
 }
 
@@ -1001,13 +1061,22 @@ static struct h5_vnd rtl_vnd = {
        .resume         = h5_btrtl_resume,
        .acpi_gpio_map  = acpi_btrtl_gpios,
 };
+
+static const struct h5_device_data h5_data_rtl8822cs = {
+       .vnd = &rtl_vnd,
+};
+
+static const struct h5_device_data h5_data_rtl8723bs = {
+       .driver_info = H5_INFO_WAKEUP_DISABLE,
+       .vnd = &rtl_vnd,
+};
 #endif
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id h5_acpi_match[] = {
 #ifdef CONFIG_BT_HCIUART_RTL
-       { "OBDA0623", (kernel_ulong_t)&rtl_vnd },
-       { "OBDA8723", (kernel_ulong_t)&rtl_vnd },
+       { "OBDA0623", (kernel_ulong_t)&h5_data_rtl8723bs },
+       { "OBDA8723", (kernel_ulong_t)&h5_data_rtl8723bs },
 #endif
        { },
 };
@@ -1016,16 +1085,17 @@ MODULE_DEVICE_TABLE(acpi, h5_acpi_match);
 
 static const struct dev_pm_ops h5_serdev_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(h5_serdev_suspend, h5_serdev_resume)
+       SET_RUNTIME_PM_OPS(h5_serdev_suspend, h5_serdev_resume, NULL)
 };
 
 static const struct of_device_id rtl_bluetooth_of_match[] = {
 #ifdef CONFIG_BT_HCIUART_RTL
        { .compatible = "realtek,rtl8822cs-bt",
-         .data = (const void *)&rtl_vnd },
+         .data = (const void *)&h5_data_rtl8822cs },
        { .compatible = "realtek,rtl8723bs-bt",
-         .data = (const void *)&rtl_vnd },
+         .data = (const void *)&h5_data_rtl8723bs },
        { .compatible = "realtek,rtl8723ds-bt",
-         .data = (const void *)&rtl_vnd },
+         .data = (const void *)&h5_data_rtl8723bs },
 #endif
        { },
 };
index 9e03402..3b00d82 100644 (file)
@@ -343,6 +343,9 @@ int hci_uart_register_device(struct hci_uart *hu,
        hdev->setup = hci_uart_setup;
        SET_HCIDEV_DEV(hdev, &hu->serdev->dev);
 
+       if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags))
+               set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks);
+
        if (test_bit(HCI_UART_RAW_DEVICE, &hu->hdev_flags))
                set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
 
index 4e039d7..fb4a2d0 100644 (file)
@@ -86,9 +86,10 @@ struct hci_uart {
 };
 
 /* HCI_UART proto flag bits */
-#define HCI_UART_PROTO_SET     0
-#define HCI_UART_REGISTERED    1
-#define HCI_UART_PROTO_READY   2
+#define HCI_UART_PROTO_SET             0
+#define HCI_UART_REGISTERED            1
+#define HCI_UART_PROTO_READY           2
+#define HCI_UART_NO_SUSPEND_NOTIFIER   3
 
 /* TX states  */
 #define HCI_UART_SENDING       1
index 09c8ab5..b3691de 100644 (file)
@@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id)
 {
        struct fsl_mc_device *mc_bus_dev, *endpoint;
        struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
@@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
        mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
        strcpy(endpoint1.type, mc_dev->obj_desc.type);
        endpoint1.id = mc_dev->obj_desc.id;
+       endpoint1.if_id = if_id;
 
        err = dprc_get_connection(mc_bus_dev->mc_io, 0,
                                  mc_bus_dev->mc_handle,
index 4dd1077..b33b9d7 100644 (file)
@@ -32,6 +32,7 @@
  * @edl: emergency download mode firmware path (if any)
  * @bar_num: PCI base address register to use for MHI MMIO register space
  * @dma_data_width: DMA transfer word size (32 or 64 bits)
+ * @mru_default: default MRU size for MBIM network packets
  * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
  *                of inband wake support (such as sdx24)
  */
@@ -42,6 +43,7 @@ struct mhi_pci_dev_info {
        const char *edl;
        unsigned int bar_num;
        unsigned int dma_data_width;
+       unsigned int mru_default;
        bool sideband_wake;
 };
 
@@ -272,6 +274,7 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
        .dma_data_width = 32,
+       .mru_default = 32768,
        .sideband_wake = false,
 };
 
@@ -664,6 +667,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mhi_cntrl->status_cb = mhi_pci_status_cb;
        mhi_cntrl->runtime_get = mhi_pci_runtime_get;
        mhi_cntrl->runtime_put = mhi_pci_runtime_put;
+       mhi_cntrl->mru = info->mru_default;
 
        if (info->sideband_wake) {
                mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
index 6eaefea..5ac53dc 100644 (file)
@@ -4050,16 +4050,15 @@ static int hdlcdev_close(struct net_device *dev)
  * called by network layer to process IOCTL call to network device
  *
  * dev  pointer to network device structure
- * ifr  pointer to network interface request structure
- * cmd  IOCTL command code
+ * ifs  pointer to network interface settings structure
  *
  * returns 0 if success, otherwise error code
  */
-static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdev_wan_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        const size_t size = sizeof(sync_serial_settings);
        sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
        MGSLPC_INFO *info = dev_to_port(dev);
        unsigned int flags;
 
@@ -4070,17 +4069,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        if (info->port.count)
                return -EBUSY;
 
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
-
        memset(&new_line, 0, size);
 
-       switch(ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE: /* return current sync_serial_settings */
 
-               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_SYNC_SERIAL;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
 
@@ -4148,9 +4144,8 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        tty_kref_put(tty);
                }
                return 0;
-
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -4225,7 +4220,7 @@ static const struct net_device_ops hdlcdev_ops = {
        .ndo_open       = hdlcdev_open,
        .ndo_stop       = hdlcdev_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = hdlcdev_ioctl,
+       .ndo_siocwandev = hdlcdev_wan_ioctl,
        .ndo_tx_timeout = hdlcdev_tx_timeout,
 };
 
index b8e5e37..a190fb5 100644 (file)
@@ -996,7 +996,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                                  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
        MLX5_SET(cqc, cqc, uar_page, index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
        if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
                MLX5_SET(cqc, cqc, oi, 1);
index c869b2a..e95967a 100644 (file)
@@ -1436,11 +1436,10 @@ out:
        rcu_read_unlock();
 }
 
-static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
 {
        if (!MLX5_CAP_GEN(dev->mdev, apu) ||
-           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
-                     apu_thread_cq))
+           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
                return false;
 
        return true;
@@ -1500,7 +1499,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
                err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
                                           cmd_in_len, cmd_out, cmd_out_len);
        } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
-                  !is_apu_thread_cq(dev, cmd_in)) {
+                  !is_apu_cq(dev, cmd_in)) {
                obj->flags |= DEVX_OBJ_FLAGS_CQ;
                obj->core_cq.comp = devx_cq_comp;
                err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
index b25e0b3..5282148 100644 (file)
@@ -8,13 +8,15 @@
 #include "srq.h"
 
 static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+                     struct mlx5_eswitch_rep *rep,
+                     int vport_index)
 {
        struct mlx5_ib_dev *ibdev;
-       int vport_index;
 
        ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
-       vport_index = rep->vport_index;
+       if (!ibdev)
+               return -EINVAL;
 
        ibdev->port[vport_index].rep = rep;
        rep->rep_data[REP_IB].priv = ibdev;
@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        return 0;
 }
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
        u32 num_ports = mlx5_eswitch_get_total_vports(dev);
        const struct mlx5_ib_profile *profile;
+       struct mlx5_core_dev *peer_dev;
        struct mlx5_ib_dev *ibdev;
+       u32 peer_num_ports;
        int vport_index;
        int ret;
 
+       vport_index = rep->vport_index;
+
+       if (mlx5_lag_is_shared_fdb(dev)) {
+               peer_dev = mlx5_lag_get_peer_mdev(dev);
+               peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+               if (mlx5_lag_is_master(dev)) {
+                       /* Only 1 ib port is the representor for both uplinks */
+                       num_ports += peer_num_ports - 1;
+               } else {
+                       if (rep->vport == MLX5_VPORT_UPLINK)
+                               return 0;
+                       vport_index += peer_num_ports;
+                       dev = peer_dev;
+               }
+       }
+
        if (rep->vport == MLX5_VPORT_UPLINK)
                profile = &raw_eth_profile;
        else
-               return mlx5_ib_set_vport_rep(dev, rep);
+               return mlx5_ib_set_vport_rep(dev, rep, vport_index);
 
        ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
        if (!ibdev)
@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
                goto fail_add;
 
        rep->rep_data[REP_IB].priv = ibdev;
+       if (mlx5_lag_is_shared_fdb(dev))
+               mlx5_ib_register_peer_vport_reps(dev);
 
        return 0;
 
@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
 static void
 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
+       struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
        struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+       int vport_index = rep->vport_index;
        struct mlx5_ib_port *port;
 
-       port = &dev->port[rep->vport_index];
+       if (WARN_ON(!mdev))
+               return;
+
+       if (mlx5_lag_is_shared_fdb(mdev) &&
+           !mlx5_lag_is_master(mdev)) {
+               struct mlx5_core_dev *peer_mdev;
+
+               if (rep->vport == MLX5_VPORT_UPLINK)
+                       return;
+               peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+               vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+       }
+
+       if (!dev)
+               return;
+
+       port = &dev->port[vport_index];
        write_lock(&port->roce.netdev_lock);
        port->roce.netdev = NULL;
        write_unlock(&port->roce.netdev_lock);
        rep->rep_data[REP_IB].priv = NULL;
        port->rep = NULL;
 
-       if (rep->vport == MLX5_VPORT_UPLINK)
+       if (rep->vport == MLX5_VPORT_UPLINK) {
+               struct mlx5_core_dev *peer_mdev;
+               struct mlx5_eswitch *esw;
+
+               if (mlx5_lag_is_shared_fdb(mdev)) {
+                       peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+                       esw = peer_mdev->priv.eswitch;
+                       mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+               }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       }
 }
 
 static const struct mlx5_eswitch_rep_ops rep_ops = {
@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
        .get_proto_dev = mlx5_ib_rep_to_dev,
 };
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+       struct mlx5_eswitch *esw;
+
+       if (!peer_mdev)
+               return;
+
+       esw = peer_mdev->priv.eswitch;
+       mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+}
+
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
                                          u16 vport_num)
 {
@@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 
        rep = dev->port[port - 1].rep;
 
-       return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+       return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
 }
 
 static int mlx5r_rep_probe(struct auxiliary_device *adev,
index 2507051..466f0a5 100644 (file)
@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
 
 static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                                           struct net_device *ndev,
+                                          struct net_device *upper,
                                           u32 *port_num)
 {
        struct net_device *rep_ndev;
@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                if (!port->rep)
                        continue;
 
+               if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+                       *port_num = i + 1;
+                       return &port->roce;
+               }
+
+               if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+                       continue;
+
                read_lock(&port->roce.netdev_lock);
                rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
                                                  port->rep->vport);
@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this,
                }
 
                if (ibdev->is_rep)
-                       roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+                       roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
                if (!roce)
                        return NOTIFY_DONE;
-               if ((upper == ndev || (!upper && ndev == roce->netdev))
-                   && ibdev->ib_active) {
+               if ((upper == ndev ||
+                    ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
+                   ibdev->ib_active) {
                        struct ib_event ibev = { };
                        enum ib_port_state port_state;
 
@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
        struct mlx5_flow_table *ft;
        int err;
 
-       if (!ns || !mlx5_lag_is_roce(mdev))
+       if (!ns || !mlx5_lag_is_active(mdev))
                return 0;
 
        err = mlx5_cmd_create_vport_lag(mdev);
@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
 {
        int err;
 
-       err = mlx5_nic_vport_enable_roce(dev->mdev);
-       if (err)
-               return err;
+       if (!dev->is_rep && dev->profile != &raw_eth_profile) {
+               err = mlx5_nic_vport_enable_roce(dev->mdev);
+               if (err)
+                       return err;
+       }
 
        err = mlx5_eth_lag_init(dev);
        if (err)
@@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
        return 0;
 
 err_disable_roce:
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 
        return err;
 }
@@ -3093,7 +3106,8 @@ err_disable_roce:
 static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 {
        mlx5_eth_lag_cleanup(dev);
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
 static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
 
                /* Register only for native ports */
                err = mlx5_add_netdev_notifier(dev, port_num);
-               if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
-                       /*
-                        * We don't enable ETH interface for
-                        * 1. IB representors
-                        * 2. User disabled ROCE through devlink interface
-                        */
+               if (err)
                        return err;
 
                err = mlx5_enable_eth(dev);
@@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               if (!dev->is_rep)
-                       mlx5_disable_eth(dev);
+               mlx5_disable_eth(dev);
 
                port_num = mlx5_core_native_port_num(dev->mdev) - 1;
                mlx5_remove_netdev_notifier(dev, port_num);
@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        const char *name;
 
-       if (!mlx5_lag_is_roce(dev->mdev))
+       if (!mlx5_lag_is_active(dev->mdev))
                name = "mlx5_%d";
        else
                name = "mlx5_bond_%d";
index c0ddf7b..bbfcce3 100644 (file)
@@ -114,14 +114,18 @@ out:
 static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
                               struct mlx5_ib_uapi_query_port *info)
 {
-       struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_eswitch_rep *rep;
+       struct mlx5_core_dev *mdev;
        int err;
 
        rep = dev->port[port_num - 1].rep;
        if (!rep)
                return -EOPNOTSUPP;
 
+       mdev = mlx5_eswitch_get_core_dev(rep->esw);
+       if (!mdev)
+               return -EINVAL;
+
        info->vport = rep->vport;
        info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
 
@@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
        if (err)
                return err;
 
-       if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+       if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
                info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
-                       mdev->priv.eswitch, rep->vport);
+                       rep->esw, rep->vport);
                info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
                info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
        }
index 823f683..a09ca21 100644 (file)
@@ -72,7 +72,9 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
 }
 
 static int ipoib_get_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
@@ -83,7 +85,9 @@ static int ipoib_get_coalesce(struct net_device *dev,
 }
 
 static int ipoib_set_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        int ret;
index abf60f4..0aa8629 100644 (file)
@@ -1745,10 +1745,10 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
-       if (!priv->rn_ops->ndo_do_ioctl)
+       if (!priv->rn_ops->ndo_eth_ioctl)
                return -EOPNOTSUPP;
 
-       return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
+       return priv->rn_ops->ndo_eth_ioctl(dev, ifr, cmd);
 }
 
 static int ipoib_dev_init(struct net_device *dev)
@@ -2078,7 +2078,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
        .ndo_set_vf_guid         = ipoib_set_vf_guid,
        .ndo_set_mac_address     = ipoib_set_mac,
        .ndo_get_stats64         = ipoib_get_stats,
-       .ndo_do_ioctl            = ipoib_ioctl,
+       .ndo_eth_ioctl           = ipoib_ioctl,
 };
 
 static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -2093,7 +2093,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
        .ndo_set_rx_mode         = ipoib_set_mcast_list,
        .ndo_get_iflink          = ipoib_get_iflink,
        .ndo_get_stats64         = ipoib_get_stats,
-       .ndo_do_ioctl            = ipoib_ioctl,
+       .ndo_eth_ioctl           = ipoib_ioctl,
 };
 
 static const struct net_device_ops ipoib_netdev_default_pf = {
index afae0af..3eff08d 100644 (file)
@@ -160,7 +160,7 @@ static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
                goto unlock;
        }
 
-       ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+       ret = bpf_prog_array_copy(old_array, NULL, prog, 0, &new_array);
        if (ret < 0)
                goto unlock;
 
@@ -193,7 +193,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
        }
 
        old_array = lirc_rcu_dereference(raw->progs);
-       ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
+       ret = bpf_prog_array_copy(old_array, prog, NULL, 0, &new_array);
        /*
         * Do not use bpf_prog_array_delete_safe() as we would end up
         * with a dummy entry in the array, and the we would free the
@@ -217,7 +217,7 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
        raw->bpf_sample = sample;
 
        if (raw->progs)
-               BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, BPF_PROG_RUN);
+               BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
 }
 
 /*
index 6977f82..f37b1c5 100644 (file)
@@ -431,10 +431,10 @@ config VSOCKMON
 config MHI_NET
        tristate "MHI network driver"
        depends on MHI_BUS
-       select WWAN
        help
          This is the network driver for MHI bus.  It can be used with
-         QCOM based WWAN modems (like SDX55).  Say Y or M.
+         QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55).
+         Say Y or M.
 
 endif # NET_CORE
 
@@ -483,6 +483,8 @@ config NET_SB1000
 
 source "drivers/net/phy/Kconfig"
 
+source "drivers/net/mctp/Kconfig"
+
 source "drivers/net/mdio/Kconfig"
 
 source "drivers/net/pcs/Kconfig"
@@ -549,8 +551,8 @@ config VMXNET3
        tristate "VMware VMXNET3 ethernet driver"
        depends on PCI && INET
        depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \
-                    IA64_PAGE_SIZE_64KB || MICROBLAZE_64K_PAGES || \
-                    PARISC_PAGE_SIZE_64KB || PPC_64K_PAGES)
+                    IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \
+                    PPC_64K_PAGES)
        help
          This driver supports VMware's vmxnet3 virtual ethernet NIC.
          To compile this driver as a module, choose M here: the
@@ -604,4 +606,11 @@ config NET_FAILOVER
          a VM with direct attached VF by failing over to the paravirtual
          datapath when the VF is unplugged.
 
+config NETDEV_LEGACY_INIT
+       bool
+       depends on ISA
+       help
+         Drivers that call netdev_boot_setup_check() should select this
+         symbol, everything else no longer needs it.
+
 endif # NETDEVICES
index 7ffd2d0..7398386 100644 (file)
@@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
 obj-$(CONFIG_MDIO) += mdio.o
-obj-$(CONFIG_NET) += Space.o loopback.o
+obj-$(CONFIG_NET) += loopback.o
+obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-y += phy/
 obj-y += mdio/
@@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o
 obj-$(CONFIG_NLMON) += nlmon.o
 obj-$(CONFIG_NET_VRF) += vrf.o
 obj-$(CONFIG_VSOCKMON) += vsockmon.o
-obj-$(CONFIG_MHI_NET) += mhi/
+obj-$(CONFIG_MHI_NET) += mhi_net.o
 
 #
 # Networking Drivers
@@ -69,6 +70,7 @@ obj-$(CONFIG_WAN) += wan/
 obj-$(CONFIG_WLAN) += wireless/
 obj-$(CONFIG_IEEE802154) += ieee802154/
 obj-$(CONFIG_WWAN) += wwan/
+obj-$(CONFIG_MCTP) += mctp/
 
 obj-$(CONFIG_VMXNET3) += vmxnet3/
 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
index df79e73..49e67c9 100644 (file)
 #include <linux/netlink.h>
 #include <net/Space.h>
 
+/*
+ * This structure holds boot-time configured netdevice settings. They
+ * are then used in the device probing.
+ */
+struct netdev_boot_setup {
+       char name[IFNAMSIZ];
+       struct ifmap map;
+};
+#define NETDEV_BOOT_SETUP_MAX 8
+
+
+/******************************************************************************
+ *
+ *                   Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *     netdev_boot_setup_add   - add new setup entry
+ *     @name: name of the device
+ *     @map: configured settings for the device
+ *
+ *     Adds new setup entry to the dev_boot_setup list.  The function
+ *     returns 0 on error and 1 on success.  This is a generic routine to
+ *     all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+       struct netdev_boot_setup *s;
+       int i;
+
+       s = dev_boot_setup;
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+                       memset(s[i].name, 0, sizeof(s[i].name));
+                       strlcpy(s[i].name, name, IFNAMSIZ);
+                       memcpy(&s[i].map, map, sizeof(s[i].map));
+                       break;
+               }
+       }
+
+       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check     - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+       struct netdev_boot_setup *s = dev_boot_setup;
+       int i;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+                   !strcmp(dev->name, s[i].name)) {
+                       dev->irq = s[i].map.irq;
+                       dev->base_addr = s[i].map.base_addr;
+                       dev->mem_start = s[i].map.mem_start;
+                       dev->mem_end = s[i].map.mem_end;
+                       return 1;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+/**
+ * netdev_boot_base    - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found.
+ */
+static unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+       const struct netdev_boot_setup *s = dev_boot_setup;
+       char name[IFNAMSIZ];
+       int i;
+
+       sprintf(name, "%s%d", prefix, unit);
+
+       /*
+        * If device already registered then return base of 1
+        * to indicate not to probe for this interface
+        */
+       if (__dev_get_by_name(&init_net, name))
+               return 1;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+               if (!strcmp(name, s[i].name))
+                       return s[i].map.base_addr;
+       return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+       int ints[5];
+       struct ifmap map;
+
+       str = get_options(str, ARRAY_SIZE(ints), ints);
+       if (!str || !*str)
+               return 0;
+
+       /* Save settings */
+       memset(&map, 0, sizeof(map));
+       if (ints[0] > 0)
+               map.irq = ints[1];
+       if (ints[0] > 1)
+               map.base_addr = ints[2];
+       if (ints[0] > 2)
+               map.mem_start = ints[3];
+       if (ints[0] > 3)
+               map.mem_end = ints[4];
+
+       /* Add new entry to the list */
+       return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+static int __init ether_boot_setup(char *str)
+{
+       return netdev_boot_setup(str);
+}
+__setup("ether=", ether_boot_setup);
+
+
 /* A unified ethernet device probe.  This is the easiest way to have every
  * ethernet adaptor have the name "eth[0123...]".
  */
@@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = {
 #ifdef CONFIG_SMC9194
        {smc_init, 0},
 #endif
-#ifdef CONFIG_CS89x0
-#ifndef CONFIG_CS89x0_PLATFORM
+#ifdef CONFIG_CS89x0_ISA
        {cs89x0_probe, 0},
 #endif
-#endif
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET)        /* Intel */
-       {i82596_probe, 0},                                      /* I82596 */
-#endif
 #ifdef CONFIG_NI65
        {ni65_probe, 0},
 #endif
        {NULL, 0},
 };
 
-static struct devprobe2 m68k_probes[] __initdata = {
-#ifdef CONFIG_ATARILANCE       /* Lance-based Atari ethernet boards */
-       {atarilance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3LANCE         /* sun3 onboard Lance chip */
-       {sun3lance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3_82586        /* sun3 onboard Intel 82586 chip */
-       {sun3_82586_probe, 0},
-#endif
-#ifdef CONFIG_APNE             /* A1200 PCMCIA NE2000 */
-       {apne_probe, 0},
-#endif
-#ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
-       {mvme147lance_probe, 0},
-#endif
-       {NULL, 0},
-};
-
 /* Unified ethernet device probe, segmented per architecture and
  * per bus interface. This drives the legacy devices only for now.
  */
@@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit)
        if (base_addr == 1)
                return;
 
-       (void)(probe_list2(unit, m68k_probes, base_addr == 0) &&
-               probe_list2(unit, isa_probes, base_addr == 0));
+       probe_list2(unit, isa_probes, base_addr == 0);
 }
 
 /*  Statically configured drivers -- order matters here. */
@@ -130,10 +247,6 @@ static int __init net_olddevs_init(void)
 {
        int num;
 
-#ifdef CONFIG_SBNI
-       for (num = 0; num < 8; ++num)
-               sbni_probe(num);
-#endif
        for (num = 0; num < 8; ++num)
                ethif_probe2(num);
 
@@ -142,9 +255,6 @@ static int __init net_olddevs_init(void)
        cops_probe(1);
        cops_probe(2);
 #endif
-#ifdef CONFIG_LTPC
-       ltpc_probe();
-#endif
 
        return 0;
 }
index 4391839..90b9f1d 100644 (file)
@@ -52,7 +52,9 @@ config LTPC
 
 config COPS
        tristate "COPS LocalTalk PC support"
-       depends on DEV_APPLETALK && (ISA || EISA)
+       depends on DEV_APPLETALK && ISA
+       depends on NETDEVICES
+       select NETDEV_LEGACY_INIT
        help
          This allows you to use COPS AppleTalk cards to connect to LocalTalk
          networks. You also need version 1.3.3 or later of the netatalk
index 51cf5ec..5566dae 100644 (file)
@@ -54,11 +54,12 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb,
 static int ipddp_create(struct ipddp_route *new_rt);
 static int ipddp_delete(struct ipddp_route *rt);
 static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt);
-static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                               void __user *data, int cmd);
 
 static const struct net_device_ops ipddp_netdev_ops = {
        .ndo_start_xmit         = ipddp_xmit,
-       .ndo_do_ioctl           = ipddp_ioctl,
+       .ndo_siocdevprivate     = ipddp_siocdevprivate,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
@@ -268,15 +269,18 @@ static struct ipddp_route* __ipddp_find_route(struct ipddp_route *rt)
         return NULL;
 }
 
-static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int ipddp_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                               void __user *data, int cmd)
 {
-        struct ipddp_route __user *rt = ifr->ifr_data;
         struct ipddp_route rcp, rcp2, *rp;
 
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+
         if(!capable(CAP_NET_ADMIN))
                 return -EPERM;
 
-       if(copy_from_user(&rcp, rt, sizeof(rcp)))
+       if (copy_from_user(&rcp, data, sizeof(rcp)))
                return -EFAULT;
 
         switch(cmd)
@@ -296,7 +300,7 @@ static int ipddp_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        spin_unlock_bh(&ipddp_route_lock);
 
                        if (rp) {
-                               if (copy_to_user(rt, &rcp2,
+                               if (copy_to_user(data, &rcp2,
                                                 sizeof(struct ipddp_route)))
                                        return -EFAULT;
                                return 0;
index 69c2708..1f8925e 100644 (file)
@@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = {
        .ndo_set_rx_mode        = set_multicast_list,
 };
 
-struct net_device * __init ltpc_probe(void)
+static struct net_device * __init ltpc_probe(void)
 {
        struct net_device *dev;
        int err = -ENOMEM;
@@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str)
 }
 
 __setup("ltpc=", ltpc_setup);
-#endif /* MODULE */
+#endif
 
 static struct net_device *dev_ltpc;
 
-#ifdef MODULE
-
 MODULE_LICENSE("GPL");
 module_param(debug, int, 0);
 module_param_hw(io, int, ioport, 0);
@@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void)
        return PTR_ERR_OR_ZERO(dev_ltpc);
 }
 module_init(ltpc_module_init);
-#endif
 
 static void __exit ltpc_cleanup(void)
 {
index 6908822..a4a202b 100644 (file)
@@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port, bool *update_slave_arr);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
 static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
 static void ad_agg_selection_logic(struct aggregator *aggregator,
                                   bool *update_slave_arr);
@@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port)
 /**
  * ad_periodic_machine - handle a port's periodic state machine
  * @port: the port we're looking at
+ * @bond_params: bond parameters we will use
  *
  * Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
  */
-static void ad_periodic_machine(struct port *port)
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
 {
        periodic_states_t last_state;
 
@@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port)
 
        /* check if port was reinitialized */
        if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
-           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
-          ) {
+           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
+           !bond_params.lacp_active) {
                port->sm_periodic_state = AD_NO_PERIODIC;
        }
        /* check if state machine should change state */
@@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
                }
 
                ad_rx_machine(NULL, port);
-               ad_periodic_machine(port);
+               ad_periodic_machine(port, bond->params);
                ad_port_selection_logic(port, &update_slave_arr);
                ad_mux_machine(port, &update_slave_arr);
                ad_tx_machine(port);
index 22e5632..7d3752c 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/if_bonding.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
-#include <net/ipx.h>
 #include <net/arp.h>
 #include <net/ipv6.h>
 #include <asm/byteorder.h>
@@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
        if (!is_multicast_ether_addr(eth_data->h_dest)) {
                switch (skb->protocol) {
                case htons(ETH_P_IP):
-               case htons(ETH_P_IPX):
-                   /* In case of IPX, it will falback to L2 hash */
                case htons(ETH_P_IPV6):
                        hash_index = bond_xmit_hash(bond, skb);
                        if (bond->params.tlb_dynamic_lb) {
@@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
                hash_size = sizeof(ip6hdr->daddr);
                break;
        }
-       case ETH_P_IPX: {
-               const struct ipxhdr *ipxhdr;
-
-               if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
-                       do_tx_balance = false;
-                       break;
-               }
-               ipxhdr = (struct ipxhdr *)skb_network_header(skb);
-
-               if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
-                       /* something is wrong with this packet */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
-                       /* The only protocol worth balancing in
-                        * this family since it has an "ARP" like
-                        * mechanism
-                        */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               eth_data = eth_hdr(skb);
-               hash_start = (char *)eth_data->h_dest;
-               hash_size = ETH_ALEN;
-               break;
-       }
        case ETH_P_ARP:
                do_tx_balance = false;
                if (bond_info->rlb_enabled)
index 31730ef..b0966e7 100644 (file)
@@ -317,6 +317,25 @@ bool bond_sk_check(struct bonding *bond)
        }
 }
 
+static bool bond_xdp_check(struct bonding *bond)
+{
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_ACTIVEBACKUP:
+               return true;
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               /* vlan+srcmac is not supported with XDP as in most cases the 802.1q
+                * payload is not in the packet due to hardware offload.
+                */
+               if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC)
+                       return true;
+               fallthrough;
+       default:
+               return false;
+       }
+}
+
 /*---------------------------------- VLAN -----------------------------------*/
 
 /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -732,7 +751,7 @@ static int bond_check_dev_link(struct bonding *bond,
                        BMSR_LSTATUS : 0;
 
        /* Ethtool can't be used, fallback to MII ioctls. */
-       ioctl = slave_ops->ndo_do_ioctl;
+       ioctl = slave_ops->ndo_eth_ioctl;
        if (ioctl) {
                /* TODO: set pointer to correct ioctl on a per team member
                 *       bases to make this more efficient. that is, once
@@ -756,7 +775,7 @@ static int bond_check_dev_link(struct bonding *bond,
                }
        }
 
-       /* If reporting, report that either there's no dev->do_ioctl,
+       /* If reporting, report that either there's no ndo_eth_ioctl,
         * or both SIOCGMIIREG and get_link failed (meaning that we
         * cannot report link status).  If not reporting, pretend
         * we're ok.
@@ -1712,6 +1731,20 @@ void bond_lower_state_changed(struct slave *slave)
        netdev_lower_state_changed(slave->dev, &info);
 }
 
+#define BOND_NL_ERR(bond_dev, extack, errmsg) do {             \
+       if (extack)                                             \
+               NL_SET_ERR_MSG(extack, errmsg);                 \
+       else                                                    \
+               netdev_err(bond_dev, "Error: %s\n", errmsg);    \
+} while (0)
+
+#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do {         \
+       if (extack)                                                     \
+               NL_SET_ERR_MSG(extack, errmsg);                         \
+       else                                                            \
+               slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg);  \
+} while (0)
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                 struct netlink_ext_ack *extack)
@@ -1725,29 +1758,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 
        if (slave_dev->flags & IFF_MASTER &&
            !netif_is_bond_master(slave_dev)) {
-               NL_SET_ERR_MSG(extack, "Device with IFF_MASTER cannot be enslaved");
-               netdev_err(bond_dev,
-                          "Error: Device with IFF_MASTER cannot be enslaved\n");
+               BOND_NL_ERR(bond_dev, extack,
+                           "Device type (master device) cannot be enslaved");
                return -EPERM;
        }
 
        if (!bond->params.use_carrier &&
            slave_dev->ethtool_ops->get_link == NULL &&
-           slave_ops->ndo_do_ioctl == NULL) {
+           slave_ops->ndo_eth_ioctl == NULL) {
                slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
        }
 
        /* already in-use? */
        if (netdev_is_rx_handler_busy(slave_dev)) {
-               NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
-               slave_err(bond_dev, slave_dev,
-                         "Error: Device is in use and cannot be enslaved\n");
+               SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                            "Device is in use and cannot be enslaved");
                return -EBUSY;
        }
 
        if (bond_dev == slave_dev) {
-               NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
-               netdev_err(bond_dev, "cannot enslave bond to itself.\n");
+               BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself.");
                return -EPERM;
        }
 
@@ -1756,8 +1786,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
        if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
                slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n");
                if (vlan_uses_dev(bond_dev)) {
-                       NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
-                       slave_err(bond_dev, slave_dev, "Error: cannot enslave VLAN challenged slave on VLAN enabled bond\n");
+                       SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                                    "Can not enslave VLAN challenged device to VLAN enabled bond");
                        return -EPERM;
                } else {
                        slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n");
@@ -1775,8 +1805,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
         * enslaving it; the old ifenslave will not.
         */
        if (slave_dev->flags & IFF_UP) {
-               NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
-               slave_err(bond_dev, slave_dev, "slave is up - this may be due to an out of date ifenslave\n");
+               SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                            "Device can not be enslaved while up");
                return -EPERM;
        }
 
@@ -1815,17 +1845,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                                                 bond_dev);
                }
        } else if (bond_dev->type != slave_dev->type) {
-               NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
-               slave_err(bond_dev, slave_dev, "ether type (%d) is different from other slaves (%d), can not enslave it\n",
-                         slave_dev->type, bond_dev->type);
+               SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                            "Device type is different from other slaves");
                return -EINVAL;
        }
 
        if (slave_dev->type == ARPHRD_INFINIBAND &&
            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
-               NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
-               slave_warn(bond_dev, slave_dev, "Type (%d) supports only active-backup mode\n",
-                          slave_dev->type);
+               SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                            "Only active-backup mode is supported for infiniband slaves");
                res = -EOPNOTSUPP;
                goto err_undo_flags;
        }
@@ -1839,8 +1867,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                                bond->params.fail_over_mac = BOND_FOM_ACTIVE;
                                slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
                        } else {
-                               NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
-                               slave_err(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
+                               SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                                            "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
                                res = -EOPNOTSUPP;
                                goto err_undo_flags;
                        }
@@ -2133,6 +2161,39 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                bond_update_slave_arr(bond, NULL);
 
 
+       if (!slave_dev->netdev_ops->ndo_bpf ||
+           !slave_dev->netdev_ops->ndo_xdp_xmit) {
+               if (bond->xdp_prog) {
+                       SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                                    "Slave does not support XDP");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+       } else {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = bond->xdp_prog,
+                       .extack  = extack,
+               };
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       SLAVE_NL_ERR(bond_dev, slave_dev, extack,
+                                    "Slave has XDP program loaded, please unload before enslaving");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+
+               res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (res < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+                       goto err_sysfs_del;
+               }
+               if (bond->xdp_prog)
+                       bpf_prog_inc(bond->xdp_prog);
+       }
+
        slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
                   bond_is_active_slave(new_slave) ? "an active" : "a backup",
                   new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2252,7 +2313,17 @@ static int __bond_release_one(struct net_device *bond_dev,
        /* recompute stats just before removing the slave */
        bond_get_stats(bond->dev, &bond->bond_stats);
 
-       bond_upper_dev_unlink(bond, slave);
+       if (bond->xdp_prog) {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = NULL,
+                       .extack  = NULL,
+               };
+               if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+                       slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+       }
+
        /* unregister rx_handler early so bond_handle_frame wouldn't be called
         * for this slave anymore.
         */
@@ -2261,6 +2332,8 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (BOND_MODE(bond) == BOND_MODE_8023AD)
                bond_3ad_unbind_slave(slave);
 
+       bond_upper_dev_unlink(bond, slave);
+
        if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, slave);
 
@@ -3613,90 +3686,112 @@ static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
+/* Helper to access data in a packet, with or without a backing skb.
+ * If skb is given the data is linearized if necessary via pskb_may_pull.
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+                                        const void *data, int hlen, int n)
+{
+       if (likely(n <= hlen))
+               return data;
+       else if (skb && likely(pskb_may_pull(skb, n)))
+               return skb->head;
+
+       return NULL;
+}
+
 /* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *ep, hdr_tmp;
+       struct ethhdr *ep;
 
-       ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
-       if (ep)
-               return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
-       return 0;
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+
+       ep = (struct ethhdr *)(data + mhoff);
+       return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
 }
 
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
-                        int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+                        int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
 {
        const struct ipv6hdr *iph6;
        const struct iphdr *iph;
 
-       if (skb->protocol == htons(ETH_P_IP)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+       if (l2_proto == htons(ETH_P_IP)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+               if (!data)
                        return false;
-               iph = (const struct iphdr *)(skb->data + *noff);
+
+               iph = (const struct iphdr *)(data + *nhoff);
                iph_to_flow_copy_v4addrs(fk, iph);
-               *noff += iph->ihl << 2;
+               *nhoff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
-                       *proto = iph->protocol;
-       } else if (skb->protocol == htons(ETH_P_IPV6)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+                       *ip_proto = iph->protocol;
+       } else if (l2_proto == htons(ETH_P_IPV6)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+               if (!data)
                        return false;
-               iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+               iph6 = (const struct ipv6hdr *)(data + *nhoff);
                iph_to_flow_copy_v6addrs(fk, iph6);
-               *noff += sizeof(*iph6);
-               *proto = iph6->nexthdr;
+               *nhoff += sizeof(*iph6);
+               *ip_proto = iph6->nexthdr;
        } else {
                return false;
        }
 
-       if (l34 && *proto >= 0)
-               fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+       if (l34 && *ip_proto >= 0)
+               fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
 
        return true;
 }
 
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
        u32 srcmac_vendor = 0, srcmac_dev = 0;
-       u16 vlan;
+       struct ethhdr *mac_hdr;
+       u16 vlan = 0;
        int i;
 
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+       mac_hdr = (struct ethhdr *)(data + mhoff);
+
        for (i = 0; i < 3; i++)
                srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
 
        for (i = 3; i < ETH_ALEN; i++)
                srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i];
 
-       if (!skb_vlan_tag_present(skb))
-               return srcmac_vendor ^ srcmac_dev;
-
-       vlan = skb_vlan_tag_get(skb);
+       if (skb && skb_vlan_tag_present(skb))
+               vlan = skb_vlan_tag_get(skb);
 
        return vlan ^ srcmac_vendor ^ srcmac_dev;
 }
 
 /* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
-                             struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+                             __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
 {
        bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
-       int noff, proto = -1;
+       int ip_proto = -1;
 
        switch (bond->params.xmit_policy) {
        case BOND_XMIT_POLICY_ENCAP23:
        case BOND_XMIT_POLICY_ENCAP34:
                memset(fk, 0, sizeof(*fk));
                return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
-                                         fk, NULL, 0, 0, 0, 0);
+                                         fk, data, l2_proto, nhoff, hlen, 0);
        default:
                break;
        }
 
        fk->ports.ports = 0;
        memset(&fk->icmp, 0, sizeof(fk->icmp));
-       noff = skb_network_offset(skb);
-       if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+       if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
                return false;
 
        /* ICMP error packets contains at least 8 bytes of the header
@@ -3704,22 +3799,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
         * to correlate ICMP error packets within the same flow which
         * generated the error.
         */
-       if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
-               skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
-                                     skb_transport_offset(skb),
-                                     skb_headlen(skb));
-               if (proto == IPPROTO_ICMP) {
+       if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+               skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+               if (ip_proto == IPPROTO_ICMP) {
                        if (!icmp_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmphdr);
-               } else if (proto == IPPROTO_ICMPV6) {
+                       nhoff += sizeof(struct icmphdr);
+               } else if (ip_proto == IPPROTO_ICMPV6) {
                        if (!icmpv6_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmp6hdr);
+                       nhoff += sizeof(struct icmp6hdr);
                }
-               return bond_flow_ip(skb, fk, &noff, &proto, l34);
+               return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
        }
 
        return true;
@@ -3735,33 +3828,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
        return hash >> 1;
 }
 
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
  */
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+                           __be16 l2_proto, int mhoff, int nhoff, int hlen)
 {
        struct flow_keys flow;
        u32 hash;
 
-       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-           skb->l4_hash)
-               return skb->hash;
-
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
-               return bond_vlan_srcmac_hash(skb);
+               return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
-           !bond_flow_dissect(bond, skb, &flow))
-               return bond_eth_hash(skb);
+           !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+               return bond_eth_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
-               hash = bond_eth_hash(skb);
+               hash = bond_eth_hash(skb, data, mhoff, hlen);
        } else {
                if (flow.icmp.id)
                        memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3772,6 +3858,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
        return bond_ip_hash(hash, &flow);
 }
 
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+
+       return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+                               skb->mac_header, skb->network_header,
+                               skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+       struct ethhdr *eth;
+
+       if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+               return 0;
+
+       eth = (struct ethhdr *)xdp->data;
+
+       return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+                               sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
 /*-------------------------- Device entry points ----------------------------*/
 
 void bond_work_init_all(struct bonding *bond)
@@ -3962,20 +4087,13 @@ static void bond_get_stats(struct net_device *bond_dev,
        rcu_read_unlock();
 }
 
-static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
+static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
 {
        struct bonding *bond = netdev_priv(bond_dev);
-       struct net_device *slave_dev = NULL;
-       struct ifbond k_binfo;
-       struct ifbond __user *u_binfo = NULL;
-       struct ifslave k_sinfo;
-       struct ifslave __user *u_sinfo = NULL;
        struct mii_ioctl_data *mii = NULL;
-       struct bond_opt_value newval;
-       struct net *net;
-       int res = 0;
+       int res;
 
-       netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd);
+       netdev_dbg(bond_dev, "bond_eth_ioctl: cmd=%d\n", cmd);
 
        switch (cmd) {
        case SIOCGMIIPHY:
@@ -4000,7 +4118,28 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
                }
 
                return 0;
-       case BOND_INFO_QUERY_OLD:
+       default:
+               res = -EOPNOTSUPP;
+       }
+
+       return res;
+}
+
+static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
+{
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct net_device *slave_dev = NULL;
+       struct ifbond k_binfo;
+       struct ifbond __user *u_binfo = NULL;
+       struct ifslave k_sinfo;
+       struct ifslave __user *u_sinfo = NULL;
+       struct bond_opt_value newval;
+       struct net *net;
+       int res = 0;
+
+       netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd);
+
+       switch (cmd) {
        case SIOCBONDINFOQUERY:
                u_binfo = (struct ifbond __user *)ifr->ifr_data;
 
@@ -4012,7 +4151,6 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
                        return -EFAULT;
 
                return 0;
-       case BOND_SLAVE_INFO_QUERY_OLD:
        case SIOCBONDSLAVEINFOQUERY:
                u_sinfo = (struct ifslave __user *)ifr->ifr_data;
 
@@ -4042,19 +4180,15 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
                return -ENODEV;
 
        switch (cmd) {
-       case BOND_ENSLAVE_OLD:
        case SIOCBONDENSLAVE:
                res = bond_enslave(bond_dev, slave_dev, NULL);
                break;
-       case BOND_RELEASE_OLD:
        case SIOCBONDRELEASE:
                res = bond_release(bond_dev, slave_dev);
                break;
-       case BOND_SETHWADDR_OLD:
        case SIOCBONDSETHWADDR:
                res = bond_set_dev_addr(bond_dev, slave_dev);
                break;
-       case BOND_CHANGE_ACTIVE_OLD:
        case SIOCBONDCHANGEACTIVE:
                bond_opt_initstr(&newval, slave_dev->name);
                res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE,
@@ -4067,6 +4201,29 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
        return res;
 }
 
+static int bond_siocdevprivate(struct net_device *bond_dev, struct ifreq *ifr,
+                              void __user *data, int cmd)
+{
+       struct ifreq ifrdata = { .ifr_data = data };
+
+       switch (cmd) {
+       case BOND_INFO_QUERY_OLD:
+               return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDINFOQUERY);
+       case BOND_SLAVE_INFO_QUERY_OLD:
+               return bond_do_ioctl(bond_dev, &ifrdata, SIOCBONDSLAVEINFOQUERY);
+       case BOND_ENSLAVE_OLD:
+               return bond_do_ioctl(bond_dev, ifr, SIOCBONDENSLAVE);
+       case BOND_RELEASE_OLD:
+               return bond_do_ioctl(bond_dev, ifr, SIOCBONDRELEASE);
+       case BOND_SETHWADDR_OLD:
+               return bond_do_ioctl(bond_dev, ifr, SIOCBONDSETHWADDR);
+       case BOND_CHANGE_ACTIVE_OLD:
+               return bond_do_ioctl(bond_dev, ifr, SIOCBONDCHANGEACTIVE);
+       }
+
+       return -EOPNOTSUPP;
+}
+
 static void bond_change_rx_flags(struct net_device *bond_dev, int change)
 {
        struct bonding *bond = netdev_priv(bond_dev);
@@ -4388,6 +4545,47 @@ non_igmp:
        return NULL;
 }
 
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+                                                       struct xdp_buff *xdp)
+{
+       struct slave *slave;
+       int slave_cnt;
+       u32 slave_id;
+       const struct ethhdr *eth;
+       void *data = xdp->data;
+
+       if (data + sizeof(struct ethhdr) > xdp->data_end)
+               goto non_igmp;
+
+       eth = (struct ethhdr *)data;
+       data += sizeof(struct ethhdr);
+
+       /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               const struct iphdr *iph;
+
+               if (data + sizeof(struct iphdr) > xdp->data_end)
+                       goto non_igmp;
+
+               iph = (struct iphdr *)data;
+
+               if (iph->protocol == IPPROTO_IGMP) {
+                       slave = rcu_dereference(bond->curr_active_slave);
+                       if (slave)
+                               return slave;
+                       return bond_get_slave_by_id(bond, 0);
+               }
+       }
+
+non_igmp:
+       slave_cnt = READ_ONCE(bond->slave_cnt);
+       if (likely(slave_cnt)) {
+               slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+               return bond_get_slave_by_id(bond, slave_id);
+       }
+       return NULL;
+}
+
 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
                                        struct net_device *bond_dev)
 {
@@ -4401,8 +4599,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
        return bond_tx_drop(bond_dev, skb);
 }
 
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
-                                                     struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
 {
        return rcu_dereference(bond->curr_active_slave);
 }
@@ -4416,7 +4613,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
 
-       slave = bond_xmit_activebackup_slave_get(bond, skb);
+       slave = bond_xmit_activebackup_slave_get(bond);
        if (slave)
                return bond_dev_queue_xmit(bond, skb, slave->dev);
 
@@ -4604,6 +4801,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
        return slave;
 }
 
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+                                                    struct xdp_buff *xdp)
+{
+       struct bond_up_slave *slaves;
+       unsigned int count;
+       u32 hash;
+
+       hash = bond_xmit_hash_xdp(bond, xdp);
+       slaves = rcu_dereference(bond->usable_slaves);
+       count = slaves ? READ_ONCE(slaves->count) : 0;
+       if (unlikely(!count))
+               return NULL;
+
+       return slaves->arr[hash % count];
+}
+
 /* Use this Xmit function for 3AD as well as XOR modes. The current
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
@@ -4714,7 +4927,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
                slave = bond_xmit_roundrobin_slave_get(bond, skb);
                break;
        case BOND_MODE_ACTIVEBACKUP:
-               slave = bond_xmit_activebackup_slave_get(bond, skb);
+               slave = bond_xmit_activebackup_slave_get(bond);
                break;
        case BOND_MODE_8023AD:
        case BOND_MODE_XOR:
@@ -4888,6 +5101,172 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+
+       /* Caller needs to hold rcu_read_lock() */
+
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+               slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+               break;
+
+       case BOND_MODE_ACTIVEBACKUP:
+               slave = bond_xmit_activebackup_slave_get(bond);
+               break;
+
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+               break;
+
+       default:
+               /* Should never happen. Mode guarded by bond_xdp_check() */
+               netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+               WARN_ON_ONCE(1);
+               return NULL;
+       }
+
+       if (slave)
+               return slave->dev;
+
+       return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+                        int n, struct xdp_frame **frames, u32 flags)
+{
+       int nxmit, err = -ENXIO;
+
+       rcu_read_lock();
+
+       for (nxmit = 0; nxmit < n; nxmit++) {
+               struct xdp_frame *frame = frames[nxmit];
+               struct xdp_frame *frames1[] = {frame};
+               struct net_device *slave_dev;
+               struct xdp_buff xdp;
+
+               xdp_convert_frame_to_buff(frame, &xdp);
+
+               slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+               if (!slave_dev) {
+                       err = -ENXIO;
+                       break;
+               }
+
+               err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+               if (err < 1)
+                       break;
+       }
+
+       rcu_read_unlock();
+
+       /* If error happened on the first frame then we can pass the error up, otherwise
+        * report the number of frames that were xmitted.
+        */
+       if (err < 0)
+               return (nxmit == 0 ? err : nxmit);
+
+       return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       struct netlink_ext_ack *extack)
+{
+       struct bonding *bond = netdev_priv(dev);
+       struct list_head *iter;
+       struct slave *slave, *rollback_slave;
+       struct bpf_prog *old_prog;
+       struct netdev_bpf xdp = {
+               .command = XDP_SETUP_PROG,
+               .flags   = 0,
+               .prog    = prog,
+               .extack  = extack,
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       if (!bond_xdp_check(bond))
+               return -EOPNOTSUPP;
+
+       old_prog = bond->xdp_prog;
+       bond->xdp_prog = prog;
+
+       bond_for_each_slave(bond, slave, iter) {
+               struct net_device *slave_dev = slave->dev;
+
+               if (!slave_dev->netdev_ops->ndo_bpf ||
+                   !slave_dev->netdev_ops->ndo_xdp_xmit) {
+                       SLAVE_NL_ERR(dev, slave_dev, extack,
+                                    "Slave device does not support XDP");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       SLAVE_NL_ERR(dev, slave_dev, extack,
+                                    "Slave has XDP program loaded, please unload before enslaving");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+                       goto err;
+               }
+               if (prog)
+                       bpf_prog_inc(prog);
+       }
+
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       if (prog)
+               static_branch_inc(&bpf_master_redirect_enabled_key);
+       else
+               static_branch_dec(&bpf_master_redirect_enabled_key);
+
+       return 0;
+
+err:
+       /* unwind the program changes */
+       bond->xdp_prog = old_prog;
+       xdp.prog = old_prog;
+       xdp.extack = NULL; /* do not overwrite original error */
+
+       bond_for_each_slave(bond, rollback_slave, iter) {
+               struct net_device *slave_dev = rollback_slave->dev;
+               int err_unwind;
+
+               if (slave == rollback_slave)
+                       break;
+
+               err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err_unwind < 0)
+                       slave_err(dev, slave_dev,
+                                 "Error %d when unwinding XDP program change\n", err_unwind);
+               else if (xdp.prog)
+                       bpf_prog_inc(xdp.prog);
+       }
+       return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return bond_xdp_set(dev, xdp->prog, xdp->extack);
+       default:
+               return -EINVAL;
+       }
+}
+
 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
 {
        if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -4955,7 +5334,9 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_start_xmit         = bond_start_xmit,
        .ndo_select_queue       = bond_select_queue,
        .ndo_get_stats64        = bond_get_stats,
-       .ndo_do_ioctl           = bond_do_ioctl,
+       .ndo_eth_ioctl          = bond_eth_ioctl,
+       .ndo_siocbond           = bond_do_ioctl,
+       .ndo_siocdevprivate     = bond_siocdevprivate,
        .ndo_change_rx_flags    = bond_change_rx_flags,
        .ndo_set_rx_mode        = bond_set_rx_mode,
        .ndo_change_mtu         = bond_change_mtu,
@@ -4974,6 +5355,9 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_features_check     = passthru_features_check,
        .ndo_get_xmit_slave     = bond_xmit_get_slave,
        .ndo_sk_get_lower_dev   = bond_sk_get_lower_dev,
+       .ndo_bpf                = bond_xdp,
+       .ndo_xdp_xmit           = bond_xdp_xmit,
+       .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
 };
 
 static const struct device_type bond_type = {
@@ -5443,6 +5827,7 @@ static int bond_check_params(struct bond_params *params)
        params->downdelay = downdelay;
        params->peer_notif_delay = 0;
        params->use_carrier = use_carrier;
+       params->lacp_active = 1;
        params->lacp_fast = lacp_fast;
        params->primary[0] = 0;
        params->primary_reselect = primary_reselect_value;
index 0561ece..5d54e11 100644 (file)
@@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
        [IFLA_BOND_MIN_LINKS]           = { .type = NLA_U32 },
        [IFLA_BOND_LP_INTERVAL]         = { .type = NLA_U32 },
        [IFLA_BOND_PACKETS_PER_SLAVE]   = { .type = NLA_U32 },
+       [IFLA_BOND_AD_LACP_ACTIVE]      = { .type = NLA_U8 },
        [IFLA_BOND_AD_LACP_RATE]        = { .type = NLA_U8 },
        [IFLA_BOND_AD_SELECT]           = { .type = NLA_U8 },
        [IFLA_BOND_AD_INFO]             = { .type = NLA_NESTED },
@@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
                if (err)
                        return err;
        }
+
+       if (data[IFLA_BOND_AD_LACP_ACTIVE]) {
+               int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]);
+
+               bond_opt_initval(&newval, lacp_active);
+               err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval);
+               if (err)
+                       return err;
+       }
+
        if (data[IFLA_BOND_AD_LACP_RATE]) {
                int lacp_rate =
                        nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]);
@@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_MIN_LINKS */
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_LP_INTERVAL */
                nla_total_size(sizeof(u32)) +  /* IFLA_BOND_PACKETS_PER_SLAVE */
+               nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_ACTIVE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_RATE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_SELECT */
                nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */
@@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb,
                        packets_per_slave))
                goto nla_put_failure;
 
+       if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE,
+                      bond->params.lacp_active))
+               goto nla_put_failure;
+
        if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE,
                       bond->params.lacp_fast))
                goto nla_put_failure;
index 0cf25de..a8fde3b 100644 (file)
@@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
                                       const struct bond_opt_value *newval);
 static int bond_option_pps_set(struct bonding *bond,
                               const struct bond_opt_value *newval);
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval);
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval);
 static int bond_option_ad_select_set(struct bonding *bond,
@@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = {
        { NULL,      -1,      0}
 };
 
+static const struct bond_opt_value bond_lacp_active[] = {
+       { "off", 0,  0},
+       { "on",  1,  BOND_VALFLAG_DEFAULT},
+       { NULL,  -1, 0}
+};
+
 static const struct bond_opt_value bond_lacp_rate_tbl[] = {
        { "slow", AD_LACP_SLOW, 0},
        { "fast", AD_LACP_FAST, 0},
@@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
                .values = bond_intmax_tbl,
                .set = bond_option_updelay_set
        },
+       [BOND_OPT_LACP_ACTIVE] = {
+               .id = BOND_OPT_LACP_ACTIVE,
+               .name = "lacp_active",
+               .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to",
+               .flags = BOND_OPTFLAG_IFDOWN,
+               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+               .values = bond_lacp_active,
+               .set = bond_option_lacp_active_set
+       },
        [BOND_OPT_LACP_RATE] = {
                .id = BOND_OPT_LACP_RATE,
                .name = "lacp_rate",
@@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond,
        return 0;
 }
 
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval)
+{
+       netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
+                  newval->string, newval->value);
+       bond->params.lacp_active = newval->value;
+
+       return 0;
+}
+
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval)
 {
index 0fb1da3..f3e3bfd 100644 (file)
@@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq)
                struct ad_info ad_info;
 
                seq_puts(seq, "\n802.3ad info\n");
+               seq_printf(seq, "LACP active: %s\n",
+                          (bond->params.lacp_active) ? "on" : "off");
                seq_printf(seq, "LACP rate: %s\n",
                           (bond->params.lacp_fast) ? "fast" : "slow");
                seq_printf(seq, "Min links: %d\n", bond->params.min_links);
index 5f9e9a2..b9e9842 100644 (file)
@@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d,
 static DEVICE_ATTR(peer_notif_delay, 0644,
                   bonding_show_peer_notif_delay, bonding_sysfs_store_option);
 
-/* Show the LACP interval. */
-static ssize_t bonding_show_lacp(struct device *d,
-                                struct device_attribute *attr,
-                                char *buf)
+/* Show the LACP activity and interval. */
+static ssize_t bonding_show_lacp_active(struct device *d,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct bonding *bond = to_bond(d);
+       const struct bond_opt_value *val;
+
+       val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active);
+
+       return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active);
+}
+static DEVICE_ATTR(lacp_active, 0644,
+                  bonding_show_lacp_active, bonding_sysfs_store_option);
+
+static ssize_t bonding_show_lacp_rate(struct device *d,
+                                     struct device_attribute *attr,
+                                     char *buf)
 {
        struct bonding *bond = to_bond(d);
        const struct bond_opt_value *val;
@@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d,
        return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
 }
 static DEVICE_ATTR(lacp_rate, 0644,
-                  bonding_show_lacp, bonding_sysfs_store_option);
+                  bonding_show_lacp_rate, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_min_links(struct device *d,
                                      struct device_attribute *attr,
@@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = {
        &dev_attr_downdelay.attr,
        &dev_attr_updelay.attr,
        &dev_attr_peer_notif_delay.attr,
+       &dev_attr_lacp_active.attr,
        &dev_attr_lacp_rate.attr,
        &dev_attr_ad_select.attr,
        &dev_attr_xmit_hash_policy.attr,
index e355d39..fff2592 100644 (file)
@@ -97,7 +97,8 @@ config CAN_AT91
 
 config CAN_FLEXCAN
        tristate "Support for Freescale FLEXCAN based chips"
-       depends on OF && HAS_IOMEM
+       depends on OF || COLDFIRE || COMPILE_TEST
+       depends on HAS_IOMEM
        help
          Say Y here if you want to support for Freescale FlexCAN.
 
index 04d0bb3..b06af90 100644 (file)
@@ -43,14 +43,14 @@ enum at91_reg {
 };
 
 /* Mailbox registers (0 <= i <= 15) */
-#define AT91_MMR(i)            (enum at91_reg)(0x200 + ((i) * 0x20))
-#define AT91_MAM(i)            (enum at91_reg)(0x204 + ((i) * 0x20))
-#define AT91_MID(i)            (enum at91_reg)(0x208 + ((i) * 0x20))
-#define AT91_MFID(i)           (enum at91_reg)(0x20C + ((i) * 0x20))
-#define AT91_MSR(i)            (enum at91_reg)(0x210 + ((i) * 0x20))
-#define AT91_MDL(i)            (enum at91_reg)(0x214 + ((i) * 0x20))
-#define AT91_MDH(i)            (enum at91_reg)(0x218 + ((i) * 0x20))
-#define AT91_MCR(i)            (enum at91_reg)(0x21C + ((i) * 0x20))
+#define AT91_MMR(i)            ((enum at91_reg)(0x200 + ((i) * 0x20)))
+#define AT91_MAM(i)            ((enum at91_reg)(0x204 + ((i) * 0x20)))
+#define AT91_MID(i)            ((enum at91_reg)(0x208 + ((i) * 0x20)))
+#define AT91_MFID(i)           ((enum at91_reg)(0x20C + ((i) * 0x20)))
+#define AT91_MSR(i)            ((enum at91_reg)(0x210 + ((i) * 0x20)))
+#define AT91_MDL(i)            ((enum at91_reg)(0x214 + ((i) * 0x20)))
+#define AT91_MDH(i)            ((enum at91_reg)(0x218 + ((i) * 0x20)))
+#define AT91_MCR(i)            ((enum at91_reg)(0x21C + ((i) * 0x20)))
 
 /* Register bits */
 #define AT91_MR_CANEN          BIT(0)
@@ -87,19 +87,19 @@ enum at91_mb_mode {
 };
 
 /* Interrupt mask bits */
-#define AT91_IRQ_ERRA          (1 << 16)
-#define AT91_IRQ_WARN          (1 << 17)
-#define AT91_IRQ_ERRP          (1 << 18)
-#define AT91_IRQ_BOFF          (1 << 19)
-#define AT91_IRQ_SLEEP         (1 << 20)
-#define AT91_IRQ_WAKEUP                (1 << 21)
-#define AT91_IRQ_TOVF          (1 << 22)
-#define AT91_IRQ_TSTP          (1 << 23)
-#define AT91_IRQ_CERR          (1 << 24)
-#define AT91_IRQ_SERR          (1 << 25)
-#define AT91_IRQ_AERR          (1 << 26)
-#define AT91_IRQ_FERR          (1 << 27)
-#define AT91_IRQ_BERR          (1 << 28)
+#define AT91_IRQ_ERRA          BIT(16)
+#define AT91_IRQ_WARN          BIT(17)
+#define AT91_IRQ_ERRP          BIT(18)
+#define AT91_IRQ_BOFF          BIT(19)
+#define AT91_IRQ_SLEEP         BIT(20)
+#define AT91_IRQ_WAKEUP                BIT(21)
+#define AT91_IRQ_TOVF          BIT(22)
+#define AT91_IRQ_TSTP          BIT(23)
+#define AT91_IRQ_CERR          BIT(24)
+#define AT91_IRQ_SERR          BIT(25)
+#define AT91_IRQ_AERR          BIT(26)
+#define AT91_IRQ_FERR          BIT(27)
+#define AT91_IRQ_BERR          BIT(28)
 
 #define AT91_IRQ_ERR_ALL       (0x1fff0000)
 #define AT91_IRQ_ERR_FRAME     (AT91_IRQ_CERR | AT91_IRQ_SERR | \
@@ -163,7 +163,7 @@ static const struct can_bittiming_const at91_bittiming_const = {
        .tseg2_min      = 2,
        .tseg2_max      = 8,
        .sjw_max        = 4,
-       .brp_min        = 2,
+       .brp_min        = 2,
        .brp_max        = 128,
        .brp_inc        = 1,
 };
@@ -281,19 +281,20 @@ static inline u32 at91_read(const struct at91_priv *priv, enum at91_reg reg)
 }
 
 static inline void at91_write(const struct at91_priv *priv, enum at91_reg reg,
-               u32 value)
+                             u32 value)
 {
        writel_relaxed(value, priv->reg_base + reg);
 }
 
 static inline void set_mb_mode_prio(const struct at91_priv *priv,
-               unsigned int mb, enum at91_mb_mode mode, int prio)
+                                   unsigned int mb, enum at91_mb_mode mode,
+                                   int prio)
 {
        at91_write(priv, AT91_MMR(mb), (mode << 24) | (prio << 16));
 }
 
 static inline void set_mb_mode(const struct at91_priv *priv, unsigned int mb,
-               enum at91_mb_mode mode)
+                              enum at91_mb_mode mode)
 {
        set_mb_mode_prio(priv, mb, mode, 0);
 }
@@ -316,8 +317,7 @@ static void at91_setup_mailboxes(struct net_device *dev)
        unsigned int i;
        u32 reg_mid;
 
-       /*
-        * Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first
+       /* Due to a chip bug (errata 50.2.6.3 & 50.3.5.3) the first
         * mailbox is disabled. The next 11 mailboxes are used as a
         * reception FIFO. The last mailbox is configured with
         * overwrite option. The overwrite flag indicates a FIFO
@@ -368,7 +368,7 @@ static int at91_set_bittiming(struct net_device *dev)
 }
 
 static int at91_get_berr_counter(const struct net_device *dev,
-               struct can_berr_counter *bec)
+                                struct can_berr_counter *bec)
 {
        const struct at91_priv *priv = netdev_priv(dev);
        u32 reg_ecr = at91_read(priv, AT91_ECR);
@@ -423,8 +423,7 @@ static void at91_chip_stop(struct net_device *dev, enum can_state state)
        priv->can.state = state;
 }
 
-/*
- * theory of operation:
+/* theory of operation:
  *
  * According to the datasheet priority 0 is the highest priority, 15
  * is the lowest. If two mailboxes have the same priority level the
@@ -486,8 +485,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
        /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */
        can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv), 0);
 
-       /*
-        * we have to stop the queue and deliver all messages in case
+       /* we have to stop the queue and deliver all messages in case
         * of a prio+mb counter wrap around. This is the case if
         * tx_next buffer prio and mailbox equals 0.
         *
@@ -515,6 +513,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static inline void at91_activate_rx_low(const struct at91_priv *priv)
 {
        u32 mask = get_mb_rx_low_mask(priv);
+
        at91_write(priv, AT91_TCR, mask);
 }
 
@@ -526,9 +525,10 @@ static inline void at91_activate_rx_low(const struct at91_priv *priv)
  * Reenables given mailbox for reception of new CAN messages
  */
 static inline void at91_activate_rx_mb(const struct at91_priv *priv,
-               unsigned int mb)
+                                      unsigned int mb)
 {
        u32 mask = 1 << mb;
+
        at91_write(priv, AT91_TCR, mask);
 }
 
@@ -568,7 +568,7 @@ static void at91_rx_overflow_err(struct net_device *dev)
  * given can frame. "mb" and "cf" must be valid.
  */
 static void at91_read_mb(struct net_device *dev, unsigned int mb,
-               struct can_frame *cf)
+                        struct can_frame *cf)
 {
        const struct at91_priv *priv = netdev_priv(dev);
        u32 reg_msr, reg_mid;
@@ -582,9 +582,9 @@ static void at91_read_mb(struct net_device *dev, unsigned int mb,
        reg_msr = at91_read(priv, AT91_MSR(mb));
        cf->len = can_cc_dlc2len((reg_msr >> 16) & 0xf);
 
-       if (reg_msr & AT91_MSR_MRTR)
+       if (reg_msr & AT91_MSR_MRTR) {
                cf->can_id |= CAN_RTR_FLAG;
-       else {
+       else {
                *(u32 *)(cf->data + 0) = at91_read(priv, AT91_MDL(mb));
                *(u32 *)(cf->data + 4) = at91_read(priv, AT91_MDH(mb));
        }
@@ -685,7 +685,7 @@ static int at91_poll_rx(struct net_device *dev, int quota)
        if (priv->rx_next > get_mb_rx_low_last(priv) &&
            reg_sr & get_mb_rx_low_mask(priv))
                netdev_info(dev,
-                       "order of incoming frames cannot be guaranteed\n");
+                           "order of incoming frames cannot be guaranteed\n");
 
  again:
        for (mb = find_next_bit(addr, get_mb_tx_first(priv), priv->rx_next);
@@ -718,7 +718,7 @@ static int at91_poll_rx(struct net_device *dev, int quota)
 }
 
 static void at91_poll_err_frame(struct net_device *dev,
-               struct can_frame *cf, u32 reg_sr)
+                               struct can_frame *cf, u32 reg_sr)
 {
        struct at91_priv *priv = netdev_priv(dev);
 
@@ -796,8 +796,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
        if (reg_sr & get_irq_mb_rx(priv))
                work_done += at91_poll_rx(dev, quota - work_done);
 
-       /*
-        * The error bits are clear on read,
+       /* The error bits are clear on read,
         * so use saved value from irq handler.
         */
        reg_sr |= priv->reg_sr;
@@ -807,6 +806,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
        if (work_done < quota) {
                /* enable IRQs for frame errors and all mailboxes >= rx_next */
                u32 reg_ier = AT91_IRQ_ERR_FRAME;
+
                reg_ier |= get_irq_mb_rx(priv) & ~AT91_MB_MASK(priv->rx_next);
 
                napi_complete_done(napi, work_done);
@@ -816,8 +816,7 @@ static int at91_poll(struct napi_struct *napi, int quota)
        return work_done;
 }
 
-/*
- * theory of operation:
+/* theory of operation:
  *
  * priv->tx_echo holds the number of the oldest can_frame put for
  * transmission into the hardware, but not yet ACKed by the CAN tx
@@ -846,8 +845,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
                /* Disable irq for this TX mailbox */
                at91_write(priv, AT91_IDR, 1 << mb);
 
-               /*
-                * only echo if mailbox signals us a transfer
+               /* only echo if mailbox signals us a transfer
                 * complete (MSR_MRDY). Otherwise it's a tansfer
                 * abort. "can_bus_off()" takes care about the skbs
                 * parked in the echo queue.
@@ -862,8 +860,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
                }
        }
 
-       /*
-        * restart queue if we don't have a wrap around but restart if
+       /* restart queue if we don't have a wrap around but restart if
         * we get a TX int for the last can frame directly before a
         * wrap around.
         */
@@ -873,7 +870,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr)
 }
 
 static void at91_irq_err_state(struct net_device *dev,
-               struct can_frame *cf, enum can_state new_state)
+                              struct can_frame *cf, enum can_state new_state)
 {
        struct at91_priv *priv = netdev_priv(dev);
        u32 reg_idr = 0, reg_ier = 0;
@@ -883,8 +880,7 @@ static void at91_irq_err_state(struct net_device *dev,
 
        switch (priv->can.state) {
        case CAN_STATE_ERROR_ACTIVE:
-               /*
-                * from: ERROR_ACTIVE
+               /* from: ERROR_ACTIVE
                 * to  : ERROR_WARNING, ERROR_PASSIVE, BUS_OFF
                 * =>  : there was a warning int
                 */
@@ -900,8 +896,7 @@ static void at91_irq_err_state(struct net_device *dev,
                }
                fallthrough;
        case CAN_STATE_ERROR_WARNING:
-               /*
-                * from: ERROR_ACTIVE, ERROR_WARNING
+               /* from: ERROR_ACTIVE, ERROR_WARNING
                 * to  : ERROR_PASSIVE, BUS_OFF
                 * =>  : error passive int
                 */
@@ -917,8 +912,7 @@ static void at91_irq_err_state(struct net_device *dev,
                }
                break;
        case CAN_STATE_BUS_OFF:
-               /*
-                * from: BUS_OFF
+               /* from: BUS_OFF
                 * to  : ERROR_ACTIVE, ERROR_WARNING, ERROR_PASSIVE
                 */
                if (new_state <= CAN_STATE_ERROR_PASSIVE) {
@@ -935,12 +929,10 @@ static void at91_irq_err_state(struct net_device *dev,
                break;
        }
 
-
        /* process state changes depending on the new state */
        switch (new_state) {
        case CAN_STATE_ERROR_ACTIVE:
-               /*
-                * actually we want to enable AT91_IRQ_WARN here, but
+               /* actually we want to enable AT91_IRQ_WARN here, but
                 * it screws up the system under certain
                 * circumstances. so just enable AT91_IRQ_ERRP, thus
                 * the "fallthrough"
@@ -983,7 +975,7 @@ static void at91_irq_err_state(struct net_device *dev,
 }
 
 static int at91_get_state_by_bec(const struct net_device *dev,
-               enum can_state *state)
+                                enum can_state *state)
 {
        struct can_berr_counter bec;
        int err;
@@ -1004,7 +996,6 @@ static int at91_get_state_by_bec(const struct net_device *dev,
        return 0;
 }
 
-
 static void at91_irq_err(struct net_device *dev)
 {
        struct at91_priv *priv = netdev_priv(dev);
@@ -1018,15 +1009,15 @@ static void at91_irq_err(struct net_device *dev)
                reg_sr = at91_read(priv, AT91_SR);
 
                /* we need to look at the unmasked reg_sr */
-               if (unlikely(reg_sr & AT91_IRQ_BOFF))
+               if (unlikely(reg_sr & AT91_IRQ_BOFF)) {
                        new_state = CAN_STATE_BUS_OFF;
-               else if (unlikely(reg_sr & AT91_IRQ_ERRP))
+               } else if (unlikely(reg_sr & AT91_IRQ_ERRP)) {
                        new_state = CAN_STATE_ERROR_PASSIVE;
-               else if (unlikely(reg_sr & AT91_IRQ_WARN))
+               } else if (unlikely(reg_sr & AT91_IRQ_WARN)) {
                        new_state = CAN_STATE_ERROR_WARNING;
-               else if (likely(reg_sr & AT91_IRQ_ERRA))
+               } else if (likely(reg_sr & AT91_IRQ_ERRA)) {
                        new_state = CAN_STATE_ERROR_ACTIVE;
-               else {
+               else {
                        netdev_err(dev, "BUG! hardware in undefined state\n");
                        return;
                }
@@ -1053,8 +1044,7 @@ static void at91_irq_err(struct net_device *dev)
        priv->can.state = new_state;
 }
 
-/*
- * interrupt handler
+/* interrupt handler
  */
 static irqreturn_t at91_irq(int irq, void *dev_id)
 {
@@ -1075,8 +1065,7 @@ static irqreturn_t at91_irq(int irq, void *dev_id)
 
        /* Receive or error interrupt? -> napi */
        if (reg_sr & (get_irq_mb_rx(priv) | AT91_IRQ_ERR_FRAME)) {
-               /*
-                * The error bits are clear on read,
+               /* The error bits are clear on read,
                 * save for later use.
                 */
                priv->reg_sr = reg_sr;
@@ -1133,8 +1122,7 @@ static int at91_open(struct net_device *dev)
        return err;
 }
 
-/*
- * stop CAN bus activity
+/* stop CAN bus activity
  */
 static int at91_close(struct net_device *dev)
 {
@@ -1176,8 +1164,8 @@ static const struct net_device_ops at91_netdev_ops = {
        .ndo_change_mtu = can_change_mtu,
 };
 
-static ssize_t at91_sysfs_show_mb0_id(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t mb0_id_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
 {
        struct at91_priv *priv = netdev_priv(to_net_dev(dev));
 
@@ -1187,8 +1175,9 @@ static ssize_t at91_sysfs_show_mb0_id(struct device *dev,
                return snprintf(buf, PAGE_SIZE, "0x%03x\n", priv->mb0_id);
 }
 
-static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t mb0_id_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
 {
        struct net_device *ndev = to_net_dev(dev);
        struct at91_priv *priv = netdev_priv(ndev);
@@ -1222,7 +1211,7 @@ static ssize_t at91_sysfs_set_mb0_id(struct device *dev,
        return ret;
 }
 
-static DEVICE_ATTR(mb0_id, 0644, at91_sysfs_show_mb0_id, at91_sysfs_set_mb0_id);
+static DEVICE_ATTR_RW(mb0_id);
 
 static struct attribute *at91_sysfs_attrs[] = {
        &dev_attr_mb0_id.attr,
index 4247ff8..08b6efa 100644 (file)
@@ -176,6 +176,13 @@ struct c_can_raminit {
        bool needs_pulse;
 };
 
+/* c_can tx ring structure */
+struct c_can_tx_ring {
+       unsigned int head;
+       unsigned int tail;
+       unsigned int obj_num;
+};
+
 /* c_can private data structure */
 struct c_can_priv {
        struct can_priv can;    /* must be the first member */
@@ -190,17 +197,16 @@ struct c_can_priv {
        unsigned int msg_obj_tx_first;
        unsigned int msg_obj_tx_last;
        u32 msg_obj_rx_mask;
-       atomic_t tx_active;
        atomic_t sie_pending;
        unsigned long tx_dir;
        int last_status;
+       struct c_can_tx_ring tx;
        u16 (*read_reg)(const struct c_can_priv *priv, enum reg index);
        void (*write_reg)(const struct c_can_priv *priv, enum reg index, u16 val);
        u32 (*read_reg32)(const struct c_can_priv *priv, enum reg index);
        void (*write_reg32)(const struct c_can_priv *priv, enum reg index, u32 val);
        void __iomem *base;
        const u16 *regs;
-       void *priv;             /* for board-specific data */
        enum c_can_dev_id type;
        struct c_can_raminit raminit_sys;       /* RAMINIT via syscon regmap */
        void (*raminit)(const struct c_can_priv *priv, bool enable);
@@ -220,4 +226,19 @@ int c_can_power_down(struct net_device *dev);
 
 void c_can_set_ethtool_ops(struct net_device *dev);
 
+static inline u8 c_can_get_tx_head(const struct c_can_tx_ring *ring)
+{
+       return ring->head & (ring->obj_num - 1);
+}
+
+static inline u8 c_can_get_tx_tail(const struct c_can_tx_ring *ring)
+{
+       return ring->tail & (ring->obj_num - 1);
+}
+
+static inline u8 c_can_get_tx_free(const struct c_can_tx_ring *ring)
+{
+       return ring->obj_num - (ring->head - ring->tail);
+}
+
 #endif /* C_CAN_H */
index 7588f70..52671d1 100644 (file)
 
 #define IF_MCONT_TX            (IF_MCONT_TXIE | IF_MCONT_EOB)
 
-/* Use IF1 for RX and IF2 for TX */
-#define IF_RX                  0
+/* Use IF1 in NAPI path and IF2 in TX path */
+#define IF_NAPI                        0
 #define IF_TX                  1
 
 /* minimum timeout for checking BUSY status */
@@ -427,24 +427,51 @@ static void c_can_setup_receive_object(struct net_device *dev, int iface,
        c_can_object_put(dev, iface, obj, IF_COMM_RCV_SETUP);
 }
 
+static bool c_can_tx_busy(const struct c_can_priv *priv,
+                         const struct c_can_tx_ring *tx_ring)
+{
+       if (c_can_get_tx_free(tx_ring) > 0)
+               return false;
+
+       netif_stop_queue(priv->dev);
+
+       /* Memory barrier before checking tx_free (head and tail) */
+       smp_mb();
+
+       if (c_can_get_tx_free(tx_ring) == 0) {
+               netdev_dbg(priv->dev,
+                          "Stopping tx-queue (tx_head=0x%08x, tx_tail=0x%08x, len=%d).\n",
+                          tx_ring->head, tx_ring->tail,
+                          tx_ring->head - tx_ring->tail);
+               return true;
+       }
+
+       netif_start_queue(priv->dev);
+       return false;
+}
+
 static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
                                    struct net_device *dev)
 {
        struct can_frame *frame = (struct can_frame *)skb->data;
        struct c_can_priv *priv = netdev_priv(dev);
-       u32 idx, obj;
+       struct c_can_tx_ring *tx_ring = &priv->tx;
+       u32 idx, obj, cmd = IF_COMM_TX;
 
        if (can_dropped_invalid_skb(dev, skb))
                return NETDEV_TX_OK;
-       /* This is not a FIFO. C/D_CAN sends out the buffers
-        * prioritized. The lowest buffer number wins.
-        */
-       idx = fls(atomic_read(&priv->tx_active));
-       obj = idx + priv->msg_obj_tx_first;
 
-       /* If this is the last buffer, stop the xmit queue */
-       if (idx == priv->msg_obj_tx_num - 1)
+       if (c_can_tx_busy(priv, tx_ring))
+               return NETDEV_TX_BUSY;
+
+       idx = c_can_get_tx_head(tx_ring);
+       tx_ring->head++;
+       if (c_can_get_tx_free(tx_ring) == 0)
                netif_stop_queue(dev);
+
+       if (idx < c_can_get_tx_tail(tx_ring))
+               cmd &= ~IF_COMM_TXRQST; /* Cache the message */
+
        /* Store the message in the interface so we can call
         * can_put_echo_skb(). We must do this before we enable
         * transmit as we might race against do_tx().
@@ -452,11 +479,8 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
        c_can_setup_tx_object(dev, IF_TX, frame, idx);
        priv->dlc[idx] = frame->len;
        can_put_echo_skb(skb, dev, idx, 0);
-
-       /* Update the active bits */
-       atomic_add(BIT(idx), &priv->tx_active);
-       /* Start transmission */
-       c_can_object_put(dev, IF_TX, obj, IF_COMM_TX);
+       obj = idx + priv->msg_obj_tx_first;
+       c_can_object_put(dev, IF_TX, obj, cmd);
 
        return NETDEV_TX_OK;
 }
@@ -529,13 +553,13 @@ static void c_can_configure_msg_objects(struct net_device *dev)
 
        /* first invalidate all message objects */
        for (i = priv->msg_obj_rx_first; i <= priv->msg_obj_num; i++)
-               c_can_inval_msg_object(dev, IF_RX, i);
+               c_can_inval_msg_object(dev, IF_NAPI, i);
 
        /* setup receive message objects */
        for (i = priv->msg_obj_rx_first; i < priv->msg_obj_rx_last; i++)
-               c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_RCV);
+               c_can_setup_receive_object(dev, IF_NAPI, i, 0, 0, IF_MCONT_RCV);
 
-       c_can_setup_receive_object(dev, IF_RX, priv->msg_obj_rx_last, 0, 0,
+       c_can_setup_receive_object(dev, IF_NAPI, priv->msg_obj_rx_last, 0, 0,
                                   IF_MCONT_RCV_EOB);
 }
 
@@ -567,6 +591,7 @@ static int c_can_software_reset(struct net_device *dev)
 static int c_can_chip_config(struct net_device *dev)
 {
        struct c_can_priv *priv = netdev_priv(dev);
+       struct c_can_tx_ring *tx_ring = &priv->tx;
        int err;
 
        err = c_can_software_reset(dev);
@@ -598,7 +623,8 @@ static int c_can_chip_config(struct net_device *dev)
        priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
 
        /* Clear all internal status */
-       atomic_set(&priv->tx_active, 0);
+       tx_ring->head = 0;
+       tx_ring->tail = 0;
        priv->tx_dir = 0;
 
        /* set bittiming params */
@@ -696,40 +722,57 @@ static int c_can_get_berr_counter(const struct net_device *dev,
 static void c_can_do_tx(struct net_device *dev)
 {
        struct c_can_priv *priv = netdev_priv(dev);
+       struct c_can_tx_ring *tx_ring = &priv->tx;
        struct net_device_stats *stats = &dev->stats;
-       u32 idx, obj, pkts = 0, bytes = 0, pend, clr;
+       u32 idx, obj, pkts = 0, bytes = 0, pend;
+       u8 tail;
 
        if (priv->msg_obj_tx_last > 32)
                pend = priv->read_reg32(priv, C_CAN_INTPND3_REG);
        else
                pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
-       clr = pend;
 
        while ((idx = ffs(pend))) {
                idx--;
                pend &= ~BIT(idx);
                obj = idx + priv->msg_obj_tx_first;
 
-               /* We use IF_RX interface instead of IF_TX because we
+               /* We use IF_NAPI interface instead of IF_TX because we
                 * are called from c_can_poll(), which runs inside
-                * NAPI. We are not trasmitting.
+                * NAPI. We are not transmitting.
                 */
-               c_can_inval_tx_object(dev, IF_RX, obj);
+               c_can_inval_tx_object(dev, IF_NAPI, obj);
                can_get_echo_skb(dev, idx, NULL);
                bytes += priv->dlc[idx];
                pkts++;
        }
 
-       /* Clear the bits in the tx_active mask */
-       atomic_sub(clr, &priv->tx_active);
+       if (!pkts)
+               return;
 
-       if (clr & BIT(priv->msg_obj_tx_num - 1))
-               netif_wake_queue(dev);
+       tx_ring->tail += pkts;
+       if (c_can_get_tx_free(tx_ring)) {
+               /* Make sure that anybody stopping the queue after
+                * this sees the new tx_ring->tail.
+                */
+               smp_mb();
+               netif_wake_queue(priv->dev);
+       }
 
-       if (pkts) {
-               stats->tx_bytes += bytes;
-               stats->tx_packets += pkts;
-               can_led_event(dev, CAN_LED_EVENT_TX);
+       stats->tx_bytes += bytes;
+       stats->tx_packets += pkts;
+       can_led_event(dev, CAN_LED_EVENT_TX);
+
+       tail = c_can_get_tx_tail(tx_ring);
+
+       if (tail == 0) {
+               u8 head = c_can_get_tx_head(tx_ring);
+
+               /* Start transmission for all cached messages */
+               for (idx = tail; idx < head; idx++) {
+                       obj = idx + priv->msg_obj_tx_first;
+                       c_can_object_put(dev, IF_NAPI, obj, IF_COMM_TXRQST);
+               }
        }
 }
 
@@ -766,14 +809,14 @@ static u32 c_can_adjust_pending(u32 pend, u32 rx_mask)
 static inline void c_can_rx_object_get(struct net_device *dev,
                                       struct c_can_priv *priv, u32 obj)
 {
-       c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high);
+       c_can_object_get(dev, IF_NAPI, obj, priv->comm_rcv_high);
 }
 
 static inline void c_can_rx_finalize(struct net_device *dev,
                                     struct c_can_priv *priv, u32 obj)
 {
        if (priv->type != BOSCH_D_CAN)
-               c_can_object_get(dev, IF_RX, obj, IF_COMM_CLR_NEWDAT);
+               c_can_object_get(dev, IF_NAPI, obj, IF_COMM_CLR_NEWDAT);
 }
 
 static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
@@ -785,10 +828,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
                pend &= ~BIT(obj - 1);
 
                c_can_rx_object_get(dev, priv, obj);
-               ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_RX));
+               ctrl = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, IF_NAPI));
 
                if (ctrl & IF_MCONT_MSGLST) {
-                       int n = c_can_handle_lost_msg_obj(dev, IF_RX, obj, ctrl);
+                       int n;
+
+                       n = c_can_handle_lost_msg_obj(dev, IF_NAPI, obj, ctrl);
 
                        pkts += n;
                        quota -= n;
@@ -803,7 +848,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
                        continue;
 
                /* read the data from the message object */
-               c_can_read_msg_object(dev, IF_RX, ctrl);
+               c_can_read_msg_object(dev, IF_NAPI, ctrl);
 
                c_can_rx_finalize(dev, priv, obj);
 
@@ -1205,6 +1250,10 @@ struct net_device *alloc_c_can_dev(int msg_obj_num)
        priv->msg_obj_tx_last =
                priv->msg_obj_tx_first + priv->msg_obj_tx_num - 1;
 
+       priv->tx.head = 0;
+       priv->tx.tail = 0;
+       priv->tx.obj_num = msg_obj_tx_num;
+
        netif_napi_add(dev, &priv->napi, c_can_poll, priv->msg_obj_rx_num);
 
        priv->dev = dev;
index 3695036..86e95e9 100644 (file)
@@ -385,7 +385,6 @@ static int c_can_plat_probe(struct platform_device *pdev)
        priv->base = addr;
        priv->device = &pdev->dev;
        priv->can.clock.freq = clk_get_rate(clk);
-       priv->priv = clk;
        priv->type = drvdata->id;
 
        platform_set_drvdata(pdev, dev);
index 311d856..e3d840b 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/can/dev.h>
 #include <linux/can/skb.h>
 #include <linux/can/led.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of.h>
 
 #define MOD_DESC "CAN device driver interface"
@@ -400,10 +401,69 @@ void close_candev(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(close_candev);
 
+static int can_set_termination(struct net_device *ndev, u16 term)
+{
+       struct can_priv *priv = netdev_priv(ndev);
+       int set;
+
+       if (term == priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED])
+               set = 1;
+       else
+               set = 0;
+
+       gpiod_set_value(priv->termination_gpio, set);
+
+       return 0;
+}
+
+static int can_get_termination(struct net_device *ndev)
+{
+       struct can_priv *priv = netdev_priv(ndev);
+       struct device *dev = ndev->dev.parent;
+       struct gpio_desc *gpio;
+       u32 term;
+       int ret;
+
+       /* Disabling termination by default is the safe choice: Else if many
+        * bus participants enable it, no communication is possible at all.
+        */
+       gpio = devm_gpiod_get_optional(dev, "termination", GPIOD_OUT_LOW);
+       if (IS_ERR(gpio))
+               return dev_err_probe(dev, PTR_ERR(gpio),
+                                    "Cannot get termination-gpios\n");
+
+       if (!gpio)
+               return 0;
+
+       ret = device_property_read_u32(dev, "termination-ohms", &term);
+       if (ret) {
+               netdev_err(ndev, "Cannot get termination-ohms: %pe\n",
+                          ERR_PTR(ret));
+               return ret;
+       }
+
+       if (term > U16_MAX) {
+               netdev_err(ndev, "Invalid termination-ohms value (%u > %u)\n",
+                          term, U16_MAX);
+               return -EINVAL;
+       }
+
+       priv->termination_const_cnt = ARRAY_SIZE(priv->termination_gpio_ohms);
+       priv->termination_const = priv->termination_gpio_ohms;
+       priv->termination_gpio = gpio;
+       priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_DISABLED] =
+               CAN_TERMINATION_DISABLED;
+       priv->termination_gpio_ohms[CAN_TERMINATION_GPIO_ENABLED] = term;
+       priv->do_set_termination = can_set_termination;
+
+       return 0;
+}
+
 /* Register the CAN network device */
 int register_candev(struct net_device *dev)
 {
        struct can_priv *priv = netdev_priv(dev);
+       int err;
 
        /* Ensure termination_const, termination_const_cnt and
         * do_set_termination consistency. All must be either set or
@@ -419,6 +479,12 @@ int register_candev(struct net_device *dev)
        if (!priv->data_bitrate_const != !priv->data_bitrate_const_cnt)
                return -EINVAL;
 
+       if (!priv->termination_const) {
+               err = can_get_termination(dev);
+               if (err)
+                       return err;
+       }
+
        dev->rtnl_link_ops = &can_link_ops;
        netif_carrier_off(dev);
 
index e38c256..8042563 100644 (file)
@@ -47,7 +47,7 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[],
        }
 
        if (data[IFLA_CAN_DATA_BITTIMING]) {
-               if (!is_can_fd || !data[IFLA_CAN_BITTIMING])
+               if (!is_can_fd)
                        return -EOPNOTSUPP;
        }
 
@@ -116,7 +116,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
                maskedflags = cm->flags & cm->mask;
 
                /* check whether provided bits are allowed to be passed */
-               if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic))
+               if (maskedflags & ~(priv->ctrlmode_supported | ctrlstatic))
                        return -EOPNOTSUPP;
 
                /* do not check for static fd-non-iso if 'fd' is disabled */
@@ -132,10 +132,13 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
                priv->ctrlmode |= maskedflags;
 
                /* CAN_CTRLMODE_FD can only be set when driver supports FD */
-               if (priv->ctrlmode & CAN_CTRLMODE_FD)
+               if (priv->ctrlmode & CAN_CTRLMODE_FD) {
                        dev->mtu = CANFD_MTU;
-               else
+               } else {
                        dev->mtu = CAN_MTU;
+                       memset(&priv->data_bittiming, 0,
+                              sizeof(priv->data_bittiming));
+               }
        }
 
        if (data[IFLA_CAN_RESTART_MS]) {
index ab2c154..37b0cc6 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2014      Protonic Holland,
  *                         David Jander
- * Copyright (C) 2014-2017 Pengutronix,
+ * Copyright (C) 2014-2021 Pengutronix,
  *                         Marc Kleine-Budde <kernel@pengutronix.de>
  */
 
@@ -174,10 +174,8 @@ can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n)
 int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
                                         u64 pending)
 {
-       struct sk_buff_head skb_queue;
        unsigned int i;
-
-       __skb_queue_head_init(&skb_queue);
+       int received = 0;
 
        for (i = offload->mb_first;
             can_rx_offload_le(offload, i, offload->mb_last);
@@ -191,26 +189,12 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
                if (IS_ERR_OR_NULL(skb))
                        continue;
 
-               __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare);
-       }
-
-       if (!skb_queue_empty(&skb_queue)) {
-               unsigned long flags;
-               u32 queue_len;
-
-               spin_lock_irqsave(&offload->skb_queue.lock, flags);
-               skb_queue_splice_tail(&skb_queue, &offload->skb_queue);
-               spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
-
-               queue_len = skb_queue_len(&offload->skb_queue);
-               if (queue_len > offload->skb_queue_len_max / 8)
-                       netdev_dbg(offload->dev, "%s: queue_len=%d\n",
-                                  __func__, queue_len);
-
-               can_rx_offload_schedule(offload);
+               __skb_queue_add_sort(&offload->skb_irq_queue, skb,
+                                    can_rx_offload_compare);
+               received++;
        }
 
-       return skb_queue_len(&skb_queue);
+       return received;
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_timestamp);
 
@@ -226,13 +210,10 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload)
                if (!skb)
                        break;
 
-               skb_queue_tail(&offload->skb_queue, skb);
+               __skb_queue_tail(&offload->skb_irq_queue, skb);
                received++;
        }
 
-       if (received)
-               can_rx_offload_schedule(offload);
-
        return received;
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo);
@@ -241,7 +222,6 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
                                struct sk_buff *skb, u32 timestamp)
 {
        struct can_rx_offload_cb *cb;
-       unsigned long flags;
 
        if (skb_queue_len(&offload->skb_queue) >
            offload->skb_queue_len_max) {
@@ -252,11 +232,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
        cb = can_rx_offload_get_cb(skb);
        cb->timestamp = timestamp;
 
-       spin_lock_irqsave(&offload->skb_queue.lock, flags);
-       __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare);
-       spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
-
-       can_rx_offload_schedule(offload);
+       __skb_queue_add_sort(&offload->skb_irq_queue, skb,
+                            can_rx_offload_compare);
 
        return 0;
 }
@@ -295,13 +272,56 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload,
                return -ENOBUFS;
        }
 
-       skb_queue_tail(&offload->skb_queue, skb);
-       can_rx_offload_schedule(offload);
+       __skb_queue_tail(&offload->skb_irq_queue, skb);
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail);
 
+void can_rx_offload_irq_finish(struct can_rx_offload *offload)
+{
+       unsigned long flags;
+       int queue_len;
+
+       if (skb_queue_empty_lockless(&offload->skb_irq_queue))
+               return;
+
+       spin_lock_irqsave(&offload->skb_queue.lock, flags);
+       skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue);
+       spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+       queue_len = skb_queue_len(&offload->skb_queue);
+       if (queue_len > offload->skb_queue_len_max / 8)
+               netdev_dbg(offload->dev, "%s: queue_len=%d\n",
+                          __func__, queue_len);
+
+       napi_schedule(&offload->napi);
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_irq_finish);
+
+void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload)
+{
+       unsigned long flags;
+       int queue_len;
+
+       if (skb_queue_empty_lockless(&offload->skb_irq_queue))
+               return;
+
+       spin_lock_irqsave(&offload->skb_queue.lock, flags);
+       skb_queue_splice_tail_init(&offload->skb_irq_queue, &offload->skb_queue);
+       spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+       queue_len = skb_queue_len(&offload->skb_queue);
+       if (queue_len > offload->skb_queue_len_max / 8)
+               netdev_dbg(offload->dev, "%s: queue_len=%d\n",
+                          __func__, queue_len);
+
+       local_bh_disable();
+       napi_schedule(&offload->napi);
+       local_bh_enable();
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_threaded_irq_finish);
+
 static int can_rx_offload_init_queue(struct net_device *dev,
                                     struct can_rx_offload *offload,
                                     unsigned int weight)
@@ -312,6 +332,7 @@ static int can_rx_offload_init_queue(struct net_device *dev,
        offload->skb_queue_len_max = 2 << fls(weight);
        offload->skb_queue_len_max *= 4;
        skb_queue_head_init(&offload->skb_queue);
+       __skb_queue_head_init(&offload->skb_irq_queue);
 
        netif_napi_add(dev, &offload->napi, can_rx_offload_napi_poll, weight);
 
@@ -373,5 +394,6 @@ void can_rx_offload_del(struct can_rx_offload *offload)
 {
        netif_napi_del(&offload->napi);
        skb_queue_purge(&offload->skb_queue);
+       __skb_queue_purge(&offload->skb_irq_queue);
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_del);
index 57f3635..7734229 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/can/platform/flexcan.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 /* FLEXCAN hardware feature flags
  *
  * Below is some version info we got:
- *    SOC   Version   IP-Version  Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece-   FD Mode
+ *    SOC   Version   IP-Version  Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece-   FD Mode     MB
  *                                Filter? connected?  Passive detection  ption in MB Supported?
- *   MX25  FlexCAN2  03.00.00.00     no        no        no       no        no           no
- *   MX28  FlexCAN2  03.00.04.00    yes       yes        no       no        no           no
- *   MX35  FlexCAN2  03.00.00.00     no        no        no       no        no           no
- *   MX53  FlexCAN2  03.00.00.00    yes        no        no       no        no           no
- *   MX6s  FlexCAN3  10.00.12.00    yes       yes        no       no       yes           no
- *   MX8QM FlexCAN3  03.00.23.00    yes       yes        no       no       yes          yes
- *   MX8MP FlexCAN3  03.00.17.01    yes       yes        no      yes       yes          yes
- *   VF610 FlexCAN3  ?               no       yes        no      yes       yes?          no
- * LS1021A FlexCAN2  03.00.04.00     no       yes        no       no       yes           no
- * LX2160A FlexCAN3  03.00.23.00     no       yes        no      yes       yes          yes
+ * MCF5441X FlexCAN2  ?               no       yes        no       no       yes           no     16
+ *    MX25  FlexCAN2  03.00.00.00     no        no        no       no        no           no     64
+ *    MX28  FlexCAN2  03.00.04.00    yes       yes        no       no        no           no     64
+ *    MX35  FlexCAN2  03.00.00.00     no        no        no       no        no           no     64
+ *    MX53  FlexCAN2  03.00.00.00    yes        no        no       no        no           no     64
+ *    MX6s  FlexCAN3  10.00.12.00    yes       yes        no       no       yes           no     64
+ *    MX8QM FlexCAN3  03.00.23.00    yes       yes        no       no       yes          yes     64
+ *    MX8MP FlexCAN3  03.00.17.01    yes       yes        no      yes       yes          yes     64
+ *    VF610 FlexCAN3  ?               no       yes        no      yes       yes?          no     64
+ *  LS1021A FlexCAN2  03.00.04.00     no       yes        no       no       yes           no     64
+ *  LX2160A FlexCAN3  03.00.23.00     no       yes        no      yes       yes          yes     64
  *
  * Some SOCs do not have the RX_WARN & TX_WARN interrupt line connected.
  */
 #define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10)
 /* Setup stop mode with SCU firmware to support wakeup */
 #define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11)
+/* Setup 3 separate interrupts, main, boff and err */
+#define FLEXCAN_QUIRK_NR_IRQ_3 BIT(12)
+/* Setup 16 mailboxes */
+#define FLEXCAN_QUIRK_NR_MB_16 BIT(13)
 
 /* Structure of the message buffer */
 struct flexcan_mb {
@@ -363,6 +369,9 @@ struct flexcan_priv {
        struct regulator *reg_xceiver;
        struct flexcan_stop_mode stm;
 
+       int irq_boff;
+       int irq_err;
+
        /* IPC handle when setup stop mode by System Controller firmware(scfw) */
        struct imx_sc_ipc *sc_ipc_handle;
 
@@ -371,6 +380,11 @@ struct flexcan_priv {
        void (*write)(u32 val, void __iomem *addr);
 };
 
+static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = {
+       .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE |
+               FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16,
+};
+
 static const struct flexcan_devtype_data fsl_p1010_devtype_data = {
        .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE |
                FLEXCAN_QUIRK_BROKEN_PERR_STATE |
@@ -635,15 +649,19 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
 
 static int flexcan_clks_enable(const struct flexcan_priv *priv)
 {
-       int err;
+       int err = 0;
 
-       err = clk_prepare_enable(priv->clk_ipg);
-       if (err)
-               return err;
+       if (priv->clk_ipg) {
+               err = clk_prepare_enable(priv->clk_ipg);
+               if (err)
+                       return err;
+       }
 
-       err = clk_prepare_enable(priv->clk_per);
-       if (err)
-               clk_disable_unprepare(priv->clk_ipg);
+       if (priv->clk_per) {
+               err = clk_prepare_enable(priv->clk_per);
+               if (err)
+                       clk_disable_unprepare(priv->clk_ipg);
+       }
 
        return err;
 }
@@ -1198,6 +1216,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
                }
        }
 
+       if (handled)
+               can_rx_offload_irq_finish(&priv->offload);
+
        return handled;
 }
 
@@ -1401,8 +1422,12 @@ static int flexcan_rx_offload_setup(struct net_device *dev)
                priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
        else
                priv->mb_size = sizeof(struct flexcan_mb) + CAN_MAX_DLEN;
-       priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
-                        (sizeof(priv->regs->mb[1]) / priv->mb_size);
+
+       if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_MB_16)
+               priv->mb_count = 16;
+       else
+               priv->mb_count = (sizeof(priv->regs->mb[0]) / priv->mb_size) +
+                                (sizeof(priv->regs->mb[1]) / priv->mb_size);
 
        if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
                priv->tx_mb_reserved =
@@ -1774,6 +1799,18 @@ static int flexcan_open(struct net_device *dev)
        if (err)
                goto out_can_rx_offload_disable;
 
+       if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+               err = request_irq(priv->irq_boff,
+                                 flexcan_irq, IRQF_SHARED, dev->name, dev);
+               if (err)
+                       goto out_free_irq;
+
+               err = request_irq(priv->irq_err,
+                                 flexcan_irq, IRQF_SHARED, dev->name, dev);
+               if (err)
+                       goto out_free_irq_boff;
+       }
+
        flexcan_chip_interrupts_enable(dev);
 
        can_led_event(dev, CAN_LED_EVENT_OPEN);
@@ -1782,6 +1819,10 @@ static int flexcan_open(struct net_device *dev)
 
        return 0;
 
+ out_free_irq_boff:
+       free_irq(priv->irq_boff, dev);
+ out_free_irq:
+       free_irq(dev->irq, dev);
  out_can_rx_offload_disable:
        can_rx_offload_disable(&priv->offload);
        flexcan_chip_stop(dev);
@@ -1803,6 +1844,12 @@ static int flexcan_close(struct net_device *dev)
 
        netif_stop_queue(dev);
        flexcan_chip_interrupts_disable(dev);
+
+       if (priv->devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+               free_irq(priv->irq_err, dev);
+               free_irq(priv->irq_boff, dev);
+       }
+
        free_irq(dev->irq, dev);
        can_rx_offload_disable(&priv->offload);
        flexcan_chip_stop_disable_on_error(dev);
@@ -2039,14 +2086,26 @@ static const struct of_device_id flexcan_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, flexcan_of_match);
 
+static const struct platform_device_id flexcan_id_table[] = {
+       {
+               .name = "flexcan-mcf5441x",
+               .driver_data = (kernel_ulong_t)&fsl_mcf5441x_devtype_data,
+       }, {
+               /* sentinel */
+       },
+};
+MODULE_DEVICE_TABLE(platform, flexcan_id_table);
+
 static int flexcan_probe(struct platform_device *pdev)
 {
+       const struct of_device_id *of_id;
        const struct flexcan_devtype_data *devtype_data;
        struct net_device *dev;
        struct flexcan_priv *priv;
        struct regulator *reg_xceiver;
        struct clk *clk_ipg = NULL, *clk_per = NULL;
        struct flexcan_regs __iomem *regs;
+       struct flexcan_platform_data *pdata;
        int err, irq;
        u8 clk_src = 1;
        u32 clock_freq = 0;
@@ -2064,6 +2123,12 @@ static int flexcan_probe(struct platform_device *pdev)
                                     "clock-frequency", &clock_freq);
                of_property_read_u8(pdev->dev.of_node,
                                    "fsl,clk-source", &clk_src);
+       } else {
+               pdata = dev_get_platdata(&pdev->dev);
+               if (pdata) {
+                       clock_freq = pdata->clock_frequency;
+                       clk_src = pdata->clk_src;
+               }
        }
 
        if (!clock_freq) {
@@ -2089,7 +2154,14 @@ static int flexcan_probe(struct platform_device *pdev)
        if (IS_ERR(regs))
                return PTR_ERR(regs);
 
-       devtype_data = of_device_get_match_data(&pdev->dev);
+       of_id = of_match_device(flexcan_of_match, &pdev->dev);
+       if (of_id)
+               devtype_data = of_id->data;
+       else if (platform_get_device_id(pdev)->driver_data)
+               devtype_data = (struct flexcan_devtype_data *)
+                       platform_get_device_id(pdev)->driver_data;
+       else
+               return -ENODEV;
 
        if ((devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) &&
            !(devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)) {
@@ -2133,6 +2205,19 @@ static int flexcan_probe(struct platform_device *pdev)
        priv->devtype_data = devtype_data;
        priv->reg_xceiver = reg_xceiver;
 
+       if (devtype_data->quirks & FLEXCAN_QUIRK_NR_IRQ_3) {
+               priv->irq_boff = platform_get_irq(pdev, 1);
+               if (priv->irq_boff <= 0) {
+                       err = -ENODEV;
+                       goto failed_platform_get_irq;
+               }
+               priv->irq_err = platform_get_irq(pdev, 2);
+               if (priv->irq_err <= 0) {
+                       err = -ENODEV;
+                       goto failed_platform_get_irq;
+               }
+       }
+
        if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SUPPORT_FD) {
                priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD |
                        CAN_CTRLMODE_FD_NON_ISO;
@@ -2170,6 +2255,7 @@ static int flexcan_probe(struct platform_device *pdev)
  failed_register:
        pm_runtime_put_noidle(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
+ failed_platform_get_irq:
        free_candev(dev);
        return err;
 }
@@ -2322,6 +2408,7 @@ static struct platform_driver flexcan_driver = {
        },
        .probe = flexcan_probe,
        .remove = flexcan_remove,
+       .id_table = flexcan_id_table,
 };
 
 module_platform_driver(flexcan_driver);
index 2a6c918..c68ad56 100644 (file)
@@ -1815,9 +1815,9 @@ static int ican3_get_berr_counter(const struct net_device *ndev,
  * Sysfs Attributes
  */
 
-static ssize_t ican3_sysfs_show_term(struct device *dev,
-                                    struct device_attribute *attr,
-                                    char *buf)
+static ssize_t termination_show(struct device *dev,
+                               struct device_attribute *attr,
+                               char *buf)
 {
        struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
        int ret;
@@ -1834,9 +1834,9 @@ static ssize_t ican3_sysfs_show_term(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", mod->termination_enabled);
 }
 
-static ssize_t ican3_sysfs_set_term(struct device *dev,
-                                   struct device_attribute *attr,
-                                   const char *buf, size_t count)
+static ssize_t termination_store(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t count)
 {
        struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
        unsigned long enable;
@@ -1852,18 +1852,17 @@ static ssize_t ican3_sysfs_set_term(struct device *dev,
        return count;
 }
 
-static ssize_t ican3_sysfs_show_fwinfo(struct device *dev,
-                                      struct device_attribute *attr,
-                                      char *buf)
+static ssize_t fwinfo_show(struct device *dev,
+                          struct device_attribute *attr,
+                          char *buf)
 {
        struct ican3_dev *mod = netdev_priv(to_net_dev(dev));
 
        return scnprintf(buf, PAGE_SIZE, "%s\n", mod->fwinfo);
 }
 
-static DEVICE_ATTR(termination, 0644, ican3_sysfs_show_term,
-                  ican3_sysfs_set_term);
-static DEVICE_ATTR(fwinfo, 0444, ican3_sysfs_show_fwinfo, NULL);
+static DEVICE_ATTR_RW(termination);
+static DEVICE_ATTR_RO(fwinfo);
 
 static struct attribute *ican3_sysfs_attrs[] = {
        &dev_attr_termination.attr,
index 43bca31..2470c47 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/iopoll.h>
 #include <linux/can/dev.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/phy/phy.h>
 
 #include "m_can.h"
 
@@ -278,7 +279,7 @@ enum m_can_reg {
 /* Message RAM Elements */
 #define M_CAN_FIFO_ID          0x0
 #define M_CAN_FIFO_DLC         0x4
-#define M_CAN_FIFO_DATA(n)     (0x8 + ((n) << 2))
+#define M_CAN_FIFO_DATA                0x8
 
 /* Rx Buffer Element */
 /* R0 */
@@ -308,6 +309,15 @@ enum m_can_reg {
 #define TX_EVENT_MM_MASK       GENMASK(31, 24)
 #define TX_EVENT_TXTS_MASK     GENMASK(15, 0)
 
+/* The ID and DLC registers are adjacent in M_CAN FIFO memory,
+ * and we can save a (potentially slow) bus round trip by combining
+ * reads and writes to them.
+ */
+struct id_and_dlc {
+       u32 id;
+       u32 dlc;
+};
+
 static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
 {
        return cdev->ops->read_reg(cdev, reg);
@@ -319,36 +329,39 @@ static inline void m_can_write(struct m_can_classdev *cdev, enum m_can_reg reg,
        cdev->ops->write_reg(cdev, reg, val);
 }
 
-static u32 m_can_fifo_read(struct m_can_classdev *cdev,
-                          u32 fgi, unsigned int offset)
+static int
+m_can_fifo_read(struct m_can_classdev *cdev,
+               u32 fgi, unsigned int offset, void *val, size_t val_count)
 {
        u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE +
                offset;
 
-       return cdev->ops->read_fifo(cdev, addr_offset);
+       return cdev->ops->read_fifo(cdev, addr_offset, val, val_count);
 }
 
-static void m_can_fifo_write(struct m_can_classdev *cdev,
-                            u32 fpi, unsigned int offset, u32 val)
+static int
+m_can_fifo_write(struct m_can_classdev *cdev,
+                u32 fpi, unsigned int offset, const void *val, size_t val_count)
 {
        u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE +
                offset;
 
-       cdev->ops->write_fifo(cdev, addr_offset, val);
+       return cdev->ops->write_fifo(cdev, addr_offset, val, val_count);
 }
 
-static inline void m_can_fifo_write_no_off(struct m_can_classdev *cdev,
-                                          u32 fpi, u32 val)
+static inline int m_can_fifo_write_no_off(struct m_can_classdev *cdev,
+                                         u32 fpi, u32 val)
 {
-       cdev->ops->write_fifo(cdev, fpi, val);
+       return cdev->ops->write_fifo(cdev, fpi, &val, 1);
 }
 
-static u32 m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset)
+static int
+m_can_txe_fifo_read(struct m_can_classdev *cdev, u32 fgi, u32 offset, u32 *val)
 {
        u32 addr_offset = cdev->mcfg[MRAM_TXE].off + fgi * TXE_ELEMENT_SIZE +
                offset;
 
-       return cdev->ops->read_fifo(cdev, addr_offset);
+       return cdev->ops->read_fifo(cdev, addr_offset, val, 1);
 }
 
 static inline bool m_can_tx_fifo_full(struct m_can_classdev *cdev)
@@ -436,7 +449,7 @@ static void m_can_clean(struct net_device *net)
  * napi. For non-peripherals, RX is done in napi already, so push
  * directly. timestamp is used to ensure good skb ordering in
  * rx-offload and is ignored for non-peripherals.
-*/
+ */
 static void m_can_receive_skb(struct m_can_classdev *cdev,
                              struct sk_buff *skb,
                              u32 timestamp)
@@ -454,54 +467,57 @@ static void m_can_receive_skb(struct m_can_classdev *cdev,
        }
 }
 
-static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
+static int m_can_read_fifo(struct net_device *dev, u32 rxfs)
 {
        struct net_device_stats *stats = &dev->stats;
        struct m_can_classdev *cdev = netdev_priv(dev);
        struct canfd_frame *cf;
        struct sk_buff *skb;
-       u32 id, fgi, dlc;
+       struct id_and_dlc fifo_header;
+       u32 fgi;
        u32 timestamp = 0;
-       int i;
+       int err;
 
        /* calculate the fifo get index for where to read data */
        fgi = FIELD_GET(RXFS_FGI_MASK, rxfs);
-       dlc = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DLC);
-       if (dlc & RX_BUF_FDF)
+       err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID, &fifo_header, 2);
+       if (err)
+               goto out_fail;
+
+       if (fifo_header.dlc & RX_BUF_FDF)
                skb = alloc_canfd_skb(dev, &cf);
        else
                skb = alloc_can_skb(dev, (struct can_frame **)&cf);
        if (!skb) {
                stats->rx_dropped++;
-               return;
+               return 0;
        }
 
-       if (dlc & RX_BUF_FDF)
-               cf->len = can_fd_dlc2len((dlc >> 16) & 0x0F);
+       if (fifo_header.dlc & RX_BUF_FDF)
+               cf->len = can_fd_dlc2len((fifo_header.dlc >> 16) & 0x0F);
        else
-               cf->len = can_cc_dlc2len((dlc >> 16) & 0x0F);
+               cf->len = can_cc_dlc2len((fifo_header.dlc >> 16) & 0x0F);
 
-       id = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_ID);
-       if (id & RX_BUF_XTD)
-               cf->can_id = (id & CAN_EFF_MASK) | CAN_EFF_FLAG;
+       if (fifo_header.id & RX_BUF_XTD)
+               cf->can_id = (fifo_header.id & CAN_EFF_MASK) | CAN_EFF_FLAG;
        else
-               cf->can_id = (id >> 18) & CAN_SFF_MASK;
+               cf->can_id = (fifo_header.id >> 18) & CAN_SFF_MASK;
 
-       if (id & RX_BUF_ESI) {
+       if (fifo_header.id & RX_BUF_ESI) {
                cf->flags |= CANFD_ESI;
                netdev_dbg(dev, "ESI Error\n");
        }
 
-       if (!(dlc & RX_BUF_FDF) && (id & RX_BUF_RTR)) {
+       if (!(fifo_header.dlc & RX_BUF_FDF) && (fifo_header.id & RX_BUF_RTR)) {
                cf->can_id |= CAN_RTR_FLAG;
        } else {
-               if (dlc & RX_BUF_BRS)
+               if (fifo_header.dlc & RX_BUF_BRS)
                        cf->flags |= CANFD_BRS;
 
-               for (i = 0; i < cf->len; i += 4)
-                       *(u32 *)(cf->data + i) =
-                               m_can_fifo_read(cdev, fgi,
-                                               M_CAN_FIFO_DATA(i / 4));
+               err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA,
+                                     cf->data, DIV_ROUND_UP(cf->len, 4));
+               if (err)
+                       goto out_fail;
        }
 
        /* acknowledge rx fifo 0 */
@@ -510,9 +526,15 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
        stats->rx_packets++;
        stats->rx_bytes += cf->len;
 
-       timestamp = FIELD_GET(RX_BUF_RXTS_MASK, dlc);
+       timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc);
 
        m_can_receive_skb(cdev, skb, timestamp);
+
+       return 0;
+
+out_fail:
+       netdev_err(dev, "FIFO read returned %d\n", err);
+       return err;
 }
 
 static int m_can_do_rx_poll(struct net_device *dev, int quota)
@@ -520,6 +542,7 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
        struct m_can_classdev *cdev = netdev_priv(dev);
        u32 pkts = 0;
        u32 rxfs;
+       int err;
 
        rxfs = m_can_read(cdev, M_CAN_RXF0S);
        if (!(rxfs & RXFS_FFL_MASK)) {
@@ -528,7 +551,9 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
        }
 
        while ((rxfs & RXFS_FFL_MASK) && (quota > 0)) {
-               m_can_read_fifo(dev, rxfs);
+               err = m_can_read_fifo(dev, rxfs);
+               if (err)
+                       return err;
 
                quota--;
                pkts++;
@@ -874,6 +899,7 @@ static int m_can_handle_bus_errors(struct net_device *dev, u32 irqstatus,
 static int m_can_rx_handler(struct net_device *dev, int quota)
 {
        struct m_can_classdev *cdev = netdev_priv(dev);
+       int rx_work_or_err;
        int work_done = 0;
        u32 irqstatus, psr;
 
@@ -910,8 +936,13 @@ static int m_can_rx_handler(struct net_device *dev, int quota)
        if (irqstatus & IR_ERR_BUS_30X)
                work_done += m_can_handle_bus_errors(dev, irqstatus, psr);
 
-       if (irqstatus & IR_RF0N)
-               work_done += m_can_do_rx_poll(dev, (quota - work_done));
+       if (irqstatus & IR_RF0N) {
+               rx_work_or_err = m_can_do_rx_poll(dev, (quota - work_done));
+               if (rx_work_or_err < 0)
+                       return rx_work_or_err;
+
+               work_done += rx_work_or_err;
+       }
 end:
        return work_done;
 }
@@ -919,12 +950,17 @@ end:
 static int m_can_rx_peripheral(struct net_device *dev)
 {
        struct m_can_classdev *cdev = netdev_priv(dev);
+       int work_done;
 
-       m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT);
+       work_done = m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT);
 
-       m_can_enable_all_interrupts(cdev);
+       /* Don't re-enable interrupts if the driver had a fatal error
+        * (e.g., FIFO read failure).
+        */
+       if (work_done >= 0)
+               m_can_enable_all_interrupts(cdev);
 
-       return 0;
+       return work_done;
 }
 
 static int m_can_poll(struct napi_struct *napi, int quota)
@@ -934,7 +970,11 @@ static int m_can_poll(struct napi_struct *napi, int quota)
        int work_done;
 
        work_done = m_can_rx_handler(dev, quota);
-       if (work_done < quota) {
+
+       /* Don't re-enable interrupts if the driver had a fatal error
+        * (e.g., FIFO read failure).
+        */
+       if (work_done >= 0 && work_done < quota) {
                napi_complete_done(napi, work_done);
                m_can_enable_all_interrupts(cdev);
        }
@@ -945,7 +985,7 @@ static int m_can_poll(struct napi_struct *napi, int quota)
 /* Echo tx skb and update net stats. Peripherals use rx-offload for
  * echo. timestamp is used for peripherals to ensure correct ordering
  * by rx-offload, and is ignored for non-peripherals.
-*/
+ */
 static void m_can_tx_update_stats(struct m_can_classdev *cdev,
                                  unsigned int msg_mark,
                                  u32 timestamp)
@@ -965,7 +1005,7 @@ static void m_can_tx_update_stats(struct m_can_classdev *cdev,
        stats->tx_packets++;
 }
 
-static void m_can_echo_tx_event(struct net_device *dev)
+static int m_can_echo_tx_event(struct net_device *dev)
 {
        u32 txe_count = 0;
        u32 m_can_txefs;
@@ -984,12 +1024,18 @@ static void m_can_echo_tx_event(struct net_device *dev)
        /* Get and process all sent elements */
        for (i = 0; i < txe_count; i++) {
                u32 txe, timestamp = 0;
+               int err;
 
                /* retrieve get index */
                fgi = FIELD_GET(TXEFS_EFGI_MASK, m_can_read(cdev, M_CAN_TXEFS));
 
                /* get message marker, timestamp */
-               txe = m_can_txe_fifo_read(cdev, fgi, 4);
+               err = m_can_txe_fifo_read(cdev, fgi, 4, &txe);
+               if (err) {
+                       netdev_err(dev, "TXE FIFO read returned %d\n", err);
+                       return err;
+               }
+
                msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe);
                timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
 
@@ -1000,6 +1046,8 @@ static void m_can_echo_tx_event(struct net_device *dev)
                /* update stats */
                m_can_tx_update_stats(cdev, msg_mark, timestamp);
        }
+
+       return 0;
 }
 
 static irqreturn_t m_can_isr(int irq, void *dev_id)
@@ -1031,8 +1079,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
                m_can_disable_all_interrupts(cdev);
                if (!cdev->is_peripheral)
                        napi_schedule(&cdev->napi);
-               else
-                       m_can_rx_peripheral(dev);
+               else if (m_can_rx_peripheral(dev) < 0)
+                       goto out_fail;
        }
 
        if (cdev->version == 30) {
@@ -1050,7 +1098,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
        } else  {
                if (ir & IR_TEFN) {
                        /* New TX FIFO Element arrived */
-                       m_can_echo_tx_event(dev);
+                       if (m_can_echo_tx_event(dev) != 0)
+                               goto out_fail;
+
                        can_led_event(dev, CAN_LED_EVENT_TX);
                        if (netif_queue_stopped(dev) &&
                            !m_can_tx_fifo_full(cdev))
@@ -1058,6 +1108,13 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
                }
        }
 
+       if (cdev->is_peripheral)
+               can_rx_offload_threaded_irq_finish(&cdev->offload);
+
+       return IRQ_HANDLED;
+
+out_fail:
+       m_can_disable_all_interrupts(cdev);
        return IRQ_HANDLED;
 }
 
@@ -1302,7 +1359,8 @@ static void m_can_chip_config(struct net_device *dev)
        m_can_set_bittiming(dev);
 
        /* enable internal timestamp generation, with a prescalar of 16. The
-        * prescalar is applied to the nominal bit timing */
+        * prescalar is applied to the nominal bit timing
+        */
        m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf));
 
        m_can_config_endisable(cdev, false);
@@ -1436,32 +1494,20 @@ static int m_can_dev_setup(struct m_can_classdev *cdev)
        case 30:
                /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */
                can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_30X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_30X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_30X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X;
                break;
        case 31:
                /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */
                can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO);
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
                break;
        case 32:
        case 33:
                /* Support both MCAN version v3.2.x and v3.3.0 */
-               cdev->can.bittiming_const = cdev->bit_timing ?
-                       cdev->bit_timing : &m_can_bittiming_const_31X;
-
-               cdev->can.data_bittiming_const = cdev->data_timing ?
-                       cdev->data_timing :
-                       &m_can_data_bittiming_const_31X;
+               cdev->can.bittiming_const = &m_can_bittiming_const_31X;
+               cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X;
 
                cdev->can.ctrlmode_supported |=
                        (m_can_niso_supported(cdev) ?
@@ -1518,6 +1564,8 @@ static int m_can_close(struct net_device *dev)
        close_candev(dev);
        can_led_event(dev, CAN_LED_EVENT_STOP);
 
+       phy_power_off(cdev->transceiver);
+
        return 0;
 }
 
@@ -1540,8 +1588,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
        struct canfd_frame *cf = (struct canfd_frame *)cdev->tx_skb->data;
        struct net_device *dev = cdev->net;
        struct sk_buff *skb = cdev->tx_skb;
-       u32 id, cccr, fdflags;
-       int i;
+       struct id_and_dlc fifo_header;
+       u32 cccr, fdflags;
+       int err;
        int putidx;
 
        cdev->tx_skb = NULL;
@@ -1549,27 +1598,29 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
        /* Generate ID field for TX buffer Element */
        /* Common to all supported M_CAN versions */
        if (cf->can_id & CAN_EFF_FLAG) {
-               id = cf->can_id & CAN_EFF_MASK;
-               id |= TX_BUF_XTD;
+               fifo_header.id = cf->can_id & CAN_EFF_MASK;
+               fifo_header.id |= TX_BUF_XTD;
        } else {
-               id = ((cf->can_id & CAN_SFF_MASK) << 18);
+               fifo_header.id = ((cf->can_id & CAN_SFF_MASK) << 18);
        }
 
        if (cf->can_id & CAN_RTR_FLAG)
-               id |= TX_BUF_RTR;
+               fifo_header.id |= TX_BUF_RTR;
 
        if (cdev->version == 30) {
                netif_stop_queue(dev);
 
-               /* message ram configuration */
-               m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, id);
-               m_can_fifo_write(cdev, 0, M_CAN_FIFO_DLC,
-                                can_fd_len2dlc(cf->len) << 16);
+               fifo_header.dlc = can_fd_len2dlc(cf->len) << 16;
+
+               /* Write the frame ID, DLC, and payload to the FIFO element. */
+               err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_ID, &fifo_header, 2);
+               if (err)
+                       goto out_fail;
 
-               for (i = 0; i < cf->len; i += 4)
-                       m_can_fifo_write(cdev, 0,
-                                        M_CAN_FIFO_DATA(i / 4),
-                                        *(u32 *)(cf->data + i));
+               err = m_can_fifo_write(cdev, 0, M_CAN_FIFO_DATA,
+                                      cf->data, DIV_ROUND_UP(cf->len, 4));
+               if (err)
+                       goto out_fail;
 
                can_put_echo_skb(skb, dev, 0, 0);
 
@@ -1613,8 +1664,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
                /* get put index for frame */
                putidx = FIELD_GET(TXFQS_TFQPI_MASK,
                                   m_can_read(cdev, M_CAN_TXFQS));
-               /* Write ID Field to FIFO Element */
-               m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, id);
+
+               /* Construct DLC Field, with CAN-FD configuration.
+                * Use the put index of the fifo as the message marker,
+                * used in the TX interrupt for sending the correct echo frame.
+                */
 
                /* get CAN FD configuration of frame */
                fdflags = 0;
@@ -1624,20 +1678,17 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
                                fdflags |= TX_BUF_BRS;
                }
 
-               /* Construct DLC Field. Also contains CAN-FD configuration
-                * use put index of fifo as message marker
-                * it is used in TX interrupt for
-                * sending the correct echo frame
-                */
-               m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DLC,
-                                FIELD_PREP(TX_BUF_MM_MASK, putidx) |
-                                FIELD_PREP(TX_BUF_DLC_MASK,
-                                           can_fd_len2dlc(cf->len)) |
-                                fdflags | TX_BUF_EFC);
+               fifo_header.dlc = FIELD_PREP(TX_BUF_MM_MASK, putidx) |
+                       FIELD_PREP(TX_BUF_DLC_MASK, can_fd_len2dlc(cf->len)) |
+                       fdflags | TX_BUF_EFC;
+               err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_ID, &fifo_header, 2);
+               if (err)
+                       goto out_fail;
 
-               for (i = 0; i < cf->len; i += 4)
-                       m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA(i / 4),
-                                        *(u32 *)(cf->data + i));
+               err = m_can_fifo_write(cdev, putidx, M_CAN_FIFO_DATA,
+                                      cf->data, DIV_ROUND_UP(cf->len, 4));
+               if (err)
+                       goto out_fail;
 
                /* Push loopback echo.
                 * Will be looped back on TX interrupt based on message marker
@@ -1654,6 +1705,11 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
        }
 
        return NETDEV_TX_OK;
+
+out_fail:
+       netdev_err(dev, "FIFO write returned %d\n", err);
+       m_can_disable_all_interrupts(cdev);
+       return NETDEV_TX_BUSY;
 }
 
 static void m_can_tx_work_queue(struct work_struct *ws)
@@ -1703,10 +1759,14 @@ static int m_can_open(struct net_device *dev)
        struct m_can_classdev *cdev = netdev_priv(dev);
        int err;
 
-       err = m_can_clk_start(cdev);
+       err = phy_power_on(cdev->transceiver);
        if (err)
                return err;
 
+       err = m_can_clk_start(cdev);
+       if (err)
+               goto out_phy_power_off;
+
        /* open the can device */
        err = open_candev(dev);
        if (err) {
@@ -1763,6 +1823,8 @@ out_wq_fail:
        close_candev(dev);
 exit_disable_clks:
        m_can_clk_stop(cdev);
+out_phy_power_off:
+       phy_power_off(cdev->transceiver);
        return err;
 }
 
@@ -1819,9 +1881,10 @@ static void m_can_of_parse_mram(struct m_can_classdev *cdev,
                cdev->mcfg[MRAM_TXB].off, cdev->mcfg[MRAM_TXB].num);
 }
 
-void m_can_init_ram(struct m_can_classdev *cdev)
+int m_can_init_ram(struct m_can_classdev *cdev)
 {
        int end, i, start;
+       int err = 0;
 
        /* initialize the entire Message RAM in use to avoid possible
         * ECC/parity checksum errors when reading an uninitialized buffer
@@ -1830,8 +1893,13 @@ void m_can_init_ram(struct m_can_classdev *cdev)
        end = cdev->mcfg[MRAM_TXB].off +
                cdev->mcfg[MRAM_TXB].num * TXB_ELEMENT_SIZE;
 
-       for (i = start; i < end; i += 4)
-               m_can_fifo_write_no_off(cdev, i, 0x0);
+       for (i = start; i < end; i += 4) {
+               err = m_can_fifo_write_no_off(cdev, i, 0x0);
+               if (err)
+                       break;
+       }
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(m_can_init_ram);
 
index ace071c..d18b515 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/iopoll.h>
 #include <linux/can/dev.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/phy/phy.h>
 
 /* m_can lec values */
 enum m_can_lec_type {
@@ -64,9 +65,9 @@ struct m_can_ops {
        int (*clear_interrupts)(struct m_can_classdev *cdev);
        u32 (*read_reg)(struct m_can_classdev *cdev, int reg);
        int (*write_reg)(struct m_can_classdev *cdev, int reg, int val);
-       u32 (*read_fifo)(struct m_can_classdev *cdev, int addr_offset);
+       int (*read_fifo)(struct m_can_classdev *cdev, int addr_offset, void *val, size_t val_count);
        int (*write_fifo)(struct m_can_classdev *cdev, int addr_offset,
-                         int val);
+                         const void *val, size_t val_count);
        int (*init)(struct m_can_classdev *cdev);
 };
 
@@ -82,9 +83,7 @@ struct m_can_classdev {
        struct workqueue_struct *tx_wq;
        struct work_struct tx_work;
        struct sk_buff *tx_skb;
-
-       struct can_bittiming_const *bit_timing;
-       struct can_bittiming_const *data_timing;
+       struct phy *transceiver;
 
        struct m_can_ops *ops;
 
@@ -102,7 +101,7 @@ void m_can_class_free_dev(struct net_device *net);
 int m_can_class_register(struct m_can_classdev *cdev);
 void m_can_class_unregister(struct m_can_classdev *cdev);
 int m_can_class_get_clocks(struct m_can_classdev *cdev);
-void m_can_init_ram(struct m_can_classdev *priv);
+int m_can_init_ram(struct m_can_classdev *priv);
 
 int m_can_class_suspend(struct device *dev);
 int m_can_class_resume(struct device *dev);
index 1288086..89cc3d4 100644 (file)
@@ -39,11 +39,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg)
        return readl(priv->base + reg);
 }
 
-static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset)
+static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count)
 {
        struct m_can_pci_priv *priv = cdev_to_priv(cdev);
 
-       return readl(priv->base + offset);
+       ioread32_rep(priv->base + offset, val, val_count);
+
+       return 0;
 }
 
 static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -55,11 +57,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
        return 0;
 }
 
-static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val)
+static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
+                           const void *val, size_t val_count)
 {
        struct m_can_pci_priv *priv = cdev_to_priv(cdev);
 
-       writel(val, priv->base + offset);
+       iowrite32_rep(priv->base + offset, val, val_count);
 
        return 0;
 }
index 599de0e..308d4f2 100644 (file)
@@ -6,6 +6,7 @@
 // Copyright (C) 2018-19 Texas Instruments Incorporated - http://www.ti.com/
 
 #include <linux/platform_device.h>
+#include <linux/phy/phy.h>
 
 #include "m_can.h"
 
@@ -28,11 +29,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg)
        return readl(priv->base + reg);
 }
 
-static u32 iomap_read_fifo(struct m_can_classdev *cdev, int offset)
+static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count)
 {
        struct m_can_plat_priv *priv = cdev_to_priv(cdev);
 
-       return readl(priv->mram_base + offset);
+       ioread32_rep(priv->mram_base + offset, val, val_count);
+
+       return 0;
 }
 
 static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -44,11 +47,12 @@ static int iomap_write_reg(struct m_can_classdev *cdev, int reg, int val)
        return 0;
 }
 
-static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, int val)
+static int iomap_write_fifo(struct m_can_classdev *cdev, int offset,
+                           const void *val, size_t val_count)
 {
        struct m_can_plat_priv *priv = cdev_to_priv(cdev);
 
-       writel(val, priv->mram_base + offset);
+       iowrite32_rep(priv->base + offset, val, val_count);
 
        return 0;
 }
@@ -67,6 +71,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
        struct resource *res;
        void __iomem *addr;
        void __iomem *mram_addr;
+       struct phy *transceiver;
        int irq, ret = 0;
 
        mcan_class = m_can_class_allocate_dev(&pdev->dev,
@@ -80,8 +85,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
        if (ret)
                goto probe_fail;
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can");
-       addr = devm_ioremap_resource(&pdev->dev, res);
+       addr = devm_platform_ioremap_resource_byname(pdev, "m_can");
        irq = platform_get_irq_byname(pdev, "int0");
        if (IS_ERR(addr) || irq < 0) {
                ret = -EINVAL;
@@ -101,6 +105,16 @@ static int m_can_plat_probe(struct platform_device *pdev)
                goto probe_fail;
        }
 
+       transceiver = devm_phy_optional_get(&pdev->dev, NULL);
+       if (IS_ERR(transceiver)) {
+               ret = PTR_ERR(transceiver);
+               dev_err_probe(&pdev->dev, ret, "failed to get phy\n");
+               goto probe_fail;
+       }
+
+       if (transceiver)
+               mcan_class->can.bitrate_max = transceiver->attrs.max_link_rate;
+
        priv->base = addr;
        priv->mram_base = mram_addr;
 
@@ -108,6 +122,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
        mcan_class->pm_clock_support = 1;
        mcan_class->can.clock.freq = clk_get_rate(mcan_class->cclk);
        mcan_class->dev = &pdev->dev;
+       mcan_class->transceiver = transceiver;
 
        mcan_class->ops = &m_can_plat_ops;
 
@@ -115,7 +130,9 @@ static int m_can_plat_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, mcan_class);
 
-       m_can_init_ram(mcan_class);
+       ret = m_can_init_ram(mcan_class);
+       if (ret)
+               goto probe_fail;
 
        pm_runtime_enable(mcan_class->dev);
        ret = m_can_class_register(mcan_class);
index 4147cec..04687b1 100644 (file)
 static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev)
 {
        return container_of(cdev, struct tcan4x5x_priv, cdev);
-
 }
 
 static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv)
@@ -154,14 +153,12 @@ static u32 tcan4x5x_read_reg(struct m_can_classdev *cdev, int reg)
        return val;
 }
 
-static u32 tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset)
+static int tcan4x5x_read_fifo(struct m_can_classdev *cdev, int addr_offset,
+                             void *val, size_t val_count)
 {
        struct tcan4x5x_priv *priv = cdev_to_priv(cdev);
-       u32 val;
-
-       regmap_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, &val);
 
-       return val;
+       return regmap_bulk_read(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count);
 }
 
 static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val)
@@ -172,11 +169,11 @@ static int tcan4x5x_write_reg(struct m_can_classdev *cdev, int reg, int val)
 }
 
 static int tcan4x5x_write_fifo(struct m_can_classdev *cdev,
-                              int addr_offset, int val)
+                              int addr_offset, const void *val, size_t val_count)
 {
        struct tcan4x5x_priv *priv = cdev_to_priv(cdev);
 
-       return regmap_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val);
+       return regmap_bulk_write(priv->regmap, TCAN4X5X_MRAM_START + addr_offset, val, val_count);
 }
 
 static int tcan4x5x_power_enable(struct regulator *reg, int enable)
@@ -238,7 +235,9 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
                return ret;
 
        /* Zero out the MCAN buffers */
-       m_can_init_ram(cdev);
+       ret = m_can_init_ram(cdev);
+       if (ret)
+               return ret;
 
        ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
                                 TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL);
index e254e04..35892c1 100644 (file)
@@ -279,7 +279,6 @@ static u32 mpc512x_can_get_clock(struct platform_device *ofdev,
 static const struct of_device_id mpc5xxx_can_table[];
 static int mpc5xxx_can_probe(struct platform_device *ofdev)
 {
-       const struct of_device_id *match;
        const struct mpc5xxx_can_data *data;
        struct device_node *np = ofdev->dev.of_node;
        struct net_device *dev;
@@ -289,10 +288,9 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
        int irq, mscan_clksrc = 0;
        int err = -ENOMEM;
 
-       match = of_match_device(mpc5xxx_can_table, &ofdev->dev);
-       if (!match)
+       data = of_device_get_match_data(&ofdev->dev);
+       if (!data)
                return -EINVAL;
-       data = match->data;
 
        base = of_iomap(np, 0);
        if (!base) {
@@ -319,7 +317,6 @@ static int mpc5xxx_can_probe(struct platform_device *ofdev)
 
        clock_name = of_get_property(np, "fsl,mscan-clock-source", NULL);
 
-       BUG_ON(!data);
        priv->type = data->type;
        priv->can.clock.freq = data->get_clock(ofdev, clock_name,
                                               &mscan_clksrc);
index 29cabc2..56320a7 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config CAN_RCAR
        tristate "Renesas R-Car and RZ/G CAN controller"
-       depends on ARCH_RENESAS || ARM
+       depends on ARCH_RENESAS || ARM || COMPILE_TEST
        help
          Say Y here if you want to use CAN controller found on Renesas R-Car
          or RZ/G SoCs.
@@ -11,7 +11,7 @@ config CAN_RCAR
 
 config CAN_RCAR_CANFD
        tristate "Renesas R-Car CAN FD controller"
-       depends on ARCH_RENESAS || ARM
+       depends on ARCH_RENESAS || ARM || COMPILE_TEST
        help
          Say Y here if you want to use CAN FD controller found on
          Renesas R-Car SoCs. The driver puts the controller in CAN FD only
index 311e6ca..c47988d 100644 (file)
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/iopoll.h>
+#include <linux/reset.h>
 
 #define RCANFD_DRV_NAME                        "rcar_canfd"
 
+enum rcanfd_chip_id {
+       RENESAS_RCAR_GEN3 = 0,
+       RENESAS_RZG2L,
+};
+
 /* Global register bits */
 
 /* RSCFDnCFDGRMCFG */
@@ -513,6 +519,9 @@ struct rcar_canfd_global {
        enum rcar_canfd_fcanclk fcan;   /* CANFD or Ext clock */
        unsigned long channels_mask;    /* Enabled channels mask */
        bool fdmode;                    /* CAN FD or Classical CAN only mode */
+       struct reset_control *rstc1;
+       struct reset_control *rstc2;
+       enum rcanfd_chip_id chip_id;
 };
 
 /* CAN FD mode nominal rate constants */
@@ -1070,38 +1079,70 @@ static void rcar_canfd_tx_done(struct net_device *ndev)
        can_led_event(ndev, CAN_LED_EVENT_TX);
 }
 
+static void rcar_canfd_handle_global_err(struct rcar_canfd_global *gpriv, u32 ch)
+{
+       struct rcar_canfd_channel *priv = gpriv->ch[ch];
+       struct net_device *ndev = priv->ndev;
+       u32 gerfl;
+
+       /* Handle global error interrupts */
+       gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL);
+       if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl)))
+               rcar_canfd_global_error(ndev);
+}
+
+static irqreturn_t rcar_canfd_global_err_interrupt(int irq, void *dev_id)
+{
+       struct rcar_canfd_global *gpriv = dev_id;
+       u32 ch;
+
+       for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+               rcar_canfd_handle_global_err(gpriv, ch);
+
+       return IRQ_HANDLED;
+}
+
+static void rcar_canfd_handle_global_receive(struct rcar_canfd_global *gpriv, u32 ch)
+{
+       struct rcar_canfd_channel *priv = gpriv->ch[ch];
+       u32 ridx = ch + RCANFD_RFFIFO_IDX;
+       u32 sts;
+
+       /* Handle Rx interrupts */
+       sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
+       if (likely(sts & RCANFD_RFSTS_RFIF)) {
+               if (napi_schedule_prep(&priv->napi)) {
+                       /* Disable Rx FIFO interrupts */
+                       rcar_canfd_clear_bit(priv->base,
+                                            RCANFD_RFCC(ridx),
+                                            RCANFD_RFCC_RFIE);
+                       __napi_schedule(&priv->napi);
+               }
+       }
+}
+
+static irqreturn_t rcar_canfd_global_receive_fifo_interrupt(int irq, void *dev_id)
+{
+       struct rcar_canfd_global *gpriv = dev_id;
+       u32 ch;
+
+       for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+               rcar_canfd_handle_global_receive(gpriv, ch);
+
+       return IRQ_HANDLED;
+}
+
 static irqreturn_t rcar_canfd_global_interrupt(int irq, void *dev_id)
 {
        struct rcar_canfd_global *gpriv = dev_id;
-       struct net_device *ndev;
-       struct rcar_canfd_channel *priv;
-       u32 sts, gerfl;
-       u32 ch, ridx;
+       u32 ch;
 
        /* Global error interrupts still indicate a condition specific
         * to a channel. RxFIFO interrupt is a global interrupt.
         */
        for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) {
-               priv = gpriv->ch[ch];
-               ndev = priv->ndev;
-               ridx = ch + RCANFD_RFFIFO_IDX;
-
-               /* Global error interrupts */
-               gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL);
-               if (unlikely(RCANFD_GERFL_ERR(gpriv, gerfl)))
-                       rcar_canfd_global_error(ndev);
-
-               /* Handle Rx interrupts */
-               sts = rcar_canfd_read(priv->base, RCANFD_RFSTS(ridx));
-               if (likely(sts & RCANFD_RFSTS_RFIF)) {
-                       if (napi_schedule_prep(&priv->napi)) {
-                               /* Disable Rx FIFO interrupts */
-                               rcar_canfd_clear_bit(priv->base,
-                                                    RCANFD_RFCC(ridx),
-                                                    RCANFD_RFCC_RFIE);
-                               __napi_schedule(&priv->napi);
-                       }
-               }
+               rcar_canfd_handle_global_err(gpriv, ch);
+               rcar_canfd_handle_global_receive(gpriv, ch);
        }
        return IRQ_HANDLED;
 }
@@ -1139,38 +1180,73 @@ static void rcar_canfd_state_change(struct net_device *ndev,
        }
 }
 
-static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id)
+static void rcar_canfd_handle_channel_tx(struct rcar_canfd_global *gpriv, u32 ch)
+{
+       struct rcar_canfd_channel *priv = gpriv->ch[ch];
+       struct net_device *ndev = priv->ndev;
+       u32 sts;
+
+       /* Handle Tx interrupts */
+       sts = rcar_canfd_read(priv->base,
+                             RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX));
+       if (likely(sts & RCANFD_CFSTS_CFTXIF))
+               rcar_canfd_tx_done(ndev);
+}
+
+static irqreturn_t rcar_canfd_channel_tx_interrupt(int irq, void *dev_id)
 {
        struct rcar_canfd_global *gpriv = dev_id;
-       struct net_device *ndev;
-       struct rcar_canfd_channel *priv;
-       u32 sts, ch, cerfl;
+       u32 ch;
+
+       for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+               rcar_canfd_handle_channel_tx(gpriv, ch);
+
+       return IRQ_HANDLED;
+}
+
+static void rcar_canfd_handle_channel_err(struct rcar_canfd_global *gpriv, u32 ch)
+{
+       struct rcar_canfd_channel *priv = gpriv->ch[ch];
+       struct net_device *ndev = priv->ndev;
        u16 txerr, rxerr;
+       u32 sts, cerfl;
+
+       /* Handle channel error interrupts */
+       cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch));
+       sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch));
+       txerr = RCANFD_CSTS_TECCNT(sts);
+       rxerr = RCANFD_CSTS_RECCNT(sts);
+       if (unlikely(RCANFD_CERFL_ERR(cerfl)))
+               rcar_canfd_error(ndev, cerfl, txerr, rxerr);
+
+       /* Handle state change to lower states */
+       if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE &&
+                    priv->can.state != CAN_STATE_BUS_OFF))
+               rcar_canfd_state_change(ndev, txerr, rxerr);
+}
+
+static irqreturn_t rcar_canfd_channel_err_interrupt(int irq, void *dev_id)
+{
+       struct rcar_canfd_global *gpriv = dev_id;
+       u32 ch;
+
+       for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS)
+               rcar_canfd_handle_channel_err(gpriv, ch);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t rcar_canfd_channel_interrupt(int irq, void *dev_id)
+{
+       struct rcar_canfd_global *gpriv = dev_id;
+       u32 ch;
 
        /* Common FIFO is a per channel resource */
        for_each_set_bit(ch, &gpriv->channels_mask, RCANFD_NUM_CHANNELS) {
-               priv = gpriv->ch[ch];
-               ndev = priv->ndev;
-
-               /* Channel error interrupts */
-               cerfl = rcar_canfd_read(priv->base, RCANFD_CERFL(ch));
-               sts = rcar_canfd_read(priv->base, RCANFD_CSTS(ch));
-               txerr = RCANFD_CSTS_TECCNT(sts);
-               rxerr = RCANFD_CSTS_RECCNT(sts);
-               if (unlikely(RCANFD_CERFL_ERR(cerfl)))
-                       rcar_canfd_error(ndev, cerfl, txerr, rxerr);
-
-               /* Handle state change to lower states */
-               if (unlikely((priv->can.state != CAN_STATE_ERROR_ACTIVE) &&
-                            (priv->can.state != CAN_STATE_BUS_OFF)))
-                       rcar_canfd_state_change(ndev, txerr, rxerr);
-
-               /* Handle Tx interrupts */
-               sts = rcar_canfd_read(priv->base,
-                                     RCANFD_CFSTS(ch, RCANFD_CFFIFO_IDX));
-               if (likely(sts & RCANFD_CFSTS_CFTXIF))
-                       rcar_canfd_tx_done(ndev);
+               rcar_canfd_handle_channel_err(gpriv, ch);
+               rcar_canfd_handle_channel_tx(gpriv, ch);
        }
+
        return IRQ_HANDLED;
 }
 
@@ -1577,6 +1653,53 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch,
        priv->can.clock.freq = fcan_freq;
        dev_info(&pdev->dev, "can_clk rate is %u\n", priv->can.clock.freq);
 
+       if (gpriv->chip_id == RENESAS_RZG2L) {
+               char *irq_name;
+               int err_irq;
+               int tx_irq;
+
+               err_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_err" : "ch1_err");
+               if (err_irq < 0) {
+                       err = err_irq;
+                       goto fail;
+               }
+
+               tx_irq = platform_get_irq_byname(pdev, ch == 0 ? "ch0_trx" : "ch1_trx");
+               if (tx_irq < 0) {
+                       err = tx_irq;
+                       goto fail;
+               }
+
+               irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+                                         "canfd.ch%d_err", ch);
+               if (!irq_name) {
+                       err = -ENOMEM;
+                       goto fail;
+               }
+               err = devm_request_irq(&pdev->dev, err_irq,
+                                      rcar_canfd_channel_err_interrupt, 0,
+                                      irq_name, gpriv);
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq CH Err(%d) failed, error %d\n",
+                               err_irq, err);
+                       goto fail;
+               }
+               irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+                                         "canfd.ch%d_trx", ch);
+               if (!irq_name) {
+                       err = -ENOMEM;
+                       goto fail;
+               }
+               err = devm_request_irq(&pdev->dev, tx_irq,
+                                      rcar_canfd_channel_tx_interrupt, 0,
+                                      irq_name, gpriv);
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq Tx (%d) failed, error %d\n",
+                               tx_irq, err);
+                       goto fail;
+               }
+       }
+
        if (gpriv->fdmode) {
                priv->can.bittiming_const = &rcar_canfd_nom_bittiming_const;
                priv->can.data_bittiming_const =
@@ -1636,7 +1759,11 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        struct device_node *of_child;
        unsigned long channels_mask = 0;
        int err, ch_irq, g_irq;
+       int g_err_irq, g_recc_irq;
        bool fdmode = true;                     /* CAN FD only mode - default */
+       enum rcanfd_chip_id chip_id;
+
+       chip_id = (uintptr_t)of_device_get_match_data(&pdev->dev);
 
        if (of_property_read_bool(pdev->dev.of_node, "renesas,no-can-fd"))
                fdmode = false;                 /* Classical CAN only mode */
@@ -1649,16 +1776,30 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        if (of_child && of_device_is_available(of_child))
                channels_mask |= BIT(1);        /* Channel 1 */
 
-       ch_irq = platform_get_irq(pdev, 0);
-       if (ch_irq < 0) {
-               err = ch_irq;
-               goto fail_dev;
-       }
+       if (chip_id == RENESAS_RCAR_GEN3) {
+               ch_irq = platform_get_irq_byname_optional(pdev, "ch_int");
+               if (ch_irq < 0) {
+                       /* For backward compatibility get irq by index */
+                       ch_irq = platform_get_irq(pdev, 0);
+                       if (ch_irq < 0)
+                               return ch_irq;
+               }
 
-       g_irq = platform_get_irq(pdev, 1);
-       if (g_irq < 0) {
-               err = g_irq;
-               goto fail_dev;
+               g_irq = platform_get_irq_byname_optional(pdev, "g_int");
+               if (g_irq < 0) {
+                       /* For backward compatibility get irq by index */
+                       g_irq = platform_get_irq(pdev, 1);
+                       if (g_irq < 0)
+                               return g_irq;
+               }
+       } else {
+               g_err_irq = platform_get_irq_byname(pdev, "g_err");
+               if (g_err_irq < 0)
+                       return g_err_irq;
+
+               g_recc_irq = platform_get_irq_byname(pdev, "g_recc");
+               if (g_recc_irq < 0)
+                       return g_recc_irq;
        }
 
        /* Global controller context */
@@ -1670,6 +1811,19 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        gpriv->pdev = pdev;
        gpriv->channels_mask = channels_mask;
        gpriv->fdmode = fdmode;
+       gpriv->chip_id = chip_id;
+
+       if (gpriv->chip_id == RENESAS_RZG2L) {
+               gpriv->rstc1 = devm_reset_control_get_exclusive(&pdev->dev, "rstp_n");
+               if (IS_ERR(gpriv->rstc1))
+                       return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc1),
+                                            "failed to get rstp_n\n");
+
+               gpriv->rstc2 = devm_reset_control_get_exclusive(&pdev->dev, "rstc_n");
+               if (IS_ERR(gpriv->rstc2))
+                       return dev_err_probe(&pdev->dev, PTR_ERR(gpriv->rstc2),
+                                            "failed to get rstc_n\n");
+       }
 
        /* Peripheral clock */
        gpriv->clkp = devm_clk_get(&pdev->dev, "fck");
@@ -1699,7 +1853,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        }
        fcan_freq = clk_get_rate(gpriv->can_clk);
 
-       if (gpriv->fcan == RCANFD_CANFDCLK)
+       if (gpriv->fcan == RCANFD_CANFDCLK && gpriv->chip_id == RENESAS_RCAR_GEN3)
                /* CANFD clock is further divided by (1/2) within the IP */
                fcan_freq /= 2;
 
@@ -1711,20 +1865,51 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        gpriv->base = addr;
 
        /* Request IRQ that's common for both channels */
-       err = devm_request_irq(&pdev->dev, ch_irq,
-                              rcar_canfd_channel_interrupt, 0,
-                              "canfd.chn", gpriv);
-       if (err) {
-               dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
-                       ch_irq, err);
-               goto fail_dev;
+       if (gpriv->chip_id == RENESAS_RCAR_GEN3) {
+               err = devm_request_irq(&pdev->dev, ch_irq,
+                                      rcar_canfd_channel_interrupt, 0,
+                                      "canfd.ch_int", gpriv);
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+                               ch_irq, err);
+                       goto fail_dev;
+               }
+
+               err = devm_request_irq(&pdev->dev, g_irq,
+                                      rcar_canfd_global_interrupt, 0,
+                                      "canfd.g_int", gpriv);
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+                               g_irq, err);
+                       goto fail_dev;
+               }
+       } else {
+               err = devm_request_irq(&pdev->dev, g_recc_irq,
+                                      rcar_canfd_global_receive_fifo_interrupt, 0,
+                                      "canfd.g_recc", gpriv);
+
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+                               g_recc_irq, err);
+                       goto fail_dev;
+               }
+
+               err = devm_request_irq(&pdev->dev, g_err_irq,
+                                      rcar_canfd_global_err_interrupt, 0,
+                                      "canfd.g_err", gpriv);
+               if (err) {
+                       dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
+                               g_err_irq, err);
+                       goto fail_dev;
+               }
        }
-       err = devm_request_irq(&pdev->dev, g_irq,
-                              rcar_canfd_global_interrupt, 0,
-                              "canfd.gbl", gpriv);
+
+       err = reset_control_reset(gpriv->rstc1);
+       if (err)
+               goto fail_dev;
+       err = reset_control_reset(gpriv->rstc2);
        if (err) {
-               dev_err(&pdev->dev, "devm_request_irq(%d) failed, error %d\n",
-                       g_irq, err);
+               reset_control_assert(gpriv->rstc1);
                goto fail_dev;
        }
 
@@ -1733,7 +1918,7 @@ static int rcar_canfd_probe(struct platform_device *pdev)
        if (err) {
                dev_err(&pdev->dev,
                        "failed to enable peripheral clock, error %d\n", err);
-               goto fail_dev;
+               goto fail_reset;
        }
 
        err = rcar_canfd_reset_controller(gpriv);
@@ -1790,6 +1975,9 @@ fail_mode:
        rcar_canfd_disable_global_interrupts(gpriv);
 fail_clk:
        clk_disable_unprepare(gpriv->clkp);
+fail_reset:
+       reset_control_assert(gpriv->rstc1);
+       reset_control_assert(gpriv->rstc2);
 fail_dev:
        return err;
 }
@@ -1810,6 +1998,9 @@ static int rcar_canfd_remove(struct platform_device *pdev)
        /* Enter global sleep mode */
        rcar_canfd_set_bit(gpriv->base, RCANFD_GCTR, RCANFD_GCTR_GSLPR);
        clk_disable_unprepare(gpriv->clkp);
+       reset_control_assert(gpriv->rstc1);
+       reset_control_assert(gpriv->rstc2);
+
        return 0;
 }
 
@@ -1827,7 +2018,8 @@ static SIMPLE_DEV_PM_OPS(rcar_canfd_pm_ops, rcar_canfd_suspend,
                         rcar_canfd_resume);
 
 static const struct of_device_id rcar_canfd_of_table[] = {
-       { .compatible = "renesas,rcar-gen3-canfd" },
+       { .compatible = "renesas,rcar-gen3-canfd", .data = (void *)RENESAS_RCAR_GEN3 },
+       { .compatible = "renesas,rzg2l-canfd", .data = (void *)RENESAS_RZG2L },
        { }
 };
 
index 84eac8c..6db90dc 100644 (file)
@@ -28,6 +28,10 @@ MODULE_LICENSE("GPL v2");
 
 #define DRV_NAME  "peak_pci"
 
+/* FPGA cards FW version registers */
+#define PEAK_VER_REG1          0x40
+#define PEAK_VER_REG2          0x44
+
 struct peak_pciec_card;
 struct peak_pci_chan {
        void __iomem *cfg_base;         /* Common for all channels */
@@ -41,9 +45,7 @@ struct peak_pci_chan {
 #define PEAK_PCI_CDR           (CDR_CBP | CDR_CLKOUT_MASK)
 #define PEAK_PCI_OCR           OCR_TX0_PUSHPULL
 
-/*
- * Important PITA registers
- */
+/* Important PITA registers */
 #define PITA_ICR               0x00    /* Interrupt control register */
 #define PITA_GPIOICR           0x18    /* GPIO interface control register */
 #define PITA_MISC              0x1C    /* Miscellaneous register */
@@ -70,27 +72,47 @@ static const u16 peak_pci_icr_masks[PEAK_PCI_CHAN_MAX] = {
 };
 
 static const struct pci_device_id peak_pci_tbl[] = {
-       {PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,},
+       {
+               PEAK_PCI_VENDOR_ID, PEAK_PCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-PCI",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_PCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-PCI Express",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_MPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-miniPCI",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_MPCIE_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-miniPCIe",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_PC_104P_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-PC/104-Plus Quad",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_PCI_104E_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-PCI/104-Express",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_CPCI_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-cPCI",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_PCIE_OEM_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-Chip PCIe",
+       },
 #ifdef CONFIG_CAN_PEAK_PCIEC
-       {PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
-       {PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
+       {
+               PEAK_PCI_VENDOR_ID, PEAK_PCIEC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-ExpressCard",
+       }, {
+               PEAK_PCI_VENDOR_ID, PEAK_PCIEC34_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,
+               .driver_data = (kernel_ulong_t)"PCAN-ExpressCard 34",
+       },
 #endif
-       {0,}
+       { /* sentinel */ }
 };
 
 MODULE_DEVICE_TABLE(pci, peak_pci_tbl);
 
 #ifdef CONFIG_CAN_PEAK_PCIEC
-/*
- * PCAN-ExpressCard needs I2C bit-banging configuration option.
- */
+/* PCAN-ExpressCard needs I2C bit-banging configuration option. */
 
 /* GPIOICR byte access offsets */
 #define PITA_GPOUT             0x18    /* GPx output value */
@@ -156,12 +178,14 @@ static void peak_pci_write_reg(const struct sja1000_priv *priv,
 static inline void pita_set_scl_highz(struct peak_pciec_card *card)
 {
        u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SCL;
+
        writeb(gp_outen, card->cfg_base + PITA_GPOEN);
 }
 
 static inline void pita_set_sda_highz(struct peak_pciec_card *card)
 {
        u8 gp_outen = readb(card->cfg_base + PITA_GPOEN) & ~PITA_GPIN_SDA;
+
        writeb(gp_outen, card->cfg_base + PITA_GPOEN);
 }
 
@@ -230,9 +254,7 @@ static int pita_getscl(void *data)
        return (readb(card->cfg_base + PITA_GPIN) & PITA_GPIN_SCL) ? 1 : 0;
 }
 
-/*
- * write commands to the LED chip though the I2C-bus of the PCAN-PCIeC
- */
+/* write commands to the LED chip though the I2C-bus of the PCAN-PCIeC */
 static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
                                    u8 offset, u8 data)
 {
@@ -248,7 +270,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
        int ret;
 
        /* cache led mask */
-       if ((offset == 5) && (data == card->led_cache))
+       if (offset == 5 && data == card->led_cache)
                return 0;
 
        ret = i2c_transfer(&card->led_chip, &msg, 1);
@@ -261,9 +283,7 @@ static int peak_pciec_write_pca9553(struct peak_pciec_card *card,
        return 0;
 }
 
-/*
- * delayed work callback used to control the LEDs
- */
+/* delayed work callback used to control the LEDs */
 static void peak_pciec_led_work(struct work_struct *work)
 {
        struct peak_pciec_card *card =
@@ -309,9 +329,7 @@ static void peak_pciec_led_work(struct work_struct *work)
                schedule_delayed_work(&card->led_work, HZ);
 }
 
-/*
- * set LEDs blinking state
- */
+/* set LEDs blinking state */
 static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s)
 {
        u8 new_led = card->led_cache;
@@ -328,25 +346,19 @@ static void peak_pciec_set_leds(struct peak_pciec_card *card, u8 led_mask, u8 s)
        peak_pciec_write_pca9553(card, 5, new_led);
 }
 
-/*
- * start one second delayed work to control LEDs
- */
+/* start one second delayed work to control LEDs */
 static void peak_pciec_start_led_work(struct peak_pciec_card *card)
 {
        schedule_delayed_work(&card->led_work, HZ);
 }
 
-/*
- * stop LEDs delayed work
- */
+/* stop LEDs delayed work */
 static void peak_pciec_stop_led_work(struct peak_pciec_card *card)
 {
        cancel_delayed_work_sync(&card->led_work);
 }
 
-/*
- * initialize the PCA9553 4-bit I2C-bus LED chip
- */
+/* initialize the PCA9553 4-bit I2C-bus LED chip */
 static int peak_pciec_init_leds(struct peak_pciec_card *card)
 {
        int err;
@@ -375,17 +387,14 @@ static int peak_pciec_init_leds(struct peak_pciec_card *card)
        return peak_pciec_write_pca9553(card, 5, PCA9553_LS0_INIT);
 }
 
-/*
- * restore LEDs state to off peak_pciec_leds_exit
- */
+/* restore LEDs state to off peak_pciec_leds_exit */
 static void peak_pciec_leds_exit(struct peak_pciec_card *card)
 {
        /* switch LEDs to off */
        peak_pciec_write_pca9553(card, 5, PCA9553_LED_OFF_ALL);
 }
 
-/*
- * normal write sja1000 register method overloaded to catch when controller
+/* normal write sja1000 register method overloaded to catch when controller
  * is started or stopped, to control leds
  */
 static void peak_pciec_write_reg(const struct sja1000_priv *priv,
@@ -443,7 +452,7 @@ static int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev)
        /* channel is the first one: do the init part */
        } else {
                /* create the bit banging I2C adapter structure */
-               card = kzalloc(sizeof(struct peak_pciec_card), GFP_KERNEL);
+               card = kzalloc(sizeof(*card), GFP_KERNEL);
                if (!card)
                        return -ENOMEM;
 
@@ -506,9 +515,7 @@ static void peak_pciec_remove(struct peak_pciec_card *card)
 
 #else /* CONFIG_CAN_PEAK_PCIEC */
 
-/*
- * Placebo functions when PCAN-ExpressCard support is not selected
- */
+/* Placebo functions when PCAN-ExpressCard support is not selected */
 static inline int peak_pciec_probe(struct pci_dev *pdev, struct net_device *dev)
 {
        return -ENODEV;
@@ -549,6 +556,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        void __iomem *cfg_base, *reg_base;
        u16 sub_sys_id, icr;
        int i, err, channels;
+       char fw_str[14] = "";
 
        err = pci_enable_device(pdev);
        if (err)
@@ -602,6 +610,21 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* Leave parport mux mode */
        writeb(0x04, cfg_base + PITA_MISC + 3);
 
+       /* FPGA equipped card if not 0 */
+       if (readl(cfg_base + PEAK_VER_REG1)) {
+               /* FPGA card: display version of the running firmware */
+               u32 fw_ver = readl(cfg_base + PEAK_VER_REG2);
+
+               snprintf(fw_str, sizeof(fw_str), " FW v%u.%u.%u",
+                        (fw_ver >> 12) & 0xf,
+                        (fw_ver >> 8) & 0xf,
+                        (fw_ver >> 4) & 0xf);
+       }
+
+       /* Display commercial name (and, eventually, FW version) of the card */
+       dev_info(&pdev->dev, "%ux CAN %s%s\n",
+                channels, (const char *)ent->driver_data, fw_str);
+
        icr = readw(cfg_base + PITA_ICR + 2);
 
        for (i = 0; i < channels; i++) {
@@ -642,8 +665,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                chan->prev_dev = pci_get_drvdata(pdev);
                pci_set_drvdata(pdev, dev);
 
-               /*
-                * PCAN-ExpressCard needs some additional i2c init.
+               /* PCAN-ExpressCard needs some additional i2c init.
                 * This must be done *before* register_sja1000dev() but
                 * *after* devices linkage
                 */
@@ -709,7 +731,8 @@ failure_disable_pci:
 
        /* pci_xxx_config_word() return positive PCIBIOS_xxx error codes while
         * the probe() function must return a negative errno in case of failure
-        * (err is unchanged if negative) */
+        * (err is unchanged if negative)
+        */
        return pcibios_err_to_errno(err);
 }
 
index 9ae4807..673861a 100644 (file)
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 
 #include <asm/unaligned.h>
 
@@ -1456,7 +1456,7 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
 }
 
 static void
-mcp251xfd_hw_rx_obj_to_skb(struct mcp251xfd_priv *priv,
+mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
                           const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
                           struct sk_buff *skb)
 {
@@ -2195,8 +2195,10 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
                        FIELD_GET(MCP251XFD_REG_INT_IE_MASK,
                                  priv->regs_status.intf);
 
-               if (!(intf_pending))
+               if (!(intf_pending)) {
+                       can_rx_offload_threaded_irq_finish(&priv->offload);
                        return handled;
+               }
 
                /* Some interrupts must be ACKed in the
                 * MCP251XFD_REG_INT register.
@@ -2296,6 +2298,8 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
        } while (1);
 
  out_fail:
+       can_rx_offload_threaded_irq_finish(&priv->offload);
+
        netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n",
                   err, priv->regs_status.intf);
        mcp251xfd_dump(priv);
@@ -2524,8 +2528,8 @@ static int mcp251xfd_open(struct net_device *ndev)
        can_rx_offload_enable(&priv->offload);
 
        err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
-                                  IRQF_ONESHOT, dev_name(&spi->dev),
-                                  priv);
+                                  IRQF_SHARED | IRQF_ONESHOT,
+                                  dev_name(&spi->dev), priv);
        if (err)
                goto out_can_rx_offload_disable;
 
@@ -2857,7 +2861,7 @@ static int mcp251xfd_probe(struct spi_device *spi)
        struct gpio_desc *rx_int;
        struct regulator *reg_vdd, *reg_xceiver;
        struct clk *clk;
-       u32 freq;
+       u32 freq = 0;
        int err;
 
        if (!spi->irq)
@@ -2884,11 +2888,19 @@ static int mcp251xfd_probe(struct spi_device *spi)
                return dev_err_probe(&spi->dev, PTR_ERR(reg_xceiver),
                                     "Failed to get Transceiver regulator!\n");
 
-       clk = devm_clk_get(&spi->dev, NULL);
+       clk = devm_clk_get_optional(&spi->dev, NULL);
        if (IS_ERR(clk))
                return dev_err_probe(&spi->dev, PTR_ERR(clk),
                                     "Failed to get Oscillator (clock)!\n");
-       freq = clk_get_rate(clk);
+       if (clk) {
+               freq = clk_get_rate(clk);
+       } else {
+               err = device_property_read_u32(&spi->dev, "clock-frequency",
+                                              &freq);
+               if (err)
+                       return dev_err_probe(&spi->dev, err,
+                                            "Failed to get clock-frequency!\n");
+       }
 
        /* Sanity check */
        if (freq < MCP251XFD_SYSCLOCK_HZ_MIN ||
index ed31692..712e091 100644 (file)
@@ -13,7 +13,7 @@
 
 static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc)
 {
-       struct mcp251xfd_priv *priv;
+       const struct mcp251xfd_priv *priv;
        u32 timestamp = 0;
        int err;
 
@@ -39,7 +39,7 @@ static void mcp251xfd_timestamp_work(struct work_struct *work)
                              MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
 }
 
-void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
                                 struct sk_buff *skb, u32 timestamp)
 {
        struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
index 1002f39..0f322da 100644 (file)
@@ -853,7 +853,7 @@ int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv);
 u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size,
                             const void *data, size_t data_size);
 u16 mcp251xfd_crc16_compute(const void *data, size_t data_size);
-void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv,
                                 struct sk_buff *skb, u32 timestamp);
 void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
 void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
index 73245d8..353062e 100644 (file)
@@ -786,6 +786,8 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
                int_status = hecc_read(priv, HECC_CANGIF0);
        }
 
+       can_rx_offload_irq_finish(&priv->offload);
+
        return IRQ_HANDLED;
 }
 
index 95ae740..c6068a2 100644 (file)
@@ -476,7 +476,7 @@ static void esd_usb2_write_bulk_callback(struct urb *urb)
        netif_trans_update(netdev);
 }
 
-static ssize_t show_firmware(struct device *d,
+static ssize_t firmware_show(struct device *d,
                             struct device_attribute *attr, char *buf)
 {
        struct usb_interface *intf = to_usb_interface(d);
@@ -487,9 +487,9 @@ static ssize_t show_firmware(struct device *d,
                       (dev->version >> 8) & 0xf,
                       dev->version & 0xff);
 }
-static DEVICE_ATTR(firmware, 0444, show_firmware, NULL);
+static DEVICE_ATTR_RO(firmware);
 
-static ssize_t show_hardware(struct device *d,
+static ssize_t hardware_show(struct device *d,
                             struct device_attribute *attr, char *buf)
 {
        struct usb_interface *intf = to_usb_interface(d);
@@ -500,9 +500,9 @@ static ssize_t show_hardware(struct device *d,
                       (dev->version >> 24) & 0xf,
                       (dev->version >> 16) & 0xff);
 }
-static DEVICE_ATTR(hardware, 0444, show_hardware, NULL);
+static DEVICE_ATTR_RO(hardware);
 
-static ssize_t show_nets(struct device *d,
+static ssize_t nets_show(struct device *d,
                         struct device_attribute *attr, char *buf)
 {
        struct usb_interface *intf = to_usb_interface(d);
@@ -510,7 +510,7 @@ static ssize_t show_nets(struct device *d,
 
        return sprintf(buf, "%d", dev->net_count);
 }
-static DEVICE_ATTR(nets, 0444, show_nets, NULL);
+static DEVICE_ATTR_RO(nets);
 
 static int esd_usb2_send_msg(struct esd_usb2 *dev, struct esd_usb2_msg *msg)
 {
index 1985f77..14e360c 100644 (file)
@@ -355,7 +355,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv,
                return -EMSGSIZE;
 
        if (priv->tx_can_msg_cnt == 0) {
-               msg_len = 1; /* struct es581_4_bulk_tx_can_msg:num_can_msg */
+               msg_len = sizeof(es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg);
                es581_4_fill_urb_header(urb_cmd, ES581_4_CAN_COMMAND_TYPE,
                                        ES581_4_CMD_ID_TX_MSG,
                                        priv->channel_idx, msg_len);
@@ -371,8 +371,7 @@ static int es581_4_tx_can_msg(struct es58x_priv *priv,
                return ret;
 
        /* Fill message contents. */
-       tx_can_msg = (struct es581_4_tx_can_msg *)
-           &es581_4_urb_cmd->bulk_tx_can_msg.tx_can_msg_buf[msg_len - 1];
+       tx_can_msg = (typeof(tx_can_msg))&es581_4_urb_cmd->raw_msg[msg_len];
        put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
        put_unaligned_le32(priv->tx_head, &tx_can_msg->packet_idx);
        put_unaligned_le16((u16)es58x_get_flags(skb), &tx_can_msg->flags);
index 8e91024..96a13c7 100644 (file)
@@ -19,7 +19,7 @@
 #include "es58x_core.h"
 
 #define DRV_VERSION "1.00"
-MODULE_AUTHOR("Mailhol Vincent <mailhol.vincent@wanadoo.fr>");
+MODULE_AUTHOR("Vincent Mailhol <mailhol.vincent@wanadoo.fr>");
 MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>");
 MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters");
 MODULE_VERSION(DRV_VERSION);
@@ -70,7 +70,7 @@ MODULE_DEVICE_TABLE(usb, es58x_id_table);
  * bytes (the start of frame) are skipped and the CRC calculation
  * starts on the third byte.
  */
-#define ES58X_CRC_CALC_OFFSET 2
+#define ES58X_CRC_CALC_OFFSET sizeof_field(union es58x_urb_cmd, sof)
 
 /**
  * es58x_calculate_crc() - Compute the crc16 of a given URB.
@@ -2107,6 +2107,25 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx)
        return ret;
 }
 
+/**
+ * es58x_free_netdevs() - Release all network resources of the device.
+ * @es58x_dev: ES58X device.
+ */
+static void es58x_free_netdevs(struct es58x_device *es58x_dev)
+{
+       int i;
+
+       for (i = 0; i < es58x_dev->num_can_ch; i++) {
+               struct net_device *netdev = es58x_dev->netdev[i];
+
+               if (!netdev)
+                       continue;
+               unregister_candev(netdev);
+               es58x_dev->netdev[i] = NULL;
+               free_candev(netdev);
+       }
+}
+
 /**
  * es58x_get_product_info() - Get the product information and print them.
  * @es58x_dev: ES58X device.
@@ -2152,14 +2171,13 @@ static int es58x_get_product_info(struct es58x_device *es58x_dev)
 /**
  * es58x_init_es58x_dev() - Initialize the ES58X device.
  * @intf: USB interface.
- * @p_es58x_dev: pointer to the address of the ES58X device.
  * @driver_info: Quirks of the device.
  *
- * Return: zero on success, errno when any error occurs.
+ * Return: pointer to an ES58X device on success, error pointer when
+ *     any error occurs.
  */
-static int es58x_init_es58x_dev(struct usb_interface *intf,
-                               struct es58x_device **p_es58x_dev,
-                               kernel_ulong_t driver_info)
+static struct es58x_device *es58x_init_es58x_dev(struct usb_interface *intf,
+                                                kernel_ulong_t driver_info)
 {
        struct device *dev = &intf->dev;
        struct es58x_device *es58x_dev;
@@ -2176,7 +2194,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
        ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out,
                                        NULL, NULL);
        if (ret)
-               return ret;
+               return ERR_PTR(ret);
 
        if (driver_info & ES58X_FD_FAMILY) {
                param = &es58x_fd_param;
@@ -2186,9 +2204,10 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
                ops = &es581_4_ops;
        }
 
-       es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL);
+       es58x_dev = devm_kzalloc(dev, es58x_sizeof_es58x_device(param),
+                                GFP_KERNEL);
        if (!es58x_dev)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        es58x_dev->param = param;
        es58x_dev->ops = ops;
@@ -2213,9 +2232,7 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
                                             ep_out->bEndpointAddress);
        es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize);
 
-       *p_es58x_dev = es58x_dev;
-
-       return 0;
+       return es58x_dev;
 }
 
 /**
@@ -2232,30 +2249,21 @@ static int es58x_probe(struct usb_interface *intf,
        struct es58x_device *es58x_dev;
        int ch_idx, ret;
 
-       ret = es58x_init_es58x_dev(intf, &es58x_dev, id->driver_info);
-       if (ret)
-               return ret;
+       es58x_dev = es58x_init_es58x_dev(intf, id->driver_info);
+       if (IS_ERR(es58x_dev))
+               return PTR_ERR(es58x_dev);
 
        ret = es58x_get_product_info(es58x_dev);
        if (ret)
-               goto cleanup_es58x_dev;
+               return ret;
 
        for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) {
                ret = es58x_init_netdev(es58x_dev, ch_idx);
-               if (ret)
-                       goto cleanup_candev;
-       }
-
-       return ret;
-
- cleanup_candev:
-       for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++)
-               if (es58x_dev->netdev[ch_idx]) {
-                       unregister_candev(es58x_dev->netdev[ch_idx]);
-                       free_candev(es58x_dev->netdev[ch_idx]);
+               if (ret) {
+                       es58x_free_netdevs(es58x_dev);
+                       return ret;
                }
- cleanup_es58x_dev:
-       kfree(es58x_dev);
+       }
 
        return ret;
 }
@@ -2270,24 +2278,12 @@ static int es58x_probe(struct usb_interface *intf,
 static void es58x_disconnect(struct usb_interface *intf)
 {
        struct es58x_device *es58x_dev = usb_get_intfdata(intf);
-       struct net_device *netdev;
-       int i;
 
        dev_info(&intf->dev, "Disconnecting %s %s\n",
                 es58x_dev->udev->manufacturer, es58x_dev->udev->product);
 
-       for (i = 0; i < es58x_dev->num_can_ch; i++) {
-               netdev = es58x_dev->netdev[i];
-               if (!netdev)
-                       continue;
-               unregister_candev(netdev);
-               es58x_dev->netdev[i] = NULL;
-               free_candev(netdev);
-       }
-
+       es58x_free_netdevs(es58x_dev);
        es58x_free_urbs(es58x_dev);
-
-       kfree(es58x_dev);
        usb_set_intfdata(intf, NULL);
 }
 
index fcf219e..826a158 100644 (file)
@@ -287,7 +287,7 @@ struct es58x_priv {
  * @rx_urb_cmd_max_len: Maximum length of a RX URB command.
  * @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head
  *     field of the struct es58x_priv into echo_skb
- *     indexes. Properties: @fifo_mask = echos_skb_max - 1 where
+ *     indexes. Properties: @fifo_mask = echo_skb_max - 1 where
  *     echo_skb_max must be a power of two. Also, echo_skb_max must
  *     not exceed the maximum size of the device internal TX FIFO
  *     length. This parameter is used to control the network queue
index 1a2779d..af042aa 100644 (file)
@@ -357,8 +357,7 @@ static int es58x_fd_tx_can_msg(struct es58x_priv *priv,
                return ret;
 
        /* Fill message contents. */
-       tx_can_msg = (struct es58x_fd_tx_can_msg *)
-           &es58x_fd_urb_cmd->tx_can_msg_buf[msg_len];
+       tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len];
        tx_can_msg->packet_idx = (u8)priv->tx_head;
        put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
        tx_can_msg->flags = (u8)es58x_get_flags(skb);
@@ -463,9 +462,9 @@ static int es58x_fd_get_timestamp(struct es58x_device *es58x_dev)
 }
 
 /* Nominal bittiming constants for ES582.1 and ES584.1 as specified in
- * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
- * section 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register"
- * from Microchip.
+ * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section
+ * 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register" from
+ * Microchip.
  *
  * The values from the specification are the hardware register
  * values. To convert them to the functional values, all ranges were
@@ -484,8 +483,8 @@ static const struct can_bittiming_const es58x_fd_nom_bittiming_const = {
 };
 
 /* Data bittiming constants for ES582.1 and ES584.1 as specified in
- * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
- * section 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from
+ * the microcontroller datasheet: "SAM E70/S70/V70/V71 Family" section
+ * 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from
  * Microchip.
  */
 static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
@@ -501,9 +500,9 @@ static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
 };
 
 /* Transmission Delay Compensation constants for ES582.1 and ES584.1
- * as specified in the microcontroller datasheet: "SAM
- * E701/S70/V70/V71 Family" section 49.6.15 "MCAN Transmitter Delay
- * Compensation Register" from Microchip.
+ * as specified in the microcontroller datasheet: "SAM E70/S70/V70/V71
+ * Family" section 49.6.15 "MCAN Transmitter Delay Compensation
+ * Register" from Microchip.
  */
 static const struct can_tdc_const es58x_tdc_const = {
        .tdcv_max = 0, /* Manual mode not supported. */
index ee18a87..a191891 100644 (file)
@@ -96,23 +96,14 @@ struct es58x_fd_bittiming {
  * @ctrlmode: type enum es58x_fd_ctrlmode.
  * @canfd_enabled: boolean (0: Classical CAN, 1: CAN and/or CANFD).
  * @data_bittiming: Bittiming for flexible data-rate transmission.
- * @tdc_enabled: Transmitter Delay Compensation switch (0: disabled,
- *     1: enabled). On very high bitrates, the delay between when the
- *     bit is sent and received on the CANTX and CANRX pins of the
- *     transceiver start to be significant enough for errors to occur
- *     and thus need to be compensated.
- * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
- *     quanta, defining the delay between the start of the bit
- *     reception on the CANRX pin of the transceiver and the SSP
- *     (Secondary Sample Point). Valid values: 0 to 127.
- * @tdcf: Transmitter Delay Compensation Filter window. Defines the
- *     minimum value for the SSP position, in time quanta. The
- *     feature is enabled when TDCF is configured to a value greater
- *     than TDCO. Valid values: 0 to 127.
+ * @tdc_enabled: Transmitter Delay Compensation switch (0: TDC is
+ *     disabled, 1: TDC is enabled).
+ * @tdco: Transmitter Delay Compensation Offset.
+ * @tdcf: Transmitter Delay Compensation Filter window.
  *
- * Please refer to the microcontroller datasheet: "SAM
- * E701/S70/V70/V71 Family" section 49 "Controller Area Network
- * (MCAN)" for additional information.
+ * Please refer to the microcontroller datasheet: "SAM E70/S70/V70/V71
+ * Family" section 49 "Controller Area Network (MCAN)" for additional
+ * information.
  */
 struct es58x_fd_tx_conf_msg {
        struct es58x_fd_bittiming nominal_bittiming;
index 899a3d2..837b3fe 100644 (file)
@@ -63,6 +63,8 @@
 
 #define PCAN_USB_MSG_HEADER_LEN                2
 
+#define PCAN_USB_MSG_TX_CAN            2       /* Tx msg is a CAN frame */
+
 /* PCAN-USB adapter internal clock (MHz) */
 #define PCAN_USB_CRYSTAL_HZ            16000000
 
 #define PCAN_USB_STATUSLEN_RTR         (1 << 4)
 #define PCAN_USB_STATUSLEN_DLC         (0xf)
 
+/* PCAN-USB 4.1 CAN Id tx extended flags */
+#define PCAN_USB_TX_SRR                        0x01    /* SJA1000 SRR command */
+#define PCAN_USB_TX_AT                 0x02    /* SJA1000 AT command */
+
 /* PCAN-USB error flags */
 #define PCAN_USB_ERROR_TXFULL          0x01
 #define PCAN_USB_ERROR_RXQOVR          0x02
@@ -385,7 +391,8 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id)
        if (err)
                netdev_err(dev->netdev, "getting device id failure: %d\n", err);
 
-       *device_id = args[0];
+       else
+               *device_id = args[0];
 
        return err;
 }
@@ -446,145 +453,65 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
 {
        struct sk_buff *skb;
        struct can_frame *cf;
-       enum can_state new_state;
+       enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
 
        /* ignore this error until 1st ts received */
        if (n == PCAN_USB_ERROR_QOVR)
                if (!mc->pdev->time_ref.tick_count)
                        return 0;
 
-       new_state = mc->pdev->dev.can.state;
-
-       switch (mc->pdev->dev.can.state) {
-       case CAN_STATE_ERROR_ACTIVE:
-               if (n & PCAN_USB_ERROR_BUS_LIGHT) {
-                       new_state = CAN_STATE_ERROR_WARNING;
-                       break;
-               }
-               fallthrough;
-
-       case CAN_STATE_ERROR_WARNING:
-               if (n & PCAN_USB_ERROR_BUS_HEAVY) {
-                       new_state = CAN_STATE_ERROR_PASSIVE;
-                       break;
-               }
-               if (n & PCAN_USB_ERROR_BUS_OFF) {
-                       new_state = CAN_STATE_BUS_OFF;
-                       break;
-               }
-               if (n & ~PCAN_USB_ERROR_BUS) {
-                       /*
-                        * trick to bypass next comparison and process other
-                        * errors
-                        */
-                       new_state = CAN_STATE_MAX;
-                       break;
-               }
-               if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) {
-                       /* no error (back to active state) */
-                       new_state = CAN_STATE_ERROR_ACTIVE;
-                       break;
-               }
-               break;
-
-       case CAN_STATE_ERROR_PASSIVE:
-               if (n & PCAN_USB_ERROR_BUS_OFF) {
-                       new_state = CAN_STATE_BUS_OFF;
-                       break;
-               }
-               if (n & PCAN_USB_ERROR_BUS_LIGHT) {
-                       new_state = CAN_STATE_ERROR_WARNING;
-                       break;
-               }
-               if (n & ~PCAN_USB_ERROR_BUS) {
-                       /*
-                        * trick to bypass next comparison and process other
-                        * errors
-                        */
-                       new_state = CAN_STATE_MAX;
-                       break;
-               }
-
-               if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) {
-                       /* no error (back to warning state) */
-                       new_state = CAN_STATE_ERROR_WARNING;
-                       break;
-               }
-               break;
-
-       default:
-               /* do nothing waiting for restart */
-               return 0;
-       }
-
-       /* donot post any error if current state didn't change */
-       if (mc->pdev->dev.can.state == new_state)
-               return 0;
-
        /* allocate an skb to store the error frame */
        skb = alloc_can_err_skb(mc->netdev, &cf);
-       if (!skb)
-               return -ENOMEM;
-
-       switch (new_state) {
-       case CAN_STATE_BUS_OFF:
-               cf->can_id |= CAN_ERR_BUSOFF;
-               mc->pdev->dev.can.can_stats.bus_off++;
-               can_bus_off(mc->netdev);
-               break;
-
-       case CAN_STATE_ERROR_PASSIVE:
-               cf->can_id |= CAN_ERR_CRTL;
-               cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ?
-                               CAN_ERR_CRTL_TX_PASSIVE :
-                               CAN_ERR_CRTL_RX_PASSIVE;
-               cf->data[6] = mc->pdev->bec.txerr;
-               cf->data[7] = mc->pdev->bec.rxerr;
-
-               mc->pdev->dev.can.can_stats.error_passive++;
-               break;
-
-       case CAN_STATE_ERROR_WARNING:
-               cf->can_id |= CAN_ERR_CRTL;
-               cf->data[1] = (mc->pdev->bec.txerr > mc->pdev->bec.rxerr) ?
-                               CAN_ERR_CRTL_TX_WARNING :
-                               CAN_ERR_CRTL_RX_WARNING;
-               cf->data[6] = mc->pdev->bec.txerr;
-               cf->data[7] = mc->pdev->bec.rxerr;
-
-               mc->pdev->dev.can.can_stats.error_warning++;
-               break;
 
-       case CAN_STATE_ERROR_ACTIVE:
-               cf->can_id |= CAN_ERR_CRTL;
-               cf->data[1] = CAN_ERR_CRTL_ACTIVE;
-
-               /* sync local copies of rxerr/txerr counters */
-               mc->pdev->bec.txerr = 0;
-               mc->pdev->bec.rxerr = 0;
-               break;
-
-       default:
-               /* CAN_STATE_MAX (trick to handle other errors) */
-               if (n & PCAN_USB_ERROR_TXQFULL)
-                       netdev_dbg(mc->netdev, "device Tx queue full)\n");
-
-               if (n & PCAN_USB_ERROR_RXQOVR) {
-                       netdev_dbg(mc->netdev, "data overrun interrupt\n");
+       if (n & PCAN_USB_ERROR_RXQOVR) {
+               /* data overrun interrupt */
+               netdev_dbg(mc->netdev, "data overrun interrupt\n");
+               mc->netdev->stats.rx_over_errors++;
+               mc->netdev->stats.rx_errors++;
+               if (cf) {
                        cf->can_id |= CAN_ERR_CRTL;
                        cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
-                       mc->netdev->stats.rx_over_errors++;
-                       mc->netdev->stats.rx_errors++;
                }
+       }
 
-               cf->data[6] = mc->pdev->bec.txerr;
-               cf->data[7] = mc->pdev->bec.rxerr;
+       if (n & PCAN_USB_ERROR_TXQFULL)
+               netdev_dbg(mc->netdev, "device Tx queue full)\n");
 
-               new_state = mc->pdev->dev.can.state;
-               break;
+       if (n & PCAN_USB_ERROR_BUS_OFF) {
+               new_state = CAN_STATE_BUS_OFF;
+       } else if (n & PCAN_USB_ERROR_BUS_HEAVY) {
+               new_state = ((mc->pdev->bec.txerr >= 128) ||
+                            (mc->pdev->bec.rxerr >= 128)) ?
+                               CAN_STATE_ERROR_PASSIVE :
+                               CAN_STATE_ERROR_WARNING;
+       } else {
+               new_state = CAN_STATE_ERROR_ACTIVE;
        }
 
-       mc->pdev->dev.can.state = new_state;
+       /* handle change of state */
+       if (new_state != mc->pdev->dev.can.state) {
+               enum can_state tx_state =
+                       (mc->pdev->bec.txerr >= mc->pdev->bec.rxerr) ?
+                               new_state : 0;
+               enum can_state rx_state =
+                       (mc->pdev->bec.txerr <= mc->pdev->bec.rxerr) ?
+                               new_state : 0;
+
+               can_change_state(mc->netdev, cf, tx_state, rx_state);
+
+               if (new_state == CAN_STATE_BUS_OFF) {
+                       can_bus_off(mc->netdev);
+               } else if (cf && (cf->can_id & CAN_ERR_CRTL)) {
+                       /* Supply TX/RX error counters in case of
+                        * controller error.
+                        */
+                       cf->data[6] = mc->pdev->bec.txerr;
+                       cf->data[7] = mc->pdev->bec.rxerr;
+               }
+       }
+
+       if (!skb)
+               return -ENOMEM;
 
        if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) {
                struct skb_shared_hwtstamps *hwts = skb_hwtstamps(skb);
@@ -706,6 +633,7 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
        struct sk_buff *skb;
        struct can_frame *cf;
        struct skb_shared_hwtstamps *hwts;
+       u32 can_id_flags;
 
        skb = alloc_can_skb(mc->netdev, &cf);
        if (!skb)
@@ -715,13 +643,15 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
                if ((mc->ptr + 4) > mc->end)
                        goto decode_failed;
 
-               cf->can_id = get_unaligned_le32(mc->ptr) >> 3 | CAN_EFF_FLAG;
+               can_id_flags = get_unaligned_le32(mc->ptr);
+               cf->can_id = can_id_flags >> 3 | CAN_EFF_FLAG;
                mc->ptr += 4;
        } else {
                if ((mc->ptr + 2) > mc->end)
                        goto decode_failed;
 
-               cf->can_id = get_unaligned_le16(mc->ptr) >> 5;
+               can_id_flags = get_unaligned_le16(mc->ptr);
+               cf->can_id = can_id_flags >> 5;
                mc->ptr += 2;
        }
 
@@ -744,6 +674,10 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
 
                memcpy(cf->data, mc->ptr, cf->len);
                mc->ptr += rec_len;
+
+               /* Ignore next byte (client private id) if SRR bit is set */
+               if (can_id_flags & PCAN_USB_TX_SRR)
+                       mc->ptr++;
        }
 
        /* convert timestamp into kernel time */
@@ -821,10 +755,11 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
        struct net_device *netdev = dev->netdev;
        struct net_device_stats *stats = &netdev->stats;
        struct can_frame *cf = (struct can_frame *)skb->data;
+       u32 can_id_flags = cf->can_id & CAN_ERR_MASK;
        u8 *pc;
 
-       obuf[0] = 2;
-       obuf[1] = 1;
+       obuf[0] = PCAN_USB_MSG_TX_CAN;
+       obuf[1] = 1;    /* only one CAN frame is stored in the packet */
 
        pc = obuf + PCAN_USB_MSG_HEADER_LEN;
 
@@ -839,12 +774,28 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
                *pc |= PCAN_USB_STATUSLEN_EXT_ID;
                pc++;
 
-               put_unaligned_le32((cf->can_id & CAN_ERR_MASK) << 3, pc);
+               can_id_flags <<= 3;
+
+               if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+                       can_id_flags |= PCAN_USB_TX_SRR;
+
+               if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+                       can_id_flags |= PCAN_USB_TX_AT;
+
+               put_unaligned_le32(can_id_flags, pc);
                pc += 4;
        } else {
                pc++;
 
-               put_unaligned_le16((cf->can_id & CAN_ERR_MASK) << 5, pc);
+               can_id_flags <<= 5;
+
+               if (dev->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)
+                       can_id_flags |= PCAN_USB_TX_SRR;
+
+               if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+                       can_id_flags |= PCAN_USB_TX_AT;
+
+               put_unaligned_le16(can_id_flags, pc);
                pc += 2;
        }
 
@@ -854,6 +805,10 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
                pc += cf->len;
        }
 
+       /* SRR bit needs a writer id (useless here) */
+       if (can_id_flags & PCAN_USB_TX_SRR)
+               *pc++ = 0x80;
+
        obuf[(*size)-1] = (u8)(stats->tx_packets & 0xff);
 
        return 0;
@@ -928,6 +883,19 @@ static int pcan_usb_init(struct peak_usb_device *dev)
                return err;
        }
 
+       /* Since rev 4.1, PCAN-USB is able to make single-shot as well as
+        * looped back frames.
+        */
+       if (dev->device_rev >= 41) {
+               struct can_priv *priv = netdev_priv(dev->netdev);
+
+               priv->ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT |
+                                           CAN_CTRLMODE_LOOPBACK;
+       } else {
+               dev_info(dev->netdev->dev.parent,
+                        "Firmware update available. Please contact support@peak-system.com\n");
+       }
+
        dev_info(dev->netdev->dev.parent,
                 "PEAK-System %s adapter hwrev %u serial %08X (%u channel)\n",
                 pcan_usb.name, dev->device_rev, serial_number,
index b23e348..bd1417a 100644 (file)
@@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL(b53_br_flags);
 
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack)
-{
-       b53_port_set_mcast_flood(ds->priv, port, mrouter);
-
-       return 0;
-}
-EXPORT_SYMBOL(b53_set_mrouter);
-
 static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
 {
        /* Broadcom switches will accept enabling Broadcom tags on the
@@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .port_bridge_leave      = b53_br_leave,
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_stp_state_set     = b53_br_set_stp_state,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
index 82700a5..9bf8319 100644 (file)
@@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port,
 int b53_br_flags(struct dsa_switch *ds, int port,
                 struct switchdev_brport_flags flags,
                 struct netlink_ext_ack *extack);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack);
 int b53_setup_devlink_resources(struct dsa_switch *ds);
 void b53_port_event(struct dsa_switch *ds, int port);
 void b53_phylink_validate(struct dsa_switch *ds, int port,
index 3b018fc..6ce9ec1 100644 (file)
@@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
        .port_stp_state_set     = b53_br_set_stp_state,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
        .port_vlan_add          = b53_vlan_add,
index 7062db6..542cfc4 100644 (file)
@@ -1345,6 +1345,7 @@ static int hellcreek_setup(struct dsa_switch *ds)
         * filtering setups are not supported.
         */
        ds->vlan_filtering_is_global = true;
+       ds->needs_standalone_vlan_filtering = true;
 
        /* Intercept _all_ PTP multicast traffic */
        ret = hellcreek_setup_fdb(hellcreek);
index 632f0fc..d0cba2d 100644 (file)
@@ -367,8 +367,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
        int i;
 
        reg[1] |= vid & CVID_MASK;
-       if (vid > 1)
-               reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_FID(FID_BRIDGED);
        reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
        reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
        /* STATIC_ENT indicate that entry is static wouldn't
@@ -1022,6 +1022,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
        mt7530_write(priv, MT7530_PCR_P(port),
                     PCR_MATRIX(dsa_user_ports(priv->ds)));
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        return 0;
 }
 
@@ -1144,7 +1148,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
                break;
        }
 
-       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
+       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
+                  FID_PST(FID_BRIDGED, stp_state));
 }
 
 static int
@@ -1185,18 +1190,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
        return 0;
 }
 
-static int
-mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                       struct netlink_ext_ack *extack)
-{
-       struct mt7530_priv *priv = ds->priv;
-
-       mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
-                  mrouter ? UNM_FFP(BIT(port)) : 0);
-
-       return 0;
-}
-
 static int
 mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                        struct net_device *bridge)
@@ -1230,6 +1223,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                           PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
        priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 
        return 0;
@@ -1242,15 +1239,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
        bool all_user_ports_removed = true;
        int i;
 
-       /* When a port is removed from the bridge, the port would be set up
-        * back to the default as is at initial boot which is a VLAN-unaware
-        * port.
+       /* This is called after .port_bridge_leave when leaving a VLAN-aware
+        * bridge. Don't set standalone ports to fallback mode.
         */
-       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                  MT7530_PORT_MATRIX_MODE);
-       mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+       if (dsa_to_port(ds, port)->bridge_dev)
+               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                          MT7530_PORT_FALLBACK_MODE);
+
+       mt7530_rmw(priv, MT7530_PVC_P(port),
+                  VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
                   VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
-                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
+                  MT7530_VLAN_ACC_ALL);
+
+       /* Set PVID to 0 */
+       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                  G0_PORT_VID_DEF);
 
        for (i = 0; i < MT7530_NUM_PORTS; i++) {
                if (dsa_is_user_port(ds, i) &&
@@ -1277,15 +1281,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
        struct mt7530_priv *priv = ds->priv;
 
        /* Trapped into security mode allows packet forwarding through VLAN
-        * table lookup. CPU port is set to fallback mode to let untagged
-        * frames pass through.
+        * table lookup.
         */
-       if (dsa_is_cpu_port(ds, port))
-               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                          MT7530_PORT_FALLBACK_MODE);
-       else
+       if (dsa_is_user_port(ds, port)) {
                mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
                           MT7530_PORT_SECURITY_MODE);
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID(priv->ports[port].pvid));
+
+               /* Only accept tagged frames if PVID is not set */
+               if (!priv->ports[port].pvid)
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+       }
 
        /* Set the port as a user port which is to be able to recognize VID
         * from incoming packets before fetching entry within the VLAN table.
@@ -1308,11 +1316,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
                /* Remove this port from the port matrix of the other ports
                 * in the same bridge. If the port is disabled, port matrix
                 * is kept and not being setup until the port becomes enabled.
-                * And the other port's port matrix cannot be broken when the
-                * other port is still a VLAN-aware port.
                 */
-               if (dsa_is_user_port(ds, i) && i != port &&
-                  !dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) {
+               if (dsa_is_user_port(ds, i) && i != port) {
                        if (dsa_to_port(ds, i)->bridge_dev != bridge)
                                continue;
                        if (priv->ports[i].enable)
@@ -1330,6 +1335,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
                           PCR_MATRIX(BIT(MT7530_CPU_PORT)));
        priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
 
+       /* When a port is removed from the bridge, the port would be set up
+        * back to the default as is at initial boot which is a VLAN-unaware
+        * port.
+        */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_MATRIX_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 }
 
@@ -1512,7 +1524,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv,
        /* Validate the entry with independent learning, create egress tag per
         * VLAN and joining the port as one of the port members.
         */
-       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
+             VLAN_VALID;
        mt7530_write(priv, MT7530_VAWD1, val);
 
        /* Decide whether adding tag or not for those outgoing packets from the
@@ -1586,6 +1599,21 @@ mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid,
        mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, vid);
 }
 
+static int
+mt7530_setup_vlan0(struct mt7530_priv *priv)
+{
+       u32 val;
+
+       /* Validate the entry with independent learning, keep the original
+        * ingress tag attribute.
+        */
+       val = IVL_MAC | EG_CON | PORT_MEM(MT7530_ALL_MEMBERS) | FID(FID_BRIDGED) |
+             VLAN_VALID;
+       mt7530_write(priv, MT7530_VAWD1, val);
+
+       return mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, 0);
+}
+
 static int
 mt7530_port_vlan_add(struct dsa_switch *ds, int port,
                     const struct switchdev_obj_port_vlan *vlan,
@@ -1602,9 +1630,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
        mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
 
        if (pvid) {
-               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
-                          G0_PORT_VID(vlan->vid));
                priv->ports[port].pvid = vlan->vid;
+
+               /* Accept all frames if PVID is set */
+               mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                          MT7530_VLAN_ACC_ALL);
+
+               /* Only configure PVID if VLAN filtering is enabled */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PPBV1_P(port),
+                                  G0_PORT_VID_MASK,
+                                  G0_PORT_VID(vlan->vid));
+       } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
+               /* This VLAN is overwritten without PVID, so unset it */
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
        }
 
        mutex_unlock(&priv->reg_mutex);
@@ -1618,11 +1665,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 {
        struct mt7530_hw_vlan_entry target_entry;
        struct mt7530_priv *priv = ds->priv;
-       u16 pvid;
 
        mutex_lock(&priv->reg_mutex);
 
-       pvid = priv->ports[port].pvid;
        mt7530_hw_vlan_entry_init(&target_entry, port, 0);
        mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
                              mt7530_hw_vlan_del);
@@ -1630,11 +1675,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
        /* PVID is being restored to the default whenever the PVID port
         * is being removed from the VLAN.
         */
-       if (pvid == vlan->vid)
-               pvid = G0_PORT_VID_DEF;
+       if (priv->ports[port].pvid == vlan->vid) {
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
+       }
 
-       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
-       priv->ports[port].pvid = pvid;
 
        mutex_unlock(&priv->reg_mutex);
 
@@ -1718,15 +1770,7 @@ static enum dsa_tag_protocol
 mtk_get_tag_protocol(struct dsa_switch *ds, int port,
                     enum dsa_tag_protocol mp)
 {
-       struct mt7530_priv *priv = ds->priv;
-
-       if (port != MT7530_CPU_PORT) {
-               dev_warn(priv->dev,
-                        "port not matched with tagging CPU port\n");
-               return DSA_TAG_PROTO_NONE;
-       } else {
-               return DSA_TAG_PROTO_MTK;
-       }
+       return DSA_TAG_PROTO_MTK;
 }
 
 #ifdef CONFIG_GPIOLIB
@@ -2055,6 +2099,7 @@ mt7530_setup(struct dsa_switch *ds)
         * as two netdev instances.
         */
        dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        if (priv->id == ID_MT7530) {
@@ -2125,6 +2170,9 @@ mt7530_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                if (dsa_is_cpu_port(ds, i)) {
                        ret = mt753x_cpu_port_enable(ds, i);
                        if (ret)
@@ -2132,14 +2180,20 @@ mt7530_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
                /* Enable consistent egress tag */
                mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Setup VLAN ID 0 for VLAN-unaware bridges */
+       ret = mt7530_setup_vlan0(priv);
+       if (ret)
+               return ret;
+
        /* Setup port 5 */
        priv->p5_intf_sel = P5_DISABLED;
        interface = PHY_INTERFACE_MODE_NA;
@@ -2290,6 +2344,9 @@ mt7531_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
 
                if (dsa_is_cpu_port(ds, i)) {
@@ -2299,8 +2356,9 @@ mt7531_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
 
                /* Enable consistent egress tag */
@@ -2308,6 +2366,12 @@ mt7531_setup(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Setup VLAN ID 0 for VLAN-unaware bridges */
+       ret = mt7530_setup_vlan0(priv);
+       if (ret)
+               return ret;
+
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        /* Flush the FDB table */
@@ -3061,7 +3125,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
        .port_stp_state_set     = mt7530_stp_state_set,
        .port_pre_bridge_flags  = mt7530_port_pre_bridge_flags,
        .port_bridge_flags      = mt7530_port_bridge_flags,
-       .port_set_mrouter       = mt7530_port_set_mrouter,
        .port_bridge_join       = mt7530_port_bridge_join,
        .port_bridge_leave      = mt7530_port_bridge_leave,
        .port_fdb_add           = mt7530_port_fdb_add,
index b19b389..91508e2 100644 (file)
@@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw {
 #define  STATIC_ENT                    3
 #define MT7530_ATA2                    0x78
 #define  ATA2_IVL                      BIT(15)
+#define  ATA2_FID(x)                   (((x) & 0x7) << 12)
 
 /* Register for address table write data */
 #define MT7530_ATWD                    0x7c
@@ -144,15 +145,24 @@ enum mt7530_vlan_cmd {
 #define  PORT_STAG                     BIT(31)
 /* Independent VLAN Learning */
 #define  IVL_MAC                       BIT(30)
+/* Egress Tag Consistent */
+#define  EG_CON                                BIT(29)
 /* Per VLAN Egress Tag Control */
 #define  VTAG_EN                       BIT(28)
 /* VLAN Member Control */
 #define  PORT_MEM(x)                   (((x) & 0xff) << 16)
+/* Filter ID */
+#define  FID(x)                                (((x) & 0x7) << 1)
 /* VLAN Entry Valid */
 #define  VLAN_VALID                    BIT(0)
 #define  PORT_MEM_SHFT                 16
 #define  PORT_MEM_MASK                 0xff
 
+enum mt7530_fid {
+       FID_STANDALONE = 0,
+       FID_BRIDGED = 1,
+};
+
 #define MT7530_VAWD2                   0x98
 /* Egress Tag Control */
 #define  ETAG_CTRL_P(p, x)             (((x) & 0x3) << ((p) << 1))
@@ -179,8 +189,8 @@ enum mt7530_vlan_egress_attr {
 
 /* Register for port STP state control */
 #define MT7530_SSP_P(x)                        (0x2000 + ((x) * 0x100))
-#define  FID_PST(x)                    ((x) & 0x3)
-#define  FID_PST_MASK                  FID_PST(0x3)
+#define  FID_PST(fid, state)           (((state) & 0x3) << ((fid) * 2))
+#define  FID_PST_MASK(fid)             FID_PST(fid, 0x3)
 
 enum mt7530_stp_state {
        MT7530_STP_DISABLED = 0,
@@ -230,6 +240,7 @@ enum mt7530_port_mode {
 #define  PVC_EG_TAG_MASK               PVC_EG_TAG(7)
 #define  VLAN_ATTR(x)                  (((x) & 0x3) << 6)
 #define  VLAN_ATTR_MASK                        VLAN_ATTR(3)
+#define  ACC_FRM_MASK                  GENMASK(1, 0)
 
 enum mt7530_vlan_port_eg_tag {
        MT7530_VLAN_EG_DISABLED = 0,
@@ -241,13 +252,19 @@ enum mt7530_vlan_port_attr {
        MT7530_VLAN_TRANSPARENT = 3,
 };
 
+enum mt7530_vlan_port_acc_frm {
+       MT7530_VLAN_ACC_ALL = 0,
+       MT7530_VLAN_ACC_TAGGED = 1,
+       MT7530_VLAN_ACC_UNTAGGED = 2,
+};
+
 #define  STAG_VPID                     (((x) & 0xffff) << 16)
 
 /* Register for port port-and-protocol based vlan 1 control */
 #define MT7530_PPBV1_P(x)              (0x2014 + ((x) * 0x100))
 #define  G0_PORT_VID(x)                        (((x) & 0xfff) << 0)
 #define  G0_PORT_VID_MASK              G0_PORT_VID(0xfff)
-#define  G0_PORT_VID_DEF               G0_PORT_VID(1)
+#define  G0_PORT_VID_DEF               G0_PORT_VID(0)
 
 /* Register for port MAC control register */
 #define MT7530_PMCR_P(x)               (0x3000 + ((x) * 0x100))
index 634a48e..7a2445a 100644 (file)
@@ -2,6 +2,7 @@
 config NET_DSA_MV88E6XXX
        tristate "Marvell 88E6xxx Ethernet switch fabric support"
        depends on NET_DSA
+       depends on PTP_1588_CLOCK_OPTIONAL
        select IRQ_DOMAIN
        select NET_DSA_TAG_EDSA
        select NET_DSA_TAG_DSA
index 272b053..c45ca24 100644 (file)
@@ -1221,14 +1221,36 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
        bool found = false;
        u16 pvlan;
 
-       list_for_each_entry(dp, &dst->ports, list) {
-               if (dp->ds->index == dev && dp->index == port) {
+       /* dev is a physical switch */
+       if (dev <= dst->last_switch) {
+               list_for_each_entry(dp, &dst->ports, list) {
+                       if (dp->ds->index == dev && dp->index == port) {
+                               /* dp might be a DSA link or a user port, so it
+                                * might or might not have a bridge_dev
+                                * pointer. Use the "found" variable for both
+                                * cases.
+                                */
+                               br = dp->bridge_dev;
+                               found = true;
+                               break;
+                       }
+               }
+       /* dev is a virtual bridge */
+       } else {
+               list_for_each_entry(dp, &dst->ports, list) {
+                       if (dp->bridge_num < 0)
+                               continue;
+
+                       if (dp->bridge_num + 1 + dst->last_switch != dev)
+                               continue;
+
+                       br = dp->bridge_dev;
                        found = true;
                        break;
                }
        }
 
-       /* Prevent frames from unknown switch or port */
+       /* Prevent frames from unknown switch or virtual bridge */
        if (!found)
                return 0;
 
@@ -1236,7 +1258,6 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
        if (dp->type == DSA_PORT_TYPE_CPU || dp->type == DSA_PORT_TYPE_DSA)
                return mv88e6xxx_port_mask(chip);
 
-       br = dp->bridge_dev;
        pvlan = 0;
 
        /* Frames from user ports can egress any local DSA links and CPU ports,
@@ -2422,6 +2443,44 @@ static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds,
        mv88e6xxx_reg_unlock(chip);
 }
 
+/* Treat the software bridge as a virtual single-port switch behind the
+ * CPU and map in the PVT. First dst->last_switch elements are taken by
+ * physical switches, so start from beyond that range.
+ */
+static int mv88e6xxx_map_virtual_bridge_to_pvt(struct dsa_switch *ds,
+                                              int bridge_num)
+{
+       u8 dev = bridge_num + ds->dst->last_switch + 1;
+       struct mv88e6xxx_chip *chip = ds->priv;
+       int err;
+
+       mv88e6xxx_reg_lock(chip);
+       err = mv88e6xxx_pvt_map(chip, dev, 0);
+       mv88e6xxx_reg_unlock(chip);
+
+       return err;
+}
+
+static int mv88e6xxx_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+                                          struct net_device *br,
+                                          int bridge_num)
+{
+       return mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num);
+}
+
+static void mv88e6xxx_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+                                             struct net_device *br,
+                                             int bridge_num)
+{
+       int err;
+
+       err = mv88e6xxx_map_virtual_bridge_to_pvt(ds, bridge_num);
+       if (err) {
+               dev_err(ds->dev, "failed to remap cross-chip Port VLAN: %pe\n",
+                       ERR_PTR(err));
+       }
+}
+
 static int mv88e6xxx_software_reset(struct mv88e6xxx_chip *chip)
 {
        if (chip->info->ops->reset)
@@ -3025,6 +3084,15 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
        chip->ds = ds;
        ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip);
 
+       /* Since virtual bridges are mapped in the PVT, the number we support
+        * depends on the physical switch topology. We need to let DSA figure
+        * that out and therefore we cannot set this at dsa_register_switch()
+        * time.
+        */
+       if (mv88e6xxx_has_pvt(chip))
+               ds->num_fwd_offloading_bridges = MV88E6XXX_MAX_PVT_SWITCHES -
+                                                ds->dst->last_switch - 1;
+
        mv88e6xxx_reg_lock(chip);
 
        if (chip->info->ops->setup_errata) {
@@ -5729,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                                       struct netlink_ext_ack *extack)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       bool do_fast_age = false;
        int err = -EOPNOTSUPP;
 
        mv88e6xxx_reg_lock(chip);
@@ -5741,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
                if (err)
                        goto out;
-
-               if (!learning)
-                       do_fast_age = true;
        }
 
        if (flags.mask & BR_FLOOD) {
@@ -5775,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 out:
        mv88e6xxx_reg_unlock(chip);
 
-       if (do_fast_age)
-               mv88e6xxx_port_fast_age(ds, port);
-
-       return err;
-}
-
-static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port,
-                                     bool mrouter,
-                                     struct netlink_ext_ack *extack)
-{
-       struct mv88e6xxx_chip *chip = ds->priv;
-       int err;
-
-       if (!chip->info->ops->port_set_mcast_flood)
-               return -EOPNOTSUPP;
-
-       mv88e6xxx_reg_lock(chip);
-       err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter);
-       mv88e6xxx_reg_unlock(chip);
-
        return err;
 }
 
@@ -6099,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_bridge_leave      = mv88e6xxx_port_bridge_leave,
        .port_pre_bridge_flags  = mv88e6xxx_port_pre_bridge_flags,
        .port_bridge_flags      = mv88e6xxx_port_bridge_flags,
-       .port_set_mrouter       = mv88e6xxx_port_set_mrouter,
        .port_stp_state_set     = mv88e6xxx_port_stp_state_set,
        .port_fast_age          = mv88e6xxx_port_fast_age,
        .port_vlan_filtering    = mv88e6xxx_port_vlan_filtering,
@@ -6128,6 +6171,8 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .crosschip_lag_change   = mv88e6xxx_crosschip_lag_change,
        .crosschip_lag_join     = mv88e6xxx_crosschip_lag_join,
        .crosschip_lag_leave    = mv88e6xxx_crosschip_lag_leave,
+       .port_bridge_tx_fwd_offload = mv88e6xxx_bridge_tx_fwd_offload,
+       .port_bridge_tx_fwd_unoffload = mv88e6xxx_bridge_tx_fwd_unoffload,
 };
 
 static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip)
index 932b6b6..9948544 100644 (file)
@@ -5,6 +5,7 @@ config NET_DSA_MSCC_FELIX
        depends on NET_VENDOR_MICROSEMI
        depends on NET_VENDOR_FREESCALE
        depends on HAS_IOMEM
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MSCC_OCELOT_SWITCH_LIB
        select NET_DSA_TAG_OCELOT_8021Q
        select NET_DSA_TAG_OCELOT
@@ -19,6 +20,7 @@ config NET_DSA_MSCC_SEVILLE
        depends on NET_DSA
        depends on NET_VENDOR_MICROSEMI
        depends on HAS_IOMEM
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MSCC_OCELOT_SWITCH_LIB
        select NET_DSA_TAG_OCELOT_8021Q
        select NET_DSA_TAG_OCELOT
index a2a1591..3656e67 100644 (file)
@@ -231,11 +231,6 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
        return 0;
 }
 
-static const struct dsa_8021q_ops felix_tag_8021q_ops = {
-       .vlan_add       = felix_tag_8021q_vlan_add,
-       .vlan_del       = felix_tag_8021q_vlan_del,
-};
-
 /* Alternatively to using the NPI functionality, that same hardware MAC
  * connected internally to the enetc or fman DSA master can be configured to
  * use the software-defined tag_8021q frame format. As far as the hardware is
@@ -425,29 +420,18 @@ static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu)
        ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC);
        ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_BC);
 
-       felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx),
-                                      GFP_KERNEL);
-       if (!felix->dsa_8021q_ctx)
-               return -ENOMEM;
-
-       felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops;
-       felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD);
-       felix->dsa_8021q_ctx->ds = ds;
-
-       err = dsa_8021q_setup(felix->dsa_8021q_ctx, true);
+       err = dsa_tag_8021q_register(ds, htons(ETH_P_8021AD));
        if (err)
-               goto out_free_dsa_8021_ctx;
+               return err;
 
        err = felix_setup_mmio_filtering(felix);
        if (err)
-               goto out_teardown_dsa_8021q;
+               goto out_tag_8021q_unregister;
 
        return 0;
 
-out_teardown_dsa_8021q:
-       dsa_8021q_setup(felix->dsa_8021q_ctx, false);
-out_free_dsa_8021_ctx:
-       kfree(felix->dsa_8021q_ctx);
+out_tag_8021q_unregister:
+       dsa_tag_8021q_unregister(ds);
        return err;
 }
 
@@ -462,11 +446,7 @@ static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu)
                dev_err(ds->dev, "felix_teardown_mmio_filtering returned %d",
                        err);
 
-       err = dsa_8021q_setup(felix->dsa_8021q_ctx, false);
-       if (err)
-               dev_err(ds->dev, "dsa_8021q_setup returned %d", err);
-
-       kfree(felix->dsa_8021q_ctx);
+       dsa_tag_8021q_unregister(ds);
 
        for (port = 0; port < ds->num_ports; port++) {
                if (dsa_is_unused_port(ds, port))
@@ -762,7 +742,8 @@ static int felix_lag_change(struct dsa_switch *ds, int port)
 }
 
 static int felix_vlan_prepare(struct dsa_switch *ds, int port,
-                             const struct switchdev_obj_port_vlan *vlan)
+                             const struct switchdev_obj_port_vlan *vlan,
+                             struct netlink_ext_ack *extack)
 {
        struct ocelot *ocelot = ds->priv;
        u16 flags = vlan->flags;
@@ -780,7 +761,8 @@ static int felix_vlan_prepare(struct dsa_switch *ds, int port,
 
        return ocelot_vlan_prepare(ocelot, port, vlan->vid,
                                   flags & BRIDGE_VLAN_INFO_PVID,
-                                  flags & BRIDGE_VLAN_INFO_UNTAGGED);
+                                  flags & BRIDGE_VLAN_INFO_UNTAGGED,
+                                  extack);
 }
 
 static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
@@ -788,7 +770,7 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
 {
        struct ocelot *ocelot = ds->priv;
 
-       return ocelot_port_vlan_filtering(ocelot, port, enabled);
+       return ocelot_port_vlan_filtering(ocelot, port, enabled, extack);
 }
 
 static int felix_vlan_add(struct dsa_switch *ds, int port,
@@ -799,7 +781,7 @@ static int felix_vlan_add(struct dsa_switch *ds, int port,
        u16 flags = vlan->flags;
        int err;
 
-       err = felix_vlan_prepare(ds, port, vlan);
+       err = felix_vlan_prepare(ds, port, vlan, extack);
        if (err)
                return err;
 
@@ -816,23 +798,6 @@ static int felix_vlan_del(struct dsa_switch *ds, int port,
        return ocelot_vlan_del(ocelot, port, vlan->vid);
 }
 
-static int felix_port_enable(struct dsa_switch *ds, int port,
-                            struct phy_device *phy)
-{
-       struct ocelot *ocelot = ds->priv;
-
-       ocelot_port_enable(ocelot, port, phy);
-
-       return 0;
-}
-
-static void felix_port_disable(struct dsa_switch *ds, int port)
-{
-       struct ocelot *ocelot = ds->priv;
-
-       return ocelot_port_disable(ocelot, port);
-}
-
 static void felix_phylink_validate(struct dsa_switch *ds, int port,
                                   unsigned long *supported,
                                   struct phylink_link_state *state)
@@ -861,25 +826,9 @@ static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port,
                                        phy_interface_t interface)
 {
        struct ocelot *ocelot = ds->priv;
-       struct ocelot_port *ocelot_port = ocelot->ports[port];
-       int err;
-
-       ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
-                        DEV_MAC_ENA_CFG);
 
-       ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
-
-       err = ocelot_port_flush(ocelot, port);
-       if (err)
-               dev_err(ocelot->dev, "failed to flush port %d: %d\n",
-                       port, err);
-
-       /* Put the port in reset. */
-       ocelot_port_writel(ocelot_port,
-                          DEV_CLOCK_CFG_MAC_TX_RST |
-                          DEV_CLOCK_CFG_MAC_RX_RST |
-                          DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
-                          DEV_CLOCK_CFG);
+       ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface,
+                                    FELIX_MAC_QUIRKS);
 }
 
 static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@ -890,75 +839,11 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
                                      bool tx_pause, bool rx_pause)
 {
        struct ocelot *ocelot = ds->priv;
-       struct ocelot_port *ocelot_port = ocelot->ports[port];
        struct felix *felix = ocelot_to_felix(ocelot);
-       u32 mac_fc_cfg;
-
-       /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
-        * PORT_RST bits in DEV_CLOCK_CFG. Note that the way this system is
-        * integrated is that the MAC speed is fixed and it's the PCS who is
-        * performing the rate adaptation, so we have to write "1000Mbps" into
-        * the LINK_SPEED field of DEV_CLOCK_CFG (which is also its default
-        * value).
-        */
-       ocelot_port_writel(ocelot_port,
-                          DEV_CLOCK_CFG_LINK_SPEED(OCELOT_SPEED_1000),
-                          DEV_CLOCK_CFG);
-
-       switch (speed) {
-       case SPEED_10:
-               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(3);
-               break;
-       case SPEED_100:
-               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(2);
-               break;
-       case SPEED_1000:
-       case SPEED_2500:
-               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(1);
-               break;
-       default:
-               dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n",
-                       port, speed);
-               return;
-       }
-
-       /* handle Rx pause in all cases, with 2500base-X this is used for rate
-        * adaptation.
-        */
-       mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
-
-       if (tx_pause)
-               mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
-                             SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
-                             SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
-                             SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
-
-       /* Flow control. Link speed is only used here to evaluate the time
-        * specification in incoming pause frames.
-        */
-       ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
-
-       ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
-
-       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
-
-       /* Undo the effects of felix_phylink_mac_link_down:
-        * enable MAC module
-        */
-       ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
-                          DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
-
-       /* Enable receiving frames on the port, and activate auto-learning of
-        * MAC addresses.
-        */
-       ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
-                        ANA_PORT_PORT_CFG_RECV_ENA |
-                        ANA_PORT_PORT_CFG_PORTID_VAL(port),
-                        ANA_PORT_PORT_CFG, port);
 
-       /* Core: Enable port for frame transfer */
-       ocelot_fields_write(ocelot, port,
-                           QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
+       ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode,
+                                  interface, speed, duplex, tx_pause, rx_pause,
+                                  FELIX_MAC_QUIRKS);
 
        if (felix->info->port_sched_speed_set)
                felix->info->port_sched_speed_set(ocelot, port, speed);
@@ -1635,8 +1520,6 @@ const struct dsa_switch_ops felix_switch_ops = {
        .phylink_mac_config             = felix_phylink_mac_config,
        .phylink_mac_link_down          = felix_phylink_mac_link_down,
        .phylink_mac_link_up            = felix_phylink_mac_link_up,
-       .port_enable                    = felix_port_enable,
-       .port_disable                   = felix_port_disable,
        .port_fdb_dump                  = felix_fdb_dump,
        .port_fdb_add                   = felix_fdb_add,
        .port_fdb_del                   = felix_fdb_del,
@@ -1679,6 +1562,8 @@ const struct dsa_switch_ops felix_switch_ops = {
        .port_mrp_del                   = felix_mrp_del,
        .port_mrp_add_ring_role         = felix_mrp_add_ring_role,
        .port_mrp_del_ring_role         = felix_mrp_del_ring_role,
+       .tag_8021q_vlan_add             = felix_tag_8021q_vlan_add,
+       .tag_8021q_vlan_del             = felix_tag_8021q_vlan_del,
 };
 
 struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port)
index 4d96cad..5854bab 100644 (file)
@@ -5,6 +5,7 @@
 #define _MSCC_FELIX_H
 
 #define ocelot_to_felix(o)             container_of((o), struct felix, ocelot)
+#define FELIX_MAC_QUIRKS               OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION
 
 /* Platform-specific information */
 struct felix_info {
@@ -60,7 +61,6 @@ struct felix {
        struct lynx_pcs                 **pcs;
        resource_size_t                 switch_base;
        resource_size_t                 imdio_base;
-       struct dsa_8021q_context        *dsa_8021q_ctx;
        enum dsa_tag_protocol           tag_proto;
 };
 
index b29d41e..1291bba 100644 (file)
@@ -2,6 +2,7 @@
 config NET_DSA_SJA1105
 tristate "NXP SJA1105 Ethernet switch family support"
        depends on NET_DSA && SPI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select NET_DSA_TAG_SJA1105
        select PCS_XPCS
        select PACKING
index 221c7ab..5e5d24e 100644 (file)
@@ -115,12 +115,6 @@ struct sja1105_info {
        const struct sja1105_dynamic_table_ops *dyn_ops;
        const struct sja1105_table_ops *static_ops;
        const struct sja1105_regs *regs;
-       /* Both E/T and P/Q/R/S have quirks when it comes to popping the S-Tag
-        * from double-tagged frames. E/T will pop it only when it's equal to
-        * TPID from the General Parameters Table, while P/Q/R/S will only
-        * pop it when it's equal to TPID2.
-        */
-       u16 qinq_tpid;
        bool can_limit_mcast_flood;
        int (*reset_cmd)(struct dsa_switch *ds);
        int (*setup_rgmii_delay)(const void *ctx, int port);
@@ -226,28 +220,13 @@ struct sja1105_flow_block {
        int num_virtual_links;
 };
 
-struct sja1105_bridge_vlan {
-       struct list_head list;
-       int port;
-       u16 vid;
-       bool pvid;
-       bool untagged;
-};
-
-enum sja1105_vlan_state {
-       SJA1105_VLAN_UNAWARE,
-       SJA1105_VLAN_BEST_EFFORT,
-       SJA1105_VLAN_FILTERING_FULL,
-};
-
 struct sja1105_private {
        struct sja1105_static_config static_config;
        bool rgmii_rx_delay[SJA1105_MAX_NUM_PORTS];
        bool rgmii_tx_delay[SJA1105_MAX_NUM_PORTS];
        phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
        bool fixed_link[SJA1105_MAX_NUM_PORTS];
-       bool best_effort_vlan_filtering;
-       unsigned long learn_ena;
+       bool vlan_aware;
        unsigned long ucast_egress_floods;
        unsigned long bcast_egress_floods;
        const struct sja1105_info *info;
@@ -255,16 +234,14 @@ struct sja1105_private {
        struct gpio_desc *reset_gpio;
        struct spi_device *spidev;
        struct dsa_switch *ds;
-       struct list_head dsa_8021q_vlans;
-       struct list_head bridge_vlans;
+       u16 bridge_pvid[SJA1105_MAX_NUM_PORTS];
+       u16 tag_8021q_pvid[SJA1105_MAX_NUM_PORTS];
        struct sja1105_flow_block flow_block;
        struct sja1105_port ports[SJA1105_MAX_NUM_PORTS];
        /* Serializes transmission of management frames so that
         * the switch doesn't confuse them with one another.
         */
        struct mutex mgmt_lock;
-       struct dsa_8021q_context *dsa_8021q_ctx;
-       enum sja1105_vlan_state vlan_state;
        struct devlink_region **regions;
        struct sja1105_cbs_entry *cbs;
        struct mii_bus *mdio_base_t1;
@@ -311,10 +288,6 @@ int sja1110_pcs_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val);
 /* From sja1105_devlink.c */
 int sja1105_devlink_setup(struct dsa_switch *ds);
 void sja1105_devlink_teardown(struct dsa_switch *ds);
-int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id,
-                             struct devlink_param_gset_ctx *ctx);
-int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id,
-                             struct devlink_param_gset_ctx *ctx);
 int sja1105_devlink_info_get(struct dsa_switch *ds,
                             struct devlink_info_req *req,
                             struct netlink_ext_ack *extack);
index b6a4a16..05c7f4c 100644 (file)
@@ -115,105 +115,6 @@ static void sja1105_teardown_devlink_regions(struct dsa_switch *ds)
        kfree(priv->regions);
 }
 
-static int sja1105_best_effort_vlan_filtering_get(struct sja1105_private *priv,
-                                                 bool *be_vlan)
-{
-       *be_vlan = priv->best_effort_vlan_filtering;
-
-       return 0;
-}
-
-static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv,
-                                                 bool be_vlan)
-{
-       struct dsa_switch *ds = priv->ds;
-       bool vlan_filtering;
-       int port;
-       int rc;
-
-       priv->best_effort_vlan_filtering = be_vlan;
-
-       rtnl_lock();
-       for (port = 0; port < ds->num_ports; port++) {
-               struct dsa_port *dp;
-
-               if (!dsa_is_user_port(ds, port))
-                       continue;
-
-               dp = dsa_to_port(ds, port);
-               vlan_filtering = dsa_port_is_vlan_filtering(dp);
-
-               rc = sja1105_vlan_filtering(ds, port, vlan_filtering, NULL);
-               if (rc)
-                       break;
-       }
-       rtnl_unlock();
-
-       return rc;
-}
-
-enum sja1105_devlink_param_id {
-       SJA1105_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
-       SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
-};
-
-int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id,
-                             struct devlink_param_gset_ctx *ctx)
-{
-       struct sja1105_private *priv = ds->priv;
-       int err;
-
-       switch (id) {
-       case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
-               err = sja1105_best_effort_vlan_filtering_get(priv,
-                                                            &ctx->val.vbool);
-               break;
-       default:
-               err = -EOPNOTSUPP;
-               break;
-       }
-
-       return err;
-}
-
-int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id,
-                             struct devlink_param_gset_ctx *ctx)
-{
-       struct sja1105_private *priv = ds->priv;
-       int err;
-
-       switch (id) {
-       case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
-               err = sja1105_best_effort_vlan_filtering_set(priv,
-                                                            ctx->val.vbool);
-               break;
-       default:
-               err = -EOPNOTSUPP;
-               break;
-       }
-
-       return err;
-}
-
-static const struct devlink_param sja1105_devlink_params[] = {
-       DSA_DEVLINK_PARAM_DRIVER(SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
-                                "best_effort_vlan_filtering",
-                                DEVLINK_PARAM_TYPE_BOOL,
-                                BIT(DEVLINK_PARAM_CMODE_RUNTIME)),
-};
-
-static int sja1105_setup_devlink_params(struct dsa_switch *ds)
-{
-       return dsa_devlink_params_register(ds, sja1105_devlink_params,
-                                          ARRAY_SIZE(sja1105_devlink_params));
-}
-
-static void sja1105_teardown_devlink_params(struct dsa_switch *ds)
-{
-       dsa_devlink_params_unregister(ds, sja1105_devlink_params,
-                                     ARRAY_SIZE(sja1105_devlink_params));
-}
-
 int sja1105_devlink_info_get(struct dsa_switch *ds,
                             struct devlink_info_req *req,
                             struct netlink_ext_ack *extack)
@@ -233,23 +134,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds,
 
 int sja1105_devlink_setup(struct dsa_switch *ds)
 {
-       int rc;
-
-       rc = sja1105_setup_devlink_params(ds);
-       if (rc)
-               return rc;
-
-       rc = sja1105_setup_devlink_regions(ds);
-       if (rc < 0) {
-               sja1105_teardown_devlink_params(ds);
-               return rc;
-       }
-
-       return 0;
+       return sja1105_setup_devlink_regions(ds);
 }
 
 void sja1105_devlink_teardown(struct dsa_switch *ds)
 {
-       sja1105_teardown_devlink_params(ds);
        sja1105_teardown_devlink_regions(ds);
 }
index 1477091..f2049f5 100644 (file)
@@ -1355,14 +1355,14 @@ u8 sja1105et_fdb_hash(struct sja1105_private *priv, const u8 *addr, u16 vid)
 {
        struct sja1105_l2_lookup_params_entry *l2_lookup_params =
                priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS].entries;
-       u64 poly_koopman = l2_lookup_params->poly;
+       u64 input, poly_koopman = l2_lookup_params->poly;
        /* Convert polynomial from Koopman to 'normal' notation */
        u8 poly = (u8)(1 + (poly_koopman << 1));
-       u64 vlanid = l2_lookup_params->shared_learn ? 0 : vid;
-       u64 input = (vlanid << 48) | ether_addr_to_u64(addr);
        u8 crc = 0; /* seed */
        int i;
 
+       input = ((u64)vid << 48) | ether_addr_to_u64(addr);
+
        /* Mask the eight bytes starting from MSB one at a time */
        for (i = 56; i >= 0; i -= 8) {
                u8 byte = (input & (0xffull << i)) >> i;
index 49eb0ac..2f8cc66 100644 (file)
@@ -26,9 +26,6 @@
 #include "sja1105_tas.h"
 
 #define SJA1105_UNKNOWN_MULTICAST      0x010000000000ull
-#define SJA1105_DEFAULT_VLAN           (VLAN_N_VID - 1)
-
-static const struct dsa_switch_ops sja1105_switch_ops;
 
 static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len,
                             unsigned int startup_delay)
@@ -57,6 +54,93 @@ static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd,
        return !!(l2_fwd[from].reach_port & BIT(to));
 }
 
+static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
+{
+       struct sja1105_vlan_lookup_entry *vlan;
+       int count, i;
+
+       vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
+       count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
+
+       for (i = 0; i < count; i++)
+               if (vlan[i].vlanid == vid)
+                       return i;
+
+       /* Return an invalid entry index if not found */
+       return -1;
+}
+
+static int sja1105_drop_untagged(struct dsa_switch *ds, int port, bool drop)
+{
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_mac_config_entry *mac;
+
+       mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
+
+       if (mac[port].drpuntag == drop)
+               return 0;
+
+       mac[port].drpuntag = drop;
+
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+                                           &mac[port], true);
+}
+
+static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
+{
+       struct sja1105_mac_config_entry *mac;
+
+       mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
+
+       if (mac[port].vlanid == pvid)
+               return 0;
+
+       mac[port].vlanid = pvid;
+
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+                                           &mac[port], true);
+}
+
+static int sja1105_commit_pvid(struct dsa_switch *ds, int port)
+{
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       struct sja1105_private *priv = ds->priv;
+       struct sja1105_vlan_lookup_entry *vlan;
+       bool drop_untagged = false;
+       int match, rc;
+       u16 pvid;
+
+       if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev))
+               pvid = priv->bridge_pvid[port];
+       else
+               pvid = priv->tag_8021q_pvid[port];
+
+       rc = sja1105_pvid_apply(priv, port, pvid);
+       if (rc)
+               return rc;
+
+       /* Only force dropping of untagged packets when the port is under a
+        * VLAN-aware bridge. When the tag_8021q pvid is used, we are
+        * deliberately removing the RX VLAN from the port's VMEMB_PORT list,
+        * to prevent DSA tag spoofing from the link partner. Untagged packets
+        * are the only ones that should be received with tag_8021q, so
+        * definitely don't drop them.
+        */
+       if (pvid == priv->bridge_pvid[port]) {
+               vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
+
+               match = sja1105_is_vlan_configured(priv, pvid);
+
+               if (match < 0 || !(vlan[match].vmemb_port & BIT(port)))
+                       drop_untagged = true;
+       }
+
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+               drop_untagged = true;
+
+       return sja1105_drop_untagged(ds, port, drop_untagged);
+}
+
 static int sja1105_init_mac_settings(struct sja1105_private *priv)
 {
        struct sja1105_mac_config_entry default_mac = {
@@ -101,7 +185,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
        struct sja1105_mac_config_entry *mac;
        struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int i;
+       struct dsa_port *dp;
 
        table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
 
@@ -120,14 +204,27 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 
        mac = table->entries;
 
-       for (i = 0; i < ds->num_ports; i++) {
-               mac[i] = default_mac;
+       list_for_each_entry(dp, &ds->dst->ports, list) {
+               if (dp->ds != ds)
+                       continue;
+
+               mac[dp->index] = default_mac;
 
                /* Let sja1105_bridge_stp_state_set() keep address learning
-                * enabled for the CPU port.
+                * enabled for the DSA ports. CPU ports use software-assisted
+                * learning to ensure that only FDB entries belonging to the
+                * bridge are learned, and that they are learned towards all
+                * CPU ports in a cross-chip topology if multiple CPU ports
+                * exist.
                 */
-               if (dsa_is_cpu_port(ds, i))
-                       priv->learn_ena |= BIT(i);
+               if (dsa_port_is_dsa(dp))
+                       dp->learning = true;
+
+               /* Disallow untagged packets from being received on the
+                * CPU and DSA ports.
+                */
+               if (dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp))
+                       mac[dp->index].drpuntag = true;
        }
 
        return 0;
@@ -378,8 +475,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
        table->entry_count = 1;
 
        for (port = 0; port < ds->num_ports; port++) {
-               struct sja1105_bridge_vlan *v;
-
                if (dsa_is_unused_port(ds, port))
                        continue;
 
@@ -387,22 +482,10 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
                pvid.vlan_bc |= BIT(port);
                pvid.tag_port &= ~BIT(port);
 
-               v = kzalloc(sizeof(*v), GFP_KERNEL);
-               if (!v)
-                       return -ENOMEM;
-
-               v->port = port;
-               v->vid = SJA1105_DEFAULT_VLAN;
-               v->untagged = true;
-               if (dsa_is_cpu_port(ds, port))
-                       v->pvid = true;
-               list_add(&v->list, &priv->dsa_8021q_vlans);
-
-               v = kmemdup(v, sizeof(*v), GFP_KERNEL);
-               if (!v)
-                       return -ENOMEM;
-
-               list_add(&v->list, &priv->bridge_vlans);
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
+                       priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
+                       priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
+               }
        }
 
        ((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
@@ -413,8 +496,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 {
        struct sja1105_l2_forwarding_entry *l2fwd;
        struct dsa_switch *ds = priv->ds;
+       struct dsa_switch_tree *dst;
        struct sja1105_table *table;
-       int i, j;
+       struct dsa_link *dl;
+       int port, tc;
+       int from, to;
 
        table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
 
@@ -432,47 +518,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 
        l2fwd = table->entries;
 
-       /* First 5 entries define the forwarding rules */
-       for (i = 0; i < ds->num_ports; i++) {
-               unsigned int upstream = dsa_upstream_port(priv->ds, i);
+       /* First 5 entries in the L2 Forwarding Table define the forwarding
+        * rules and the VLAN PCP to ingress queue mapping.
+        * Set up the ingress queue mapping first.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+                       l2fwd[port].vlan_pmap[tc] = tc;
+       }
 
-               if (dsa_is_unused_port(ds, i))
+       /* Then manage the forwarding domain for user ports. These can forward
+        * only to the always-on domain (CPU port and DSA links)
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_user_port(ds, from))
                        continue;
 
-               for (j = 0; j < SJA1105_NUM_TC; j++)
-                       l2fwd[i].vlan_pmap[j] = j;
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (!dsa_is_cpu_port(ds, to) &&
+                           !dsa_is_dsa_port(ds, to))
+                               continue;
 
-               /* All ports start up with egress flooding enabled,
-                * including the CPU port.
-                */
-               priv->ucast_egress_floods |= BIT(i);
-               priv->bcast_egress_floods |= BIT(i);
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
+
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* Then manage the forwarding domain for DSA links and CPU ports (the
+        * always-on domain). These can send packets to any enabled port except
+        * themselves.
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
+                       continue;
+
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (dsa_is_unused_port(ds, to))
+                               continue;
+
+                       if (from == to)
+                               continue;
+
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
+
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* In odd topologies ("H" connections where there is a DSA link to
+        * another switch which also has its own CPU port), TX packets can loop
+        * back into the system (they are flooded from CPU port 1 to the DSA
+        * link, and from there to CPU port 2). Prevent this from happening by
+        * cutting RX from DSA links towards our CPU port, if the remote switch
+        * has its own CPU port and therefore doesn't need ours for network
+        * stack termination.
+        */
+       dst = ds->dst;
 
-               if (i == upstream)
+       list_for_each_entry(dl, &dst->rtable, list) {
+               if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
                        continue;
 
-               sja1105_port_allow_traffic(l2fwd, i, upstream, true);
-               sja1105_port_allow_traffic(l2fwd, upstream, i, true);
+               from = dl->dp->index;
+               to = dsa_upstream_port(ds, from);
+
+               dev_warn(ds->dev,
+                        "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
+                        from, to);
 
-               l2fwd[i].bc_domain = BIT(upstream);
-               l2fwd[i].fl_domain = BIT(upstream);
+               sja1105_port_allow_traffic(l2fwd, from, to, false);
 
-               l2fwd[upstream].bc_domain |= BIT(i);
-               l2fwd[upstream].fl_domain |= BIT(i);
+               l2fwd[from].bc_domain &= ~BIT(to);
+               l2fwd[from].fl_domain &= ~BIT(to);
+       }
+
+       /* Finally, manage the egress flooding domain. All ports start up with
+        * flooding enabled, including the CPU port and DSA links.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               priv->ucast_egress_floods |= BIT(port);
+               priv->bcast_egress_floods |= BIT(port);
        }
 
        /* Next 8 entries define VLAN PCP mapping from ingress to egress.
         * Create a one-to-one mapping.
         */
-       for (i = 0; i < SJA1105_NUM_TC; i++) {
-               for (j = 0; j < ds->num_ports; j++) {
-                       if (dsa_is_unused_port(ds, j))
+       for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
+               for (port = 0; port < ds->num_ports; port++) {
+                       if (dsa_is_unused_port(ds, port))
                                continue;
 
-                       l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+                       l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
                }
 
-               l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+               l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
        }
 
        return 0;
@@ -551,18 +699,11 @@ void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
 {
        struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
        struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
-       int max_mem = priv->info->max_frame_mem;
        struct sja1105_table *table;
 
-       /* VLAN retagging is implemented using a loopback port that consumes
-        * frame buffers. That leaves less for us.
-        */
-       if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT)
-               max_mem -= SJA1105_FRAME_MEMORY_RETAGGING_OVERHEAD;
-
        table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
        l2_fwd_params = table->entries;
-       l2_fwd_params->part_spc[0] = max_mem;
+       l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY;
 
        /* If we have any critical-traffic virtual links, we need to reserve
         * some frame buffer memory for them. At the moment, hardcode the value
@@ -634,6 +775,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
        general_params->tdmaconfigidx = tdmaconfigidx;
 }
 
+static int sja1105_init_topology(struct sja1105_private *priv,
+                                struct sja1105_general_params_entry *general_params)
+{
+       struct dsa_switch *ds = priv->ds;
+       int port;
+
+       /* The host port is the destination for traffic matching mac_fltres1
+        * and mac_fltres0 on all ports except itself. Default to an invalid
+        * value.
+        */
+       general_params->host_port = ds->num_ports;
+
+       /* Link-local traffic received on casc_port will be forwarded
+        * to host_port without embedding the source port and device ID
+        * info in the destination MAC address, and no RX timestamps will be
+        * taken either (presumably because it is a cascaded port and a
+        * downstream SJA switch already did that).
+        * To disable the feature, we need to do different things depending on
+        * switch generation. On SJA1105 we need to set an invalid port, while
+        * on SJA1110 which support multiple cascaded ports, this field is a
+        * bitmask so it must be left zero.
+        */
+       if (!priv->info->multiple_cascade_ports)
+               general_params->casc_port = ds->num_ports;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               bool is_upstream = dsa_is_upstream_port(ds, port);
+               bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+               /* Upstream ports can be dedicated CPU ports or
+                * upstream-facing DSA links
+                */
+               if (is_upstream) {
+                       if (general_params->host_port == ds->num_ports) {
+                               general_params->host_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a host port, configuring %d as one too is not supported\n",
+                                       general_params->host_port, port);
+                               return -EINVAL;
+                       }
+               }
+
+               /* Cascade ports are downstream-facing DSA links */
+               if (is_dsa_link && !is_upstream) {
+                       if (priv->info->multiple_cascade_ports) {
+                               general_params->casc_port |= BIT(port);
+                       } else if (general_params->casc_port == ds->num_ports) {
+                               general_params->casc_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+                                       general_params->casc_port, port);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       if (general_params->host_port == ds->num_ports) {
+               dev_err(ds->dev, "No host port configured\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
        struct sja1105_general_params_entry default_general_params = {
@@ -652,12 +859,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .mac_flt0    = SJA1105_LINKLOCAL_FILTER_B_MASK,
                .incl_srcpt0 = false,
                .send_meta0  = false,
-               /* The destination for traffic matching mac_fltres1 and
-                * mac_fltres0 on all ports except host_port. Such traffic
-                * receieved on host_port itself would be dropped, except
-                * by installing a temporary 'management route'
-                */
-               .host_port = priv->ds->num_ports,
                /* Default to an invalid value */
                .mirr_port = priv->ds->num_ports,
                /* No TTEthernet */
@@ -677,16 +878,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .header_type = ETH_P_SJA1110,
        };
        struct sja1105_general_params_entry *general_params;
-       struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int port;
+       int rc;
 
-       for (port = 0; port < ds->num_ports; port++) {
-               if (dsa_is_cpu_port(ds, port)) {
-                       default_general_params.host_port = port;
-                       break;
-               }
-       }
+       rc = sja1105_init_topology(priv, &default_general_params);
+       if (rc)
+               return rc;
 
        table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 
@@ -709,19 +906,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 
        sja1110_select_tdmaconfigidx(priv);
 
-       /* Link-local traffic received on casc_port will be forwarded
-        * to host_port without embedding the source port and device ID
-        * info in the destination MAC address, and no RX timestamps will be
-        * taken either (presumably because it is a cascaded port and a
-        * downstream SJA switch already did that).
-        * To disable the feature, we need to do different things depending on
-        * switch generation. On SJA1105 we need to set an invalid port, while
-        * on SJA1110 which support multiple cascaded ports, this field is a
-        * bitmask so it must be left zero.
-        */
-       if (!priv->info->multiple_cascade_ports)
-               general_params->casc_port = ds->num_ports;
-
        return 0;
 }
 
@@ -849,7 +1033,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
        for (port = 0; port < ds->num_ports; port++) {
                int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
 
-               if (dsa_is_cpu_port(priv->ds, port))
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                        mtu += VLAN_HLEN;
 
                policing[port].smax = 65535; /* Burst size in bytes */
@@ -1568,18 +1752,6 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
 
-       /* dsa_8021q is in effect when the bridge's vlan_filtering isn't,
-        * so the switch still does some VLAN processing internally.
-        * But Shared VLAN Learning (SVL) is also active, and it will take
-        * care of autonomous forwarding between the unique pvid's of each
-        * port.  Here we just make sure that users can't add duplicate FDB
-        * entries when in this mode - the actual VID doesn't matter except
-        * for what gets printed in 'bridge fdb show'.  In the case of zero,
-        * no VID gets printed at all.
-        */
-       if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
-               vid = 0;
-
        return priv->info->fdb_add_cmd(ds, port, addr, vid);
 }
 
@@ -1588,9 +1760,6 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
 
-       if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
-               vid = 0;
-
        return priv->info->fdb_del_cmd(ds, port, addr, vid);
 }
 
@@ -1633,7 +1802,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
                u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
                /* We need to hide the dsa_8021q VLANs from the user. */
-               if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
+               if (!priv->vlan_aware)
                        l2_lookup.vlanid = 0;
                rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
                if (rc)
@@ -1642,6 +1811,46 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
        return 0;
 }
 
+static void sja1105_fast_age(struct dsa_switch *ds, int port)
+{
+       struct sja1105_private *priv = ds->priv;
+       int i;
+
+       for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
+               struct sja1105_l2_lookup_entry l2_lookup = {0};
+               u8 macaddr[ETH_ALEN];
+               int rc;
+
+               rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                                i, &l2_lookup);
+               /* No fdb entry at i, not an issue */
+               if (rc == -ENOENT)
+                       continue;
+               if (rc) {
+                       dev_err(ds->dev, "Failed to read FDB: %pe\n",
+                               ERR_PTR(rc));
+                       return;
+               }
+
+               if (!(l2_lookup.destports & BIT(port)))
+                       continue;
+
+               /* Don't delete static FDB entries */
+               if (l2_lookup.lockeds)
+                       continue;
+
+               u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
+               rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid);
+               if (rc) {
+                       dev_err(ds->dev,
+                               "Failed to delete FDB entry %pM vid %lld: %pe\n",
+                               macaddr, l2_lookup.vlanid, ERR_PTR(rc));
+                       return;
+               }
+       }
+}
+
 static int sja1105_mdb_add(struct dsa_switch *ds, int port,
                           const struct switchdev_obj_port_mdb *mdb)
 {
@@ -1740,12 +1949,17 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port,
        if (rc)
                return rc;
 
+       rc = sja1105_commit_pvid(ds, port);
+       if (rc)
+               return rc;
+
        return sja1105_manage_flood_domains(priv);
 }
 
 static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
                                         u8 state)
 {
+       struct dsa_port *dp = dsa_to_port(ds, port);
        struct sja1105_private *priv = ds->priv;
        struct sja1105_mac_config_entry *mac;
 
@@ -1771,12 +1985,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
        case BR_STATE_LEARNING:
                mac[port].ingress   = true;
                mac[port].egress    = false;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        case BR_STATE_FORWARDING:
                mac[port].ingress   = true;
                mac[port].egress    = true;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        default:
                dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -2039,826 +2253,52 @@ out:
        return rc;
 }
 
-static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
+static enum dsa_tag_protocol
+sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
+                        enum dsa_tag_protocol mp)
 {
-       struct sja1105_mac_config_entry *mac;
-
-       mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
-
-       mac[port].vlanid = pvid;
+       struct sja1105_private *priv = ds->priv;
 
-       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
-                                          &mac[port], true);
+       return priv->info->tag_proto;
 }
 
-static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
-                                        int tree_index, int sw_index,
-                                        int other_port, struct net_device *br)
+/* The TPID setting belongs to the General Parameters table,
+ * which can only be partially reconfigured at runtime (and not the TPID).
+ * So a switch reset is required.
+ */
+int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
+                          struct netlink_ext_ack *extack)
 {
-       struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
-       struct sja1105_private *other_priv = other_ds->priv;
+       struct sja1105_l2_lookup_params_entry *l2_lookup_params;
+       struct sja1105_general_params_entry *general_params;
        struct sja1105_private *priv = ds->priv;
-       int port, rc;
+       struct sja1105_table *table;
+       struct sja1105_rule *rule;
+       u16 tpid, tpid2;
+       int rc;
+
+       list_for_each_entry(rule, &priv->flow_block.rules, list) {
+               if (rule->type == SJA1105_RULE_VL) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Cannot change VLAN filtering with active VL rules");
+                       return -EBUSY;
+               }
+       }
+
+       if (enabled) {
+               /* Enable VLAN filtering. */
+               tpid  = ETH_P_8021Q;
+               tpid2 = ETH_P_8021AD;
+       } else {
+               /* Disable VLAN filtering. */
+               tpid  = ETH_P_SJA1105;
+               tpid2 = ETH_P_SJA1105;
+       }
 
-       if (other_ds->ops != &sja1105_switch_ops)
+       if (priv->vlan_aware == enabled)
                return 0;
 
-       for (port = 0; port < ds->num_ports; port++) {
-               if (!dsa_is_user_port(ds, port))
-                       continue;
-               if (dsa_to_port(ds, port)->bridge_dev != br)
-                       continue;
-
-               rc = dsa_8021q_crosschip_bridge_join(priv->dsa_8021q_ctx,
-                                                    port,
-                                                    other_priv->dsa_8021q_ctx,
-                                                    other_port);
-               if (rc)
-                       return rc;
-
-               rc = dsa_8021q_crosschip_bridge_join(other_priv->dsa_8021q_ctx,
-                                                    other_port,
-                                                    priv->dsa_8021q_ctx,
-                                                    port);
-               if (rc)
-                       return rc;
-       }
-
-       return 0;
-}
-
-static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds,
-                                          int tree_index, int sw_index,
-                                          int other_port,
-                                          struct net_device *br)
-{
-       struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
-       struct sja1105_private *other_priv = other_ds->priv;
-       struct sja1105_private *priv = ds->priv;
-       int port;
-
-       if (other_ds->ops != &sja1105_switch_ops)
-               return;
-
-       for (port = 0; port < ds->num_ports; port++) {
-               if (!dsa_is_user_port(ds, port))
-                       continue;
-               if (dsa_to_port(ds, port)->bridge_dev != br)
-                       continue;
-
-               dsa_8021q_crosschip_bridge_leave(priv->dsa_8021q_ctx, port,
-                                                other_priv->dsa_8021q_ctx,
-                                                other_port);
-
-               dsa_8021q_crosschip_bridge_leave(other_priv->dsa_8021q_ctx,
-                                                other_port,
-                                                priv->dsa_8021q_ctx, port);
-       }
-}
-
-static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
-{
-       struct sja1105_private *priv = ds->priv;
-       int rc;
-
-       rc = dsa_8021q_setup(priv->dsa_8021q_ctx, enabled);
-       if (rc)
-               return rc;
-
-       dev_info(ds->dev, "%s switch tagging\n",
-                enabled ? "Enabled" : "Disabled");
-       return 0;
-}
-
-static enum dsa_tag_protocol
-sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
-                        enum dsa_tag_protocol mp)
-{
-       struct sja1105_private *priv = ds->priv;
-
-       return priv->info->tag_proto;
-}
-
-static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid)
-{
-       int subvlan;
-
-       if (pvid)
-               return 0;
-
-       for (subvlan = 1; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
-               if (subvlan_map[subvlan] == VLAN_N_VID)
-                       return subvlan;
-
-       return -1;
-}
-
-static int sja1105_find_subvlan(u16 *subvlan_map, u16 vid)
-{
-       int subvlan;
-
-       for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
-               if (subvlan_map[subvlan] == vid)
-                       return subvlan;
-
-       return -1;
-}
-
-static int sja1105_find_committed_subvlan(struct sja1105_private *priv,
-                                         int port, u16 vid)
-{
-       struct sja1105_port *sp = &priv->ports[port];
-
-       return sja1105_find_subvlan(sp->subvlan_map, vid);
-}
-
-static void sja1105_init_subvlan_map(u16 *subvlan_map)
-{
-       int subvlan;
-
-       for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
-               subvlan_map[subvlan] = VLAN_N_VID;
-}
-
-static void sja1105_commit_subvlan_map(struct sja1105_private *priv, int port,
-                                      u16 *subvlan_map)
-{
-       struct sja1105_port *sp = &priv->ports[port];
-       int subvlan;
-
-       for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
-               sp->subvlan_map[subvlan] = subvlan_map[subvlan];
-}
-
-static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
-{
-       struct sja1105_vlan_lookup_entry *vlan;
-       int count, i;
-
-       vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
-       count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
-
-       for (i = 0; i < count; i++)
-               if (vlan[i].vlanid == vid)
-                       return i;
-
-       /* Return an invalid entry index if not found */
-       return -1;
-}
-
-static int
-sja1105_find_retagging_entry(struct sja1105_retagging_entry *retagging,
-                            int count, int from_port, u16 from_vid,
-                            u16 to_vid)
-{
-       int i;
-
-       for (i = 0; i < count; i++)
-               if (retagging[i].ing_port == BIT(from_port) &&
-                   retagging[i].vlan_ing == from_vid &&
-                   retagging[i].vlan_egr == to_vid)
-                       return i;
-
-       /* Return an invalid entry index if not found */
-       return -1;
-}
-
-static int sja1105_commit_vlans(struct sja1105_private *priv,
-                               struct sja1105_vlan_lookup_entry *new_vlan,
-                               struct sja1105_retagging_entry *new_retagging,
-                               int num_retagging)
-{
-       struct sja1105_retagging_entry *retagging;
-       struct sja1105_vlan_lookup_entry *vlan;
-       struct sja1105_table *table;
-       int num_vlans = 0;
-       int rc, i, k = 0;
-
-       /* VLAN table */
-       table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
-       vlan = table->entries;
-
-       for (i = 0; i < VLAN_N_VID; i++) {
-               int match = sja1105_is_vlan_configured(priv, i);
-
-               if (new_vlan[i].vlanid != VLAN_N_VID)
-                       num_vlans++;
-
-               if (new_vlan[i].vlanid == VLAN_N_VID && match >= 0) {
-                       /* Was there before, no longer is. Delete */
-                       dev_dbg(priv->ds->dev, "Deleting VLAN %d\n", i);
-                       rc = sja1105_dynamic_config_write(priv,
-                                                         BLK_IDX_VLAN_LOOKUP,
-                                                         i, &vlan[match], false);
-                       if (rc < 0)
-                               return rc;
-               } else if (new_vlan[i].vlanid != VLAN_N_VID) {
-                       /* Nothing changed, don't do anything */
-                       if (match >= 0 &&
-                           vlan[match].vlanid == new_vlan[i].vlanid &&
-                           vlan[match].tag_port == new_vlan[i].tag_port &&
-                           vlan[match].vlan_bc == new_vlan[i].vlan_bc &&
-                           vlan[match].vmemb_port == new_vlan[i].vmemb_port)
-                               continue;
-                       /* Update entry */
-                       dev_dbg(priv->ds->dev, "Updating VLAN %d\n", i);
-                       rc = sja1105_dynamic_config_write(priv,
-                                                         BLK_IDX_VLAN_LOOKUP,
-                                                         i, &new_vlan[i],
-                                                         true);
-                       if (rc < 0)
-                               return rc;
-               }
-       }
-
-       if (table->entry_count)
-               kfree(table->entries);
-
-       table->entries = kcalloc(num_vlans, table->ops->unpacked_entry_size,
-                                GFP_KERNEL);
-       if (!table->entries)
-               return -ENOMEM;
-
-       table->entry_count = num_vlans;
-       vlan = table->entries;
-
-       for (i = 0; i < VLAN_N_VID; i++) {
-               if (new_vlan[i].vlanid == VLAN_N_VID)
-                       continue;
-               vlan[k++] = new_vlan[i];
-       }
-
-       /* VLAN Retagging Table */
-       table = &priv->static_config.tables[BLK_IDX_RETAGGING];
-       retagging = table->entries;
-
-       for (i = 0; i < table->entry_count; i++) {
-               rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
-                                                 i, &retagging[i], false);
-               if (rc)
-                       return rc;
-       }
-
-       if (table->entry_count)
-               kfree(table->entries);
-
-       table->entries = kcalloc(num_retagging, table->ops->unpacked_entry_size,
-                                GFP_KERNEL);
-       if (!table->entries)
-               return -ENOMEM;
-
-       table->entry_count = num_retagging;
-       retagging = table->entries;
-
-       for (i = 0; i < num_retagging; i++) {
-               retagging[i] = new_retagging[i];
-
-               /* Update entry */
-               rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
-                                                 i, &retagging[i], true);
-               if (rc < 0)
-                       return rc;
-       }
-
-       return 0;
-}
-
-struct sja1105_crosschip_vlan {
-       struct list_head list;
-       u16 vid;
-       bool untagged;
-       int port;
-       int other_port;
-       struct dsa_8021q_context *other_ctx;
-};
-
-struct sja1105_crosschip_switch {
-       struct list_head list;
-       struct dsa_8021q_context *other_ctx;
-};
-
-static int sja1105_commit_pvid(struct sja1105_private *priv)
-{
-       struct sja1105_bridge_vlan *v;
-       struct list_head *vlan_list;
-       int rc = 0;
-
-       if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
-               vlan_list = &priv->bridge_vlans;
-       else
-               vlan_list = &priv->dsa_8021q_vlans;
-
-       list_for_each_entry(v, vlan_list, list) {
-               if (v->pvid) {
-                       rc = sja1105_pvid_apply(priv, v->port, v->vid);
-                       if (rc)
-                               break;
-               }
-       }
-
-       return rc;
-}
-
-static int
-sja1105_build_bridge_vlans(struct sja1105_private *priv,
-                          struct sja1105_vlan_lookup_entry *new_vlan)
-{
-       struct sja1105_bridge_vlan *v;
-
-       if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
-               return 0;
-
-       list_for_each_entry(v, &priv->bridge_vlans, list) {
-               int match = v->vid;
-
-               new_vlan[match].vlanid = v->vid;
-               new_vlan[match].vmemb_port |= BIT(v->port);
-               new_vlan[match].vlan_bc |= BIT(v->port);
-               if (!v->untagged)
-                       new_vlan[match].tag_port |= BIT(v->port);
-               new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
-       }
-
-       return 0;
-}
-
-static int
-sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv,
-                             struct sja1105_vlan_lookup_entry *new_vlan)
-{
-       struct sja1105_bridge_vlan *v;
-
-       if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
-               return 0;
-
-       list_for_each_entry(v, &priv->dsa_8021q_vlans, list) {
-               int match = v->vid;
-
-               new_vlan[match].vlanid = v->vid;
-               new_vlan[match].vmemb_port |= BIT(v->port);
-               new_vlan[match].vlan_bc |= BIT(v->port);
-               if (!v->untagged)
-                       new_vlan[match].tag_port |= BIT(v->port);
-               new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
-       }
-
-       return 0;
-}
-
-static int sja1105_build_subvlans(struct sja1105_private *priv,
-                                 u16 subvlan_map[][DSA_8021Q_N_SUBVLAN],
-                                 struct sja1105_vlan_lookup_entry *new_vlan,
-                                 struct sja1105_retagging_entry *new_retagging,
-                                 int *num_retagging)
-{
-       struct sja1105_bridge_vlan *v;
-       int k = *num_retagging;
-
-       if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
-               return 0;
-
-       list_for_each_entry(v, &priv->bridge_vlans, list) {
-               int upstream = dsa_upstream_port(priv->ds, v->port);
-               int match, subvlan;
-               u16 rx_vid;
-
-               /* Only sub-VLANs on user ports need to be applied.
-                * Bridge VLANs also include VLANs added automatically
-                * by DSA on the CPU port.
-                */
-               if (!dsa_is_user_port(priv->ds, v->port))
-                       continue;
-
-               subvlan = sja1105_find_subvlan(subvlan_map[v->port],
-                                              v->vid);
-               if (subvlan < 0) {
-                       subvlan = sja1105_find_free_subvlan(subvlan_map[v->port],
-                                                           v->pvid);
-                       if (subvlan < 0) {
-                               dev_err(priv->ds->dev, "No more free subvlans\n");
-                               return -ENOSPC;
-                       }
-               }
-
-               rx_vid = dsa_8021q_rx_vid_subvlan(priv->ds, v->port, subvlan);
-
-               /* @v->vid on @v->port needs to be retagged to @rx_vid
-                * on @upstream. Assume @v->vid on @v->port and on
-                * @upstream was already configured by the previous
-                * iteration over bridge_vlans.
-                */
-               match = rx_vid;
-               new_vlan[match].vlanid = rx_vid;
-               new_vlan[match].vmemb_port |= BIT(v->port);
-               new_vlan[match].vmemb_port |= BIT(upstream);
-               new_vlan[match].vlan_bc |= BIT(v->port);
-               new_vlan[match].vlan_bc |= BIT(upstream);
-               /* The "untagged" flag is set the same as for the
-                * original VLAN
-                */
-               if (!v->untagged)
-                       new_vlan[match].tag_port |= BIT(v->port);
-               /* But it's always tagged towards the CPU */
-               new_vlan[match].tag_port |= BIT(upstream);
-               new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
-
-               /* The Retagging Table generates packet *clones* with
-                * the new VLAN. This is a very odd hardware quirk
-                * which we need to suppress by dropping the original
-                * packet.
-                * Deny egress of the original VLAN towards the CPU
-                * port. This will force the switch to drop it, and
-                * we'll see only the retagged packets.
-                */
-               match = v->vid;
-               new_vlan[match].vlan_bc &= ~BIT(upstream);
-
-               /* And the retagging itself */
-               new_retagging[k].vlan_ing = v->vid;
-               new_retagging[k].vlan_egr = rx_vid;
-               new_retagging[k].ing_port = BIT(v->port);
-               new_retagging[k].egr_port = BIT(upstream);
-               if (k++ == SJA1105_MAX_RETAGGING_COUNT) {
-                       dev_err(priv->ds->dev, "No more retagging rules\n");
-                       return -ENOSPC;
-               }
-
-               subvlan_map[v->port][subvlan] = v->vid;
-       }
-
-       *num_retagging = k;
-
-       return 0;
-}
-
-/* Sadly, in crosschip scenarios where the CPU port is also the link to another
- * switch, we should retag backwards (the dsa_8021q vid to the original vid) on
- * the CPU port of neighbour switches.
- */
-static int
-sja1105_build_crosschip_subvlans(struct sja1105_private *priv,
-                                struct sja1105_vlan_lookup_entry *new_vlan,
-                                struct sja1105_retagging_entry *new_retagging,
-                                int *num_retagging)
-{
-       struct sja1105_crosschip_vlan *tmp, *pos;
-       struct dsa_8021q_crosschip_link *c;
-       struct sja1105_bridge_vlan *v, *w;
-       struct list_head crosschip_vlans;
-       int k = *num_retagging;
-       int rc = 0;
-
-       if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
-               return 0;
-
-       INIT_LIST_HEAD(&crosschip_vlans);
-
-       list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) {
-               struct sja1105_private *other_priv = c->other_ctx->ds->priv;
-
-               if (other_priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
-                       continue;
-
-               /* Crosschip links are also added to the CPU ports.
-                * Ignore those.
-                */
-               if (!dsa_is_user_port(priv->ds, c->port))
-                       continue;
-               if (!dsa_is_user_port(c->other_ctx->ds, c->other_port))
-                       continue;
-
-               /* Search for VLANs on the remote port */
-               list_for_each_entry(v, &other_priv->bridge_vlans, list) {
-                       bool already_added = false;
-                       bool we_have_it = false;
-
-                       if (v->port != c->other_port)
-                               continue;
-
-                       /* If @v is a pvid on @other_ds, it does not need
-                        * re-retagging, because its SVL field is 0 and we
-                        * already allow that, via the dsa_8021q crosschip
-                        * links.
-                        */
-                       if (v->pvid)
-                               continue;
-
-                       /* Search for the VLAN on our local port */
-                       list_for_each_entry(w, &priv->bridge_vlans, list) {
-                               if (w->port == c->port && w->vid == v->vid) {
-                                       we_have_it = true;
-                                       break;
-                               }
-                       }
-
-                       if (!we_have_it)
-                               continue;
-
-                       list_for_each_entry(tmp, &crosschip_vlans, list) {
-                               if (tmp->vid == v->vid &&
-                                   tmp->untagged == v->untagged &&
-                                   tmp->port == c->port &&
-                                   tmp->other_port == v->port &&
-                                   tmp->other_ctx == c->other_ctx) {
-                                       already_added = true;
-                                       break;
-                               }
-                       }
-
-                       if (already_added)
-                               continue;
-
-                       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
-                       if (!tmp) {
-                               dev_err(priv->ds->dev, "Failed to allocate memory\n");
-                               rc = -ENOMEM;
-                               goto out;
-                       }
-                       tmp->vid = v->vid;
-                       tmp->port = c->port;
-                       tmp->other_port = v->port;
-                       tmp->other_ctx = c->other_ctx;
-                       tmp->untagged = v->untagged;
-                       list_add(&tmp->list, &crosschip_vlans);
-               }
-       }
-
-       list_for_each_entry(tmp, &crosschip_vlans, list) {
-               struct sja1105_private *other_priv = tmp->other_ctx->ds->priv;
-               int upstream = dsa_upstream_port(priv->ds, tmp->port);
-               int match, subvlan;
-               u16 rx_vid;
-
-               subvlan = sja1105_find_committed_subvlan(other_priv,
-                                                        tmp->other_port,
-                                                        tmp->vid);
-               /* If this happens, it's a bug. The neighbour switch does not
-                * have a subvlan for tmp->vid on tmp->other_port, but it
-                * should, since we already checked for its vlan_state.
-                */
-               if (WARN_ON(subvlan < 0)) {
-                       rc = -EINVAL;
-                       goto out;
-               }
-
-               rx_vid = dsa_8021q_rx_vid_subvlan(tmp->other_ctx->ds,
-                                                 tmp->other_port,
-                                                 subvlan);
-
-               /* The @rx_vid retagged from @tmp->vid on
-                * {@tmp->other_ds, @tmp->other_port} needs to be
-                * re-retagged to @tmp->vid on the way back to us.
-                *
-                * Assume the original @tmp->vid is already configured
-                * on this local switch, otherwise we wouldn't be
-                * retagging its subvlan on the other switch in the
-                * first place. We just need to add a reverse retagging
-                * rule for @rx_vid and install @rx_vid on our ports.
-                */
-               match = rx_vid;
-               new_vlan[match].vlanid = rx_vid;
-               new_vlan[match].vmemb_port |= BIT(tmp->port);
-               new_vlan[match].vmemb_port |= BIT(upstream);
-               /* The "untagged" flag is set the same as for the
-                * original VLAN. And towards the CPU, it doesn't
-                * really matter, because @rx_vid will only receive
-                * traffic on that port. For consistency with other dsa_8021q
-                * VLANs, we'll keep the CPU port tagged.
-                */
-               if (!tmp->untagged)
-                       new_vlan[match].tag_port |= BIT(tmp->port);
-               new_vlan[match].tag_port |= BIT(upstream);
-               new_vlan[match].type_entry = SJA1110_VLAN_D_TAG;
-               /* Deny egress of @rx_vid towards our front-panel port.
-                * This will force the switch to drop it, and we'll see
-                * only the re-retagged packets (having the original,
-                * pre-initial-retagging, VLAN @tmp->vid).
-                */
-               new_vlan[match].vlan_bc &= ~BIT(tmp->port);
-
-               /* On reverse retagging, the same ingress VLAN goes to multiple
-                * ports. So we have an opportunity to create composite rules
-                * to not waste the limited space in the retagging table.
-                */
-               k = sja1105_find_retagging_entry(new_retagging, *num_retagging,
-                                                upstream, rx_vid, tmp->vid);
-               if (k < 0) {
-                       if (*num_retagging == SJA1105_MAX_RETAGGING_COUNT) {
-                               dev_err(priv->ds->dev, "No more retagging rules\n");
-                               rc = -ENOSPC;
-                               goto out;
-                       }
-                       k = (*num_retagging)++;
-               }
-               /* And the retagging itself */
-               new_retagging[k].vlan_ing = rx_vid;
-               new_retagging[k].vlan_egr = tmp->vid;
-               new_retagging[k].ing_port = BIT(upstream);
-               new_retagging[k].egr_port |= BIT(tmp->port);
-       }
-
-out:
-       list_for_each_entry_safe(tmp, pos, &crosschip_vlans, list) {
-               list_del(&tmp->list);
-               kfree(tmp);
-       }
-
-       return rc;
-}
-
-static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify);
-
-static int sja1105_notify_crosschip_switches(struct sja1105_private *priv)
-{
-       struct sja1105_crosschip_switch *s, *pos;
-       struct list_head crosschip_switches;
-       struct dsa_8021q_crosschip_link *c;
-       int rc = 0;
-
-       INIT_LIST_HEAD(&crosschip_switches);
-
-       list_for_each_entry(c, &priv->dsa_8021q_ctx->crosschip_links, list) {
-               bool already_added = false;
-
-               list_for_each_entry(s, &crosschip_switches, list) {
-                       if (s->other_ctx == c->other_ctx) {
-                               already_added = true;
-                               break;
-                       }
-               }
-
-               if (already_added)
-                       continue;
-
-               s = kzalloc(sizeof(*s), GFP_KERNEL);
-               if (!s) {
-                       dev_err(priv->ds->dev, "Failed to allocate memory\n");
-                       rc = -ENOMEM;
-                       goto out;
-               }
-               s->other_ctx = c->other_ctx;
-               list_add(&s->list, &crosschip_switches);
-       }
-
-       list_for_each_entry(s, &crosschip_switches, list) {
-               struct sja1105_private *other_priv = s->other_ctx->ds->priv;
-
-               rc = sja1105_build_vlan_table(other_priv, false);
-               if (rc)
-                       goto out;
-       }
-
-out:
-       list_for_each_entry_safe(s, pos, &crosschip_switches, list) {
-               list_del(&s->list);
-               kfree(s);
-       }
-
-       return rc;
-}
-
-static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
-{
-       u16 subvlan_map[SJA1105_MAX_NUM_PORTS][DSA_8021Q_N_SUBVLAN];
-       struct sja1105_retagging_entry *new_retagging;
-       struct sja1105_vlan_lookup_entry *new_vlan;
-       struct sja1105_table *table;
-       int i, num_retagging = 0;
-       int rc;
-
-       table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
-       new_vlan = kcalloc(VLAN_N_VID,
-                          table->ops->unpacked_entry_size, GFP_KERNEL);
-       if (!new_vlan)
-               return -ENOMEM;
-
-       table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
-       new_retagging = kcalloc(SJA1105_MAX_RETAGGING_COUNT,
-                               table->ops->unpacked_entry_size, GFP_KERNEL);
-       if (!new_retagging) {
-               kfree(new_vlan);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < VLAN_N_VID; i++)
-               new_vlan[i].vlanid = VLAN_N_VID;
-
-       for (i = 0; i < SJA1105_MAX_RETAGGING_COUNT; i++)
-               new_retagging[i].vlan_ing = VLAN_N_VID;
-
-       for (i = 0; i < priv->ds->num_ports; i++)
-               sja1105_init_subvlan_map(subvlan_map[i]);
-
-       /* Bridge VLANs */
-       rc = sja1105_build_bridge_vlans(priv, new_vlan);
-       if (rc)
-               goto out;
-
-       /* VLANs necessary for dsa_8021q operation, given to us by tag_8021q.c:
-        * - RX VLANs
-        * - TX VLANs
-        * - Crosschip links
-        */
-       rc = sja1105_build_dsa_8021q_vlans(priv, new_vlan);
-       if (rc)
-               goto out;
-
-       /* Private VLANs necessary for dsa_8021q operation, which we need to
-        * determine on our own:
-        * - Sub-VLANs
-        * - Sub-VLANs of crosschip switches
-        */
-       rc = sja1105_build_subvlans(priv, subvlan_map, new_vlan, new_retagging,
-                                   &num_retagging);
-       if (rc)
-               goto out;
-
-       rc = sja1105_build_crosschip_subvlans(priv, new_vlan, new_retagging,
-                                             &num_retagging);
-       if (rc)
-               goto out;
-
-       rc = sja1105_commit_vlans(priv, new_vlan, new_retagging, num_retagging);
-       if (rc)
-               goto out;
-
-       rc = sja1105_commit_pvid(priv);
-       if (rc)
-               goto out;
-
-       for (i = 0; i < priv->ds->num_ports; i++)
-               sja1105_commit_subvlan_map(priv, i, subvlan_map[i]);
-
-       if (notify) {
-               rc = sja1105_notify_crosschip_switches(priv);
-               if (rc)
-                       goto out;
-       }
-
-out:
-       kfree(new_vlan);
-       kfree(new_retagging);
-
-       return rc;
-}
-
-/* The TPID setting belongs to the General Parameters table,
- * which can only be partially reconfigured at runtime (and not the TPID).
- * So a switch reset is required.
- */
-int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
-                          struct netlink_ext_ack *extack)
-{
-       struct sja1105_l2_lookup_params_entry *l2_lookup_params;
-       struct sja1105_general_params_entry *general_params;
-       struct sja1105_private *priv = ds->priv;
-       enum sja1105_vlan_state state;
-       struct sja1105_table *table;
-       struct sja1105_rule *rule;
-       bool want_tagging;
-       u16 tpid, tpid2;
-       int rc;
-
-       list_for_each_entry(rule, &priv->flow_block.rules, list) {
-               if (rule->type == SJA1105_RULE_VL) {
-                       NL_SET_ERR_MSG_MOD(extack,
-                                          "Cannot change VLAN filtering with active VL rules");
-                       return -EBUSY;
-               }
-       }
-
-       if (enabled) {
-               /* Enable VLAN filtering. */
-               tpid  = ETH_P_8021Q;
-               tpid2 = ETH_P_8021AD;
-       } else {
-               /* Disable VLAN filtering. */
-               tpid  = ETH_P_SJA1105;
-               tpid2 = ETH_P_SJA1105;
-       }
-
-       for (port = 0; port < ds->num_ports; port++) {
-               struct sja1105_port *sp = &priv->ports[port];
-
-               if (enabled)
-                       sp->xmit_tpid = priv->info->qinq_tpid;
-               else
-                       sp->xmit_tpid = ETH_P_SJA1105;
-       }
-
-       if (!enabled)
-               state = SJA1105_VLAN_UNAWARE;
-       else if (priv->best_effort_vlan_filtering)
-               state = SJA1105_VLAN_BEST_EFFORT;
-       else
-               state = SJA1105_VLAN_FILTERING_FULL;
-
-       if (priv->vlan_state == state)
-               return 0;
-
-       priv->vlan_state = state;
-       want_tagging = (state == SJA1105_VLAN_UNAWARE ||
-                       state == SJA1105_VLAN_BEST_EFFORT);
+       priv->vlan_aware = enabled;
 
        table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
        general_params = table->entries;
@@ -2872,8 +2312,6 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
        general_params->incl_srcpt1 = enabled;
        general_params->incl_srcpt0 = enabled;
 
-       want_tagging = priv->best_effort_vlan_filtering || !enabled;
-
        /* VLAN filtering => independent VLAN learning.
         * No VLAN filtering (or best effort) => shared VLAN learning.
         *
@@ -2894,314 +2332,205 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled,
         */
        table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
        l2_lookup_params = table->entries;
-       l2_lookup_params->shared_learn = want_tagging;
+       l2_lookup_params->shared_learn = !priv->vlan_aware;
 
-       sja1105_frame_memory_partitioning(priv);
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
 
-       rc = sja1105_build_vlan_table(priv, false);
-       if (rc)
-               return rc;
+               rc = sja1105_commit_pvid(ds, port);
+               if (rc)
+                       return rc;
+       }
 
        rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
        if (rc)
                NL_SET_ERR_MSG_MOD(extack, "Failed to change VLAN Ethertype");
 
-       /* Switch port identification based on 802.1Q is only passable
-        * if we are not under a vlan_filtering bridge. So make sure
-        * the two configurations are mutually exclusive (of course, the
-        * user may know better, i.e. best_effort_vlan_filtering).
-        */
-       return sja1105_setup_8021q_tagging(ds, want_tagging);
+       return rc;
 }
 
-/* Returns number of VLANs added (0 or 1) on success,
- * or a negative error code.
- */
-static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid,
-                               u16 flags, struct list_head *vlan_list)
-{
-       bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED;
-       bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
-       struct sja1105_bridge_vlan *v;
-
-       list_for_each_entry(v, vlan_list, list) {
-               if (v->port == port && v->vid == vid) {
-                       /* Already added */
-                       if (v->untagged == untagged && v->pvid == pvid)
-                               /* Nothing changed */
-                               return 0;
-
-                       /* It's the same VLAN, but some of the flags changed
-                        * and the user did not bother to delete it first.
-                        * Update it and trigger sja1105_build_vlan_table.
-                        */
-                       v->untagged = untagged;
-                       v->pvid = pvid;
-                       return 1;
-               }
-       }
+static int sja1105_vlan_add(struct sja1105_private *priv, int port, u16 vid,
+                           u16 flags, bool allowed_ingress)
+{
+       struct sja1105_vlan_lookup_entry *vlan;
+       struct sja1105_table *table;
+       int match, rc;
 
-       v = kzalloc(sizeof(*v), GFP_KERNEL);
-       if (!v) {
-               dev_err(ds->dev, "Out of memory while storing VLAN\n");
-               return -ENOMEM;
+       table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+
+       match = sja1105_is_vlan_configured(priv, vid);
+       if (match < 0) {
+               rc = sja1105_table_resize(table, table->entry_count + 1);
+               if (rc)
+                       return rc;
+               match = table->entry_count - 1;
        }
 
-       v->port = port;
-       v->vid = vid;
-       v->untagged = untagged;
-       v->pvid = pvid;
-       list_add(&v->list, vlan_list);
+       /* Assign pointer after the resize (it's new memory) */
+       vlan = table->entries;
 
-       return 1;
-}
+       vlan[match].type_entry = SJA1110_VLAN_D_TAG;
+       vlan[match].vlanid = vid;
+       vlan[match].vlan_bc |= BIT(port);
 
-/* Returns number of VLANs deleted (0 or 1) */
-static int sja1105_vlan_del_one(struct dsa_switch *ds, int port, u16 vid,
-                               struct list_head *vlan_list)
-{
-       struct sja1105_bridge_vlan *v, *n;
+       if (allowed_ingress)
+               vlan[match].vmemb_port |= BIT(port);
+       else
+               vlan[match].vmemb_port &= ~BIT(port);
 
-       list_for_each_entry_safe(v, n, vlan_list, list) {
-               if (v->port == port && v->vid == vid) {
-                       list_del(&v->list);
-                       kfree(v);
-                       return 1;
-               }
-       }
+       if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
+               vlan[match].tag_port &= ~BIT(port);
+       else
+               vlan[match].tag_port |= BIT(port);
 
-       return 0;
+       return sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
+                                           &vlan[match], true);
 }
 
-static int sja1105_vlan_add(struct dsa_switch *ds, int port,
-                           const struct switchdev_obj_port_vlan *vlan,
-                           struct netlink_ext_ack *extack)
+static int sja1105_vlan_del(struct sja1105_private *priv, int port, u16 vid)
 {
-       struct sja1105_private *priv = ds->priv;
-       bool vlan_table_changed = false;
-       int rc;
+       struct sja1105_vlan_lookup_entry *vlan;
+       struct sja1105_table *table;
+       bool keep = true;
+       int match, rc;
+
+       table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+
+       match = sja1105_is_vlan_configured(priv, vid);
+       /* Can't delete a missing entry. */
+       if (match < 0)
+               return 0;
+
+       /* Assign pointer after the resize (it's new memory) */
+       vlan = table->entries;
 
-       /* If the user wants best-effort VLAN filtering (aka vlan_filtering
-        * bridge plus tagging), be sure to at least deny alterations to the
-        * configuration done by dsa_8021q.
+       vlan[match].vlanid = vid;
+       vlan[match].vlan_bc &= ~BIT(port);
+       vlan[match].vmemb_port &= ~BIT(port);
+       /* Also unset tag_port, just so we don't have a confusing bitmap
+        * (no practical purpose).
         */
-       if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL &&
-           vid_is_dsa_8021q(vlan->vid)) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "Range 1024-3071 reserved for dsa_8021q operation");
-               return -EBUSY;
-       }
+       vlan[match].tag_port &= ~BIT(port);
+
+       /* If there's no port left as member of this VLAN,
+        * it's time for it to go.
+        */
+       if (!vlan[match].vmemb_port)
+               keep = false;
 
-       rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags,
-                                 &priv->bridge_vlans);
+       rc = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
+                                         &vlan[match], keep);
        if (rc < 0)
                return rc;
-       if (rc > 0)
-               vlan_table_changed = true;
 
-       if (!vlan_table_changed)
-               return 0;
+       if (!keep)
+               return sja1105_table_delete_entry(table, match);
 
-       return sja1105_build_vlan_table(priv, true);
+       return 0;
 }
 
-static int sja1105_vlan_del(struct dsa_switch *ds, int port,
-                           const struct switchdev_obj_port_vlan *vlan)
+static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port,
+                                  const struct switchdev_obj_port_vlan *vlan,
+                                  struct netlink_ext_ack *extack)
 {
        struct sja1105_private *priv = ds->priv;
-       bool vlan_table_changed = false;
+       u16 flags = vlan->flags;
        int rc;
 
-       rc = sja1105_vlan_del_one(ds, port, vlan->vid, &priv->bridge_vlans);
-       if (rc > 0)
-               vlan_table_changed = true;
-
-       if (!vlan_table_changed)
-               return 0;
-
-       return sja1105_build_vlan_table(priv, true);
-}
+       /* Be sure to deny alterations to the configuration done by tag_8021q.
+        */
+       if (vid_is_dsa_8021q(vlan->vid)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Range 1024-3071 reserved for dsa_8021q operation");
+               return -EBUSY;
+       }
 
-static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
-                                     u16 flags)
-{
-       struct sja1105_private *priv = ds->priv;
-       int rc;
+       /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+               flags = 0;
 
-       rc = sja1105_vlan_add_one(ds, port, vid, flags, &priv->dsa_8021q_vlans);
-       if (rc <= 0)
+       rc = sja1105_vlan_add(priv, port, vlan->vid, flags, true);
+       if (rc)
                return rc;
 
-       return sja1105_build_vlan_table(priv, true);
+       if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
+               priv->bridge_pvid[port] = vlan->vid;
+
+       return sja1105_commit_pvid(ds, port);
 }
 
-static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
+static int sja1105_bridge_vlan_del(struct dsa_switch *ds, int port,
+                                  const struct switchdev_obj_port_vlan *vlan)
 {
        struct sja1105_private *priv = ds->priv;
        int rc;
 
-       rc = sja1105_vlan_del_one(ds, port, vid, &priv->dsa_8021q_vlans);
-       if (!rc)
-               return 0;
+       rc = sja1105_vlan_del(priv, port, vlan->vid);
+       if (rc)
+               return rc;
 
-       return sja1105_build_vlan_table(priv, true);
+       /* In case the pvid was deleted, make sure that untagged packets will
+        * be dropped.
+        */
+       return sja1105_commit_pvid(ds, port);
 }
 
-static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = {
-       .vlan_add       = sja1105_dsa_8021q_vlan_add,
-       .vlan_del       = sja1105_dsa_8021q_vlan_del,
-};
-
-/* The programming model for the SJA1105 switch is "all-at-once" via static
- * configuration tables. Some of these can be dynamically modified at runtime,
- * but not the xMII mode parameters table.
- * Furthermode, some PHYs may not have crystals for generating their clocks
- * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's
- * ref_clk pin. So port clocking needs to be initialized early, before
- * connecting to PHYs is attempted, otherwise they won't respond through MDIO.
- * Setting correct PHY link speed does not matter now.
- * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY
- * bindings are not yet parsed by DSA core. We need to parse early so that we
- * can populate the xMII mode parameters table.
- */
-static int sja1105_setup(struct dsa_switch *ds)
+static int sja1105_dsa_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid,
+                                     u16 flags)
 {
        struct sja1105_private *priv = ds->priv;
+       bool allowed_ingress = true;
        int rc;
 
-       rc = sja1105_parse_dt(priv);
-       if (rc < 0) {
-               dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
-               return rc;
-       }
-
-       /* Error out early if internal delays are required through DT
-        * and we can't apply them.
-        */
-       rc = sja1105_parse_rgmii_delays(priv);
-       if (rc < 0) {
-               dev_err(ds->dev, "RGMII delay not supported\n");
-               return rc;
-       }
-
-       rc = sja1105_ptp_clock_register(ds);
-       if (rc < 0) {
-               dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
-               return rc;
-       }
-
-       rc = sja1105_mdiobus_register(ds);
-       if (rc < 0) {
-               dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
-                       ERR_PTR(rc));
-               goto out_ptp_clock_unregister;
-       }
-
-       if (priv->info->disable_microcontroller) {
-               rc = priv->info->disable_microcontroller(priv);
-               if (rc < 0) {
-                       dev_err(ds->dev,
-                               "Failed to disable microcontroller: %pe\n",
-                               ERR_PTR(rc));
-                       goto out_mdiobus_unregister;
-               }
-       }
-
-       /* Create and send configuration down to device */
-       rc = sja1105_static_config_load(priv);
-       if (rc < 0) {
-               dev_err(ds->dev, "Failed to load static config: %d\n", rc);
-               goto out_mdiobus_unregister;
-       }
-
-       /* Configure the CGU (PHY link modes and speeds) */
-       if (priv->info->clocking_setup) {
-               rc = priv->info->clocking_setup(priv);
-               if (rc < 0) {
-                       dev_err(ds->dev,
-                               "Failed to configure MII clocking: %pe\n",
-                               ERR_PTR(rc));
-                       goto out_static_config_free;
-               }
-       }
-
-       /* On SJA1105, VLAN filtering per se is always enabled in hardware.
-        * The only thing we can do to disable it is lie about what the 802.1Q
-        * EtherType is.
-        * So it will still try to apply VLAN filtering, but all ingress
-        * traffic (except frames received with EtherType of ETH_P_SJA1105)
-        * will be internally tagged with a distorted VLAN header where the
-        * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid.
+       /* Prevent attackers from trying to inject a DSA tag from
+        * the outside world.
         */
-       ds->vlan_filtering_is_global = true;
-
-       /* Advertise the 8 egress queues */
-       ds->num_tx_queues = SJA1105_NUM_TC;
-
-       ds->mtu_enforcement_ingress = true;
-
-       priv->best_effort_vlan_filtering = true;
-
-       rc = sja1105_devlink_setup(ds);
-       if (rc < 0)
-               goto out_static_config_free;
+       if (dsa_is_user_port(ds, port))
+               allowed_ingress = false;
 
-       /* The DSA/switchdev model brings up switch ports in standalone mode by
-        * default, and that means vlan_filtering is 0 since they're not under
-        * a bridge, so it's safe to set up switch tagging at this time.
-        */
-       rtnl_lock();
-       rc = sja1105_setup_8021q_tagging(ds, true);
-       rtnl_unlock();
+       rc = sja1105_vlan_add(priv, port, vid, flags, allowed_ingress);
        if (rc)
-               goto out_devlink_teardown;
-
-       return 0;
-
-out_devlink_teardown:
-       sja1105_devlink_teardown(ds);
-out_mdiobus_unregister:
-       sja1105_mdiobus_unregister(ds);
-out_ptp_clock_unregister:
-       sja1105_ptp_clock_unregister(ds);
-out_static_config_free:
-       sja1105_static_config_free(&priv->static_config);
+               return rc;
 
-       return rc;
+       if (flags & BRIDGE_VLAN_INFO_PVID)
+               priv->tag_8021q_pvid[port] = vid;
+
+       return sja1105_commit_pvid(ds, port);
 }
 
-static void sja1105_teardown(struct dsa_switch *ds)
+static int sja1105_dsa_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid)
 {
        struct sja1105_private *priv = ds->priv;
-       struct sja1105_bridge_vlan *v, *n;
-       int port;
 
-       for (port = 0; port < ds->num_ports; port++) {
-               struct sja1105_port *sp = &priv->ports[port];
+       return sja1105_vlan_del(priv, port, vid);
+}
 
-               if (!dsa_is_user_port(ds, port))
-                       continue;
+static int sja1105_prechangeupper(struct dsa_switch *ds, int port,
+                                 struct netdev_notifier_changeupper_info *info)
+{
+       struct netlink_ext_ack *extack = info->info.extack;
+       struct net_device *upper = info->upper_dev;
+       struct dsa_switch_tree *dst = ds->dst;
+       struct dsa_port *dp;
 
-               if (sp->xmit_worker)
-                       kthread_destroy_worker(sp->xmit_worker);
+       if (is_vlan_dev(upper)) {
+               NL_SET_ERR_MSG_MOD(extack, "8021q uppers are not supported");
+               return -EBUSY;
        }
 
-       sja1105_devlink_teardown(ds);
-       sja1105_mdiobus_unregister(ds);
-       sja1105_flower_teardown(ds);
-       sja1105_tas_teardown(ds);
-       sja1105_ptp_clock_unregister(ds);
-       sja1105_static_config_free(&priv->static_config);
-
-       list_for_each_entry_safe(v, n, &priv->dsa_8021q_vlans, list) {
-               list_del(&v->list);
-               kfree(v);
+       if (netif_is_bridge_master(upper)) {
+               list_for_each_entry(dp, &dst->ports, list) {
+                       if (dp->bridge_dev && dp->bridge_dev != upper &&
+                           br_vlan_enabled(dp->bridge_dev)) {
+                               NL_SET_ERR_MSG_MOD(extack,
+                                                  "Only one VLAN-aware bridge is supported");
+                               return -EBUSY;
+                       }
+               }
        }
 
-       list_for_each_entry_safe(v, n, &priv->bridge_vlans, list) {
-               list_del(&v->list);
-               kfree(v);
-       }
+       return 0;
 }
 
 static void sja1105_port_disable(struct dsa_switch *ds, int port)
@@ -3337,7 +2666,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 
        new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
 
-       if (dsa_is_cpu_port(ds, port))
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                new_mtu += VLAN_HLEN;
 
        policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
@@ -3484,23 +2813,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port,
                                     bool enabled)
 {
        struct sja1105_mac_config_entry *mac;
-       int rc;
 
        mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 
        mac[port].dyn_learn = enabled;
 
-       rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
-                                         &mac[port], true);
-       if (rc)
-               return rc;
-
-       if (enabled)
-               priv->learn_ena |= BIT(port);
-       else
-               priv->learn_ena &= ~BIT(port);
-
-       return 0;
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+                                           &mac[port], true);
 }
 
 static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
@@ -3616,7 +2935,189 @@ static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port,
        return 0;
 }
 
-static const struct dsa_switch_ops sja1105_switch_ops = {
+static void sja1105_teardown_ports(struct sja1105_private *priv)
+{
+       struct dsa_switch *ds = priv->ds;
+       int port;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               struct sja1105_port *sp = &priv->ports[port];
+
+               if (sp->xmit_worker)
+                       kthread_destroy_worker(sp->xmit_worker);
+       }
+}
+
+static int sja1105_setup_ports(struct sja1105_private *priv)
+{
+       struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
+       struct dsa_switch *ds = priv->ds;
+       int port, rc;
+
+       /* Connections between dsa_port and sja1105_port */
+       for (port = 0; port < ds->num_ports; port++) {
+               struct sja1105_port *sp = &priv->ports[port];
+               struct dsa_port *dp = dsa_to_port(ds, port);
+               struct kthread_worker *worker;
+               struct net_device *slave;
+
+               if (!dsa_port_is_user(dp))
+                       continue;
+
+               dp->priv = sp;
+               sp->dp = dp;
+               sp->data = tagger_data;
+               slave = dp->slave;
+               kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
+               worker = kthread_create_worker(0, "%s_xmit", slave->name);
+               if (IS_ERR(worker)) {
+                       rc = PTR_ERR(worker);
+                       dev_err(ds->dev,
+                               "failed to create deferred xmit thread: %d\n",
+                               rc);
+                       goto out_destroy_workers;
+               }
+               sp->xmit_worker = worker;
+               skb_queue_head_init(&sp->xmit_queue);
+       }
+
+       return 0;
+
+out_destroy_workers:
+       sja1105_teardown_ports(priv);
+       return rc;
+}
+
+/* The programming model for the SJA1105 switch is "all-at-once" via static
+ * configuration tables. Some of these can be dynamically modified at runtime,
+ * but not the xMII mode parameters table.
+ * Furthermode, some PHYs may not have crystals for generating their clocks
+ * (e.g. RMII). Instead, their 50MHz clock is supplied via the SJA1105 port's
+ * ref_clk pin. So port clocking needs to be initialized early, before
+ * connecting to PHYs is attempted, otherwise they won't respond through MDIO.
+ * Setting correct PHY link speed does not matter now.
+ * But dsa_slave_phy_setup is called later than sja1105_setup, so the PHY
+ * bindings are not yet parsed by DSA core. We need to parse early so that we
+ * can populate the xMII mode parameters table.
+ */
+static int sja1105_setup(struct dsa_switch *ds)
+{
+       struct sja1105_private *priv = ds->priv;
+       int rc;
+
+       if (priv->info->disable_microcontroller) {
+               rc = priv->info->disable_microcontroller(priv);
+               if (rc < 0) {
+                       dev_err(ds->dev,
+                               "Failed to disable microcontroller: %pe\n",
+                               ERR_PTR(rc));
+                       return rc;
+               }
+       }
+
+       /* Create and send configuration down to device */
+       rc = sja1105_static_config_load(priv);
+       if (rc < 0) {
+               dev_err(ds->dev, "Failed to load static config: %d\n", rc);
+               return rc;
+       }
+
+       /* Configure the CGU (PHY link modes and speeds) */
+       if (priv->info->clocking_setup) {
+               rc = priv->info->clocking_setup(priv);
+               if (rc < 0) {
+                       dev_err(ds->dev,
+                               "Failed to configure MII clocking: %pe\n",
+                               ERR_PTR(rc));
+                       goto out_static_config_free;
+               }
+       }
+
+       rc = sja1105_setup_ports(priv);
+       if (rc)
+               goto out_static_config_free;
+
+       sja1105_tas_setup(ds);
+       sja1105_flower_setup(ds);
+
+       rc = sja1105_ptp_clock_register(ds);
+       if (rc < 0) {
+               dev_err(ds->dev, "Failed to register PTP clock: %d\n", rc);
+               goto out_flower_teardown;
+       }
+
+       rc = sja1105_mdiobus_register(ds);
+       if (rc < 0) {
+               dev_err(ds->dev, "Failed to register MDIO bus: %pe\n",
+                       ERR_PTR(rc));
+               goto out_ptp_clock_unregister;
+       }
+
+       rc = sja1105_devlink_setup(ds);
+       if (rc < 0)
+               goto out_mdiobus_unregister;
+
+       rtnl_lock();
+       rc = dsa_tag_8021q_register(ds, htons(ETH_P_8021Q));
+       rtnl_unlock();
+       if (rc)
+               goto out_devlink_teardown;
+
+       /* On SJA1105, VLAN filtering per se is always enabled in hardware.
+        * The only thing we can do to disable it is lie about what the 802.1Q
+        * EtherType is.
+        * So it will still try to apply VLAN filtering, but all ingress
+        * traffic (except frames received with EtherType of ETH_P_SJA1105)
+        * will be internally tagged with a distorted VLAN header where the
+        * TPID is ETH_P_SJA1105, and the VLAN ID is the port pvid.
+        */
+       ds->vlan_filtering_is_global = true;
+       ds->untag_bridge_pvid = true;
+       /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */
+       ds->num_fwd_offloading_bridges = 7;
+
+       /* Advertise the 8 egress queues */
+       ds->num_tx_queues = SJA1105_NUM_TC;
+
+       ds->mtu_enforcement_ingress = true;
+       ds->assisted_learning_on_cpu_port = true;
+
+       return 0;
+
+out_devlink_teardown:
+       sja1105_devlink_teardown(ds);
+out_mdiobus_unregister:
+       sja1105_mdiobus_unregister(ds);
+out_ptp_clock_unregister:
+       sja1105_ptp_clock_unregister(ds);
+out_flower_teardown:
+       sja1105_flower_teardown(ds);
+       sja1105_tas_teardown(ds);
+       sja1105_teardown_ports(priv);
+out_static_config_free:
+       sja1105_static_config_free(&priv->static_config);
+
+       return rc;
+}
+
+static void sja1105_teardown(struct dsa_switch *ds)
+{
+       struct sja1105_private *priv = ds->priv;
+
+       rtnl_lock();
+       dsa_tag_8021q_unregister(ds);
+       rtnl_unlock();
+
+       sja1105_devlink_teardown(ds);
+       sja1105_mdiobus_unregister(ds);
+       sja1105_ptp_clock_unregister(ds);
+       sja1105_flower_teardown(ds);
+       sja1105_tas_teardown(ds);
+       sja1105_teardown_ports(priv);
+       sja1105_static_config_free(&priv->static_config);
+}
+
+const struct dsa_switch_ops sja1105_switch_ops = {
        .get_tag_protocol       = sja1105_get_tag_protocol,
        .setup                  = sja1105_setup,
        .teardown               = sja1105_teardown,
@@ -3635,14 +3136,15 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
        .port_fdb_dump          = sja1105_fdb_dump,
        .port_fdb_add           = sja1105_fdb_add,
        .port_fdb_del           = sja1105_fdb_del,
+       .port_fast_age          = sja1105_fast_age,
        .port_bridge_join       = sja1105_bridge_join,
        .port_bridge_leave      = sja1105_bridge_leave,
        .port_pre_bridge_flags  = sja1105_port_pre_bridge_flags,
        .port_bridge_flags      = sja1105_port_bridge_flags,
        .port_stp_state_set     = sja1105_bridge_stp_state_set,
        .port_vlan_filtering    = sja1105_vlan_filtering,
-       .port_vlan_add          = sja1105_vlan_add,
-       .port_vlan_del          = sja1105_vlan_del,
+       .port_vlan_add          = sja1105_bridge_vlan_add,
+       .port_vlan_del          = sja1105_bridge_vlan_del,
        .port_mdb_add           = sja1105_mdb_add,
        .port_mdb_del           = sja1105_mdb_del,
        .port_hwtstamp_get      = sja1105_hwtstamp_get,
@@ -3657,12 +3159,14 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
        .cls_flower_add         = sja1105_cls_flower_add,
        .cls_flower_del         = sja1105_cls_flower_del,
        .cls_flower_stats       = sja1105_cls_flower_stats,
-       .crosschip_bridge_join  = sja1105_crosschip_bridge_join,
-       .crosschip_bridge_leave = sja1105_crosschip_bridge_leave,
-       .devlink_param_get      = sja1105_devlink_param_get,
-       .devlink_param_set      = sja1105_devlink_param_set,
        .devlink_info_get       = sja1105_devlink_info_get,
+       .tag_8021q_vlan_add     = sja1105_dsa_8021q_vlan_add,
+       .tag_8021q_vlan_del     = sja1105_dsa_8021q_vlan_del,
+       .port_prechangeupper    = sja1105_prechangeupper,
+       .port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload,
+       .port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload,
 };
+EXPORT_SYMBOL_GPL(sja1105_switch_ops);
 
 static const struct of_device_id sja1105_dt_ids[];
 
@@ -3715,12 +3219,11 @@ static int sja1105_check_device_id(struct sja1105_private *priv)
 
 static int sja1105_probe(struct spi_device *spi)
 {
-       struct sja1105_tagger_data *tagger_data;
        struct device *dev = &spi->dev;
        struct sja1105_private *priv;
        size_t max_xfer, max_msg;
        struct dsa_switch *ds;
-       int rc, port;
+       int rc;
 
        if (!dev->of_node) {
                dev_err(dev, "No DTS bindings for SJA1105 driver\n");
@@ -3800,95 +3303,42 @@ static int sja1105_probe(struct spi_device *spi)
        ds->priv = priv;
        priv->ds = ds;
 
-       tagger_data = &priv->tagger_data;
-
        mutex_init(&priv->ptp_data.lock);
        mutex_init(&priv->mgmt_lock);
 
-       priv->dsa_8021q_ctx = devm_kzalloc(dev, sizeof(*priv->dsa_8021q_ctx),
-                                          GFP_KERNEL);
-       if (!priv->dsa_8021q_ctx)
-               return -ENOMEM;
-
-       priv->dsa_8021q_ctx->ops = &sja1105_dsa_8021q_ops;
-       priv->dsa_8021q_ctx->proto = htons(ETH_P_8021Q);
-       priv->dsa_8021q_ctx->ds = ds;
-
-       INIT_LIST_HEAD(&priv->dsa_8021q_ctx->crosschip_links);
-       INIT_LIST_HEAD(&priv->bridge_vlans);
-       INIT_LIST_HEAD(&priv->dsa_8021q_vlans);
-
-       sja1105_tas_setup(ds);
-       sja1105_flower_setup(ds);
+       rc = sja1105_parse_dt(priv);
+       if (rc < 0) {
+               dev_err(ds->dev, "Failed to parse DT: %d\n", rc);
+               return rc;
+       }
 
-       rc = dsa_register_switch(priv->ds);
-       if (rc)
+       /* Error out early if internal delays are required through DT
+        * and we can't apply them.
+        */
+       rc = sja1105_parse_rgmii_delays(priv);
+       if (rc < 0) {
+               dev_err(ds->dev, "RGMII delay not supported\n");
                return rc;
+       }
 
        if (IS_ENABLED(CONFIG_NET_SCH_CBS)) {
                priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
                                         sizeof(struct sja1105_cbs_entry),
                                         GFP_KERNEL);
-               if (!priv->cbs) {
-                       rc = -ENOMEM;
-                       goto out_unregister_switch;
-               }
-       }
-
-       /* Connections between dsa_port and sja1105_port */
-       for (port = 0; port < ds->num_ports; port++) {
-               struct sja1105_port *sp = &priv->ports[port];
-               struct dsa_port *dp = dsa_to_port(ds, port);
-               struct net_device *slave;
-               int subvlan;
-
-               if (!dsa_is_user_port(ds, port))
-                       continue;
-
-               dp->priv = sp;
-               sp->dp = dp;
-               sp->data = tagger_data;
-               slave = dp->slave;
-               kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
-               sp->xmit_worker = kthread_create_worker(0, "%s_xmit",
-                                                       slave->name);
-               if (IS_ERR(sp->xmit_worker)) {
-                       rc = PTR_ERR(sp->xmit_worker);
-                       dev_err(ds->dev,
-                               "failed to create deferred xmit thread: %d\n",
-                               rc);
-                       goto out_destroy_workers;
-               }
-               skb_queue_head_init(&sp->xmit_queue);
-               sp->xmit_tpid = ETH_P_SJA1105;
-
-               for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
-                       sp->subvlan_map[subvlan] = VLAN_N_VID;
-       }
-
-       return 0;
-
-out_destroy_workers:
-       while (port-- > 0) {
-               struct sja1105_port *sp = &priv->ports[port];
-
-               if (!dsa_is_user_port(ds, port))
-                       continue;
-
-               kthread_destroy_worker(sp->xmit_worker);
+               if (!priv->cbs)
+                       return -ENOMEM;
        }
 
-out_unregister_switch:
-       dsa_unregister_switch(ds);
-
-       return rc;
+       return dsa_register_switch(priv->ds);
 }
 
 static int sja1105_remove(struct spi_device *spi)
 {
        struct sja1105_private *priv = spi_get_drvdata(spi);
+       struct dsa_switch *ds = priv->ds;
+
+       dsa_unregister_switch(ds);
 
-       dsa_unregister_switch(priv->ds);
        return 0;
 }
 
index 08cc5db..d60a530 100644 (file)
@@ -575,7 +575,6 @@ const struct sja1105_info sja1105e_info = {
        .part_no                = SJA1105ET_PART_NO,
        .static_ops             = sja1105e_table_ops,
        .dyn_ops                = sja1105et_dyn_ops,
-       .qinq_tpid              = ETH_P_8021Q,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = false,
        .ptp_ts_bits            = 24,
@@ -608,7 +607,6 @@ const struct sja1105_info sja1105t_info = {
        .part_no                = SJA1105ET_PART_NO,
        .static_ops             = sja1105t_table_ops,
        .dyn_ops                = sja1105et_dyn_ops,
-       .qinq_tpid              = ETH_P_8021Q,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = false,
        .ptp_ts_bits            = 24,
@@ -641,7 +639,6 @@ const struct sja1105_info sja1105p_info = {
        .part_no                = SJA1105P_PART_NO,
        .static_ops             = sja1105p_table_ops,
        .dyn_ops                = sja1105pqrs_dyn_ops,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = true,
        .ptp_ts_bits            = 32,
@@ -675,7 +672,6 @@ const struct sja1105_info sja1105q_info = {
        .part_no                = SJA1105Q_PART_NO,
        .static_ops             = sja1105q_table_ops,
        .dyn_ops                = sja1105pqrs_dyn_ops,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = true,
        .ptp_ts_bits            = 32,
@@ -709,7 +705,6 @@ const struct sja1105_info sja1105r_info = {
        .part_no                = SJA1105R_PART_NO,
        .static_ops             = sja1105r_table_ops,
        .dyn_ops                = sja1105pqrs_dyn_ops,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = true,
        .ptp_ts_bits            = 32,
@@ -747,7 +742,6 @@ const struct sja1105_info sja1105s_info = {
        .static_ops             = sja1105s_table_ops,
        .dyn_ops                = sja1105pqrs_dyn_ops,
        .regs                   = &sja1105pqrs_regs,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1105,
        .can_limit_mcast_flood  = true,
        .ptp_ts_bits            = 32,
@@ -784,7 +778,6 @@ const struct sja1105_info sja1110a_info = {
        .static_ops             = sja1110_table_ops,
        .dyn_ops                = sja1110_dyn_ops,
        .regs                   = &sja1110_regs,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
@@ -835,7 +828,6 @@ const struct sja1105_info sja1110b_info = {
        .static_ops             = sja1110_table_ops,
        .dyn_ops                = sja1110_dyn_ops,
        .regs                   = &sja1110_regs,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
@@ -886,7 +878,6 @@ const struct sja1105_info sja1110c_info = {
        .static_ops             = sja1110_table_ops,
        .dyn_ops                = sja1110_dyn_ops,
        .regs                   = &sja1110_regs,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
@@ -937,7 +928,6 @@ const struct sja1105_info sja1110d_info = {
        .static_ops             = sja1110_table_ops,
        .dyn_ops                = sja1110_dyn_ops,
        .regs                   = &sja1110_regs,
-       .qinq_tpid              = ETH_P_8021AD,
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
index f6e13e6..ec7b65d 100644 (file)
@@ -496,14 +496,11 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port,
        struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
        int rc;
 
-       if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
-           key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+       if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can only redirect based on DMAC");
                return -EOPNOTSUPP;
-       } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT ||
-                   priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) &&
-                  key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+       } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can only redirect based on {DMAC, VID, PCP}");
                return -EOPNOTSUPP;
@@ -595,14 +592,11 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port,
                return -ERANGE;
        }
 
-       if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
-           key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+       if (!priv->vlan_aware && key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can only gate based on DMAC");
                return -EOPNOTSUPP;
-       } else if ((priv->vlan_state == SJA1105_VLAN_BEST_EFFORT ||
-                   priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) &&
-                  key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+       } else if (priv->vlan_aware && key->type != SJA1105_KEY_VLAN_AWARE_VL) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Can only gate based on {DMAC, VID, PCP}");
                return -EOPNOTSUPP;
index 74263f8..8ef3490 100644 (file)
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/compat.h>
 #include <linux/capability.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 
 static int eql_open(struct net_device *dev);
 static int eql_close(struct net_device *dev);
-static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, int cmd);
 static netdev_tx_t eql_slave_xmit(struct sk_buff *skb, struct net_device *dev);
 
 #define eql_is_slave(dev)      ((dev->flags & IFF_SLAVE) == IFF_SLAVE)
@@ -170,7 +172,7 @@ static const char version[] __initconst =
 static const struct net_device_ops eql_netdev_ops = {
        .ndo_open       = eql_open,
        .ndo_stop       = eql_close,
-       .ndo_do_ioctl   = eql_ioctl,
+       .ndo_siocdevprivate = eql_siocdevprivate,
        .ndo_start_xmit = eql_slave_xmit,
 };
 
@@ -268,25 +270,29 @@ static int eql_s_slave_cfg(struct net_device *dev, slave_config_t __user *sc);
 static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mc);
 static int eql_s_master_cfg(struct net_device *dev, master_config_t __user *mc);
 
-static int eql_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int eql_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, int cmd)
 {
        if (cmd != EQL_GETMASTRCFG && cmd != EQL_GETSLAVECFG &&
            !capable(CAP_NET_ADMIN))
                return -EPERM;
 
+       if (in_compat_syscall()) /* to be implemented */
+               return -EOPNOTSUPP;
+
        switch (cmd) {
                case EQL_ENSLAVE:
-                       return eql_enslave(dev, ifr->ifr_data);
+                       return eql_enslave(dev, data);
                case EQL_EMANCIPATE:
-                       return eql_emancipate(dev, ifr->ifr_data);
+                       return eql_emancipate(dev, data);
                case EQL_GETSLAVECFG:
-                       return eql_g_slave_cfg(dev, ifr->ifr_data);
+                       return eql_g_slave_cfg(dev, data);
                case EQL_SETSLAVECFG:
-                       return eql_s_slave_cfg(dev, ifr->ifr_data);
+                       return eql_s_slave_cfg(dev, data);
                case EQL_GETMASTRCFG:
-                       return eql_g_master_cfg(dev, ifr->ifr_data);
+                       return eql_g_master_cfg(dev, data);
                case EQL_SETMASTRCFG:
-                       return eql_s_master_cfg(dev, ifr->ifr_data);
+                       return eql_s_master_cfg(dev, data);
                default:
                        return -EOPNOTSUPP;
        }
index 96cc5fc..87c906e 100644 (file)
@@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev)
                return -ENOMEM;
 
        SET_NETDEV_DEV(dev, pdev);
-       netdev_boot_setup_check(dev);
 
        if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
                free_netdev(dev);
@@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id)
                return -ENOMEM;
        }
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
        pnp_set_drvdata(pdev, dev);
@@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev)
 {
        struct el3_private *lp = netdev_priv(dev);
        int err;
-       const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+       static const char * const if_names[] = {
+               "10baseT", "AUI", "undefined", "BNC"
+       };
 
        spin_lock_init(&lp->lock);
 
@@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device)
        }
 
        SET_NETDEV_DEV(dev, device);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
        eisa_set_drvdata (edev, dev);
index 47b4215..8d90fed 100644 (file)
@@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt
 /* we will need locking (and refcounting) if we ever use it for more */
 static LIST_HEAD(root_corkscrew_dev);
 
-int init_module(void)
+static int corkscrew_init_module(void)
 {
        int found = 0;
        if (debug >= 0)
@@ -416,6 +416,7 @@ int init_module(void)
                found++;
        return found ? 0 : -ENODEV;
 }
+module_init(corkscrew_init_module);
 
 #else
 struct net_device *tc515_probe(int unit)
index f66e7fb..dd4d3c4 100644 (file)
@@ -252,7 +252,7 @@ static const struct net_device_ops el3_netdev_ops = {
        .ndo_start_xmit         = el3_start_xmit,
        .ndo_tx_timeout         = el3_tx_timeout,
        .ndo_get_stats          = el3_get_stats,
-       .ndo_do_ioctl           = el3_ioctl,
+       .ndo_eth_ioctl          = el3_ioctl,
        .ndo_set_rx_mode        = set_multicast_list,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 7d7d3ff..17c1633 100644 (file)
@@ -1052,7 +1052,7 @@ static const struct net_device_ops boomrang_netdev_ops = {
        .ndo_tx_timeout         = vortex_tx_timeout,
        .ndo_get_stats          = vortex_get_stats,
 #ifdef CONFIG_PCI
-       .ndo_do_ioctl           = vortex_ioctl,
+       .ndo_eth_ioctl          = vortex_ioctl,
 #endif
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_set_mac_address    = eth_mac_addr,
@@ -1069,7 +1069,7 @@ static const struct net_device_ops vortex_netdev_ops = {
        .ndo_tx_timeout         = vortex_tx_timeout,
        .ndo_get_stats          = vortex_get_stats,
 #ifdef CONFIG_PCI
-       .ndo_do_ioctl           = vortex_ioctl,
+       .ndo_eth_ioctl          = vortex_ioctl,
 #endif
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_set_mac_address    = eth_mac_addr,
index a52a374..706bd59 100644 (file)
@@ -34,6 +34,7 @@ config EL3
 config 3C515
        tristate "3c515 ISA \"Fast EtherLink\""
        depends on ISA && ISA_DMA_API && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
          network card, say Y here.
index 9f4b302..a4130e6 100644 (file)
@@ -102,6 +102,7 @@ config MCF8390
 config NE2000
        tristate "NE2000/NE1000 support"
        depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC)
+       select NETDEV_LEGACY_INIT if ISA
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -169,6 +170,7 @@ config STNIC
 config ULTRA
        tristate "SMC Ultra support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -186,6 +188,7 @@ config ULTRA
 config WD80x3
        tristate "WD80*3 support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
index fe6c834..da1ae37 100644 (file)
@@ -75,7 +75,6 @@
 #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
 
 
-struct net_device * __init apne_probe(int unit);
 static int apne_probe1(struct net_device *dev, int ioaddr);
 
 static void apne_reset_8390(struct net_device *dev);
@@ -120,7 +119,7 @@ static u32 apne_msg_enable;
 module_param_named(msg_enable, apne_msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
-struct net_device * __init apne_probe(int unit)
+static struct net_device * __init apne_probe(void)
 {
        struct net_device *dev;
        struct ei_device *ei_local;
@@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit)
        dev = alloc_ei_netdev();
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
        ei_local = netdev_priv(dev);
        ei_local->msg_enable = apne_msg_enable;
 
@@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id)
     return IRQ_HANDLED;
 }
 
-#ifdef MODULE
 static struct net_device *apne_dev;
 
 static int __init apne_module_init(void)
 {
-       apne_dev = apne_probe(-1);
+       apne_dev = apne_probe();
        return PTR_ERR_OR_ZERO(apne_dev);
 }
 
@@ -579,7 +573,6 @@ static void __exit apne_module_exit(void)
 }
 module_init(apne_module_init);
 module_exit(apne_module_exit);
-#endif
 
 static int init_pcmcia(void)
 {
index 172947f..6c6bdd5 100644 (file)
@@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev)
        return (struct ax_device *)(ei_local + 1);
 }
 
+void ax_NS8390_reinit(struct net_device *dev)
+{
+       ax_NS8390_init(dev, 1);
+}
+
+EXPORT_SYMBOL_GPL(ax_NS8390_reinit);
+
 /*
  * ax_initial_check
  *
@@ -635,7 +642,7 @@ static void ax_eeprom_register_write(struct eeprom_93cx6 *eeprom)
 static const struct net_device_ops ax_netdev_ops = {
        .ndo_open               = ax_open,
        .ndo_stop               = ax_close,
-       .ndo_do_ioctl           = ax_ioctl,
+       .ndo_eth_ioctl          = ax_ioctl,
 
        .ndo_start_xmit         = ax_ei_start_xmit,
        .ndo_tx_timeout         = ax_ei_tx_timeout,
index 8c321df..3c370e6 100644 (file)
@@ -128,7 +128,7 @@ static inline struct axnet_dev *PRIV(struct net_device *dev)
 static const struct net_device_ops axnet_netdev_ops = {
        .ndo_open               = axnet_open,
        .ndo_stop               = axnet_close,
-       .ndo_do_ioctl           = axnet_ioctl,
+       .ndo_eth_ioctl          = axnet_ioctl,
        .ndo_start_xmit         = axnet_start_xmit,
        .ndo_tx_timeout         = axnet_tx_timeout,
        .ndo_get_stats          = get_stats,
index e9756d0..53660bc 100644 (file)
@@ -923,7 +923,7 @@ static void __init ne_add_devices(void)
 }
 
 #ifdef MODULE
-int __init init_module(void)
+static int __init ne_init(void)
 {
        int retval;
        ne_add_devices();
@@ -940,6 +940,7 @@ int __init init_module(void)
        ne_loop_rm_unreg(0);
        return retval;
 }
+module_init(ne_init);
 #else /* MODULE */
 static int __init ne_init(void)
 {
@@ -951,6 +952,7 @@ static int __init ne_init(void)
 }
 module_init(ne_init);
 
+#ifdef CONFIG_NETDEV_LEGACY_INIT
 struct net_device * __init ne_probe(int unit)
 {
        int this_dev;
@@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit)
 
        return ERR_PTR(-ENODEV);
 }
+#endif
 #endif /* MODULE */
 
 static void __exit ne_exit(void)
index cac0367..96ad72a 100644 (file)
@@ -223,7 +223,7 @@ static const struct net_device_ops pcnet_netdev_ops = {
        .ndo_set_config         = set_config,
        .ndo_start_xmit         = ei_start_xmit,
        .ndo_get_stats          = ei_get_stats,
-       .ndo_do_ioctl           = ei_ioctl,
+       .ndo_eth_ioctl          = ei_ioctl,
        .ndo_set_rx_mode        = ei_set_multicast_list,
        .ndo_tx_timeout         = ei_tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
index 1d8ed73..0890fa4 100644 (file)
@@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count,
        /* We know skbuffs are padded to at least word alignment. */
        insw(ioaddr + IOPD, buf, (count+1)>>1);
 }
-
 static void ultra_pio_output(struct net_device *dev, int count,
                                                        const unsigned char *buf, const int start_page)
 {
@@ -572,8 +571,7 @@ MODULE_LICENSE("GPL");
 
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
-int __init
-init_module(void)
+static int __init ultra_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -600,6 +598,7 @@ init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(ultra_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit ultra_cleanup_module(void)
 {
        int this_dev;
 
@@ -627,4 +625,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(ultra_cleanup_module);
 #endif /* MODULE */
index c834123..263a942 100644 (file)
@@ -519,7 +519,7 @@ MODULE_LICENSE("GPL");
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
 
-int __init init_module(void)
+static int __init wd_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -548,6 +548,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(wd_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit wd_cleanup_module(void)
 {
        int this_dev;
 
@@ -570,4 +570,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(wd_cleanup_module);
 #endif /* MODULE */
index e2c9638..fe7a747 100644 (file)
@@ -22,8 +22,6 @@
 #define XS100_8390_DATA_WRITE32_BASE 0x0C80
 #define XS100_8390_DATA_AREA_SIZE 0x80
 
-#define __NS8390_init ax_NS8390_init
-
 /* force unsigned long back to 'void __iomem *' */
 #define ax_convert_addr(_a) ((void __force __iomem *)(_a))
 
 /* Ensure we have our RCR base value */
 #define AX88796_PLATFORM
 
-static unsigned char version[] =
-               "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
-
-#include "lib8390.c"
+#include "8390.h"
 
 /* from ne.c */
 #define NE_CMD         EI_SHIFT(0x00)
@@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count,
                if (jiffies - dma_start > 2 * HZ / 100) {       /* 20ms */
                        netdev_warn(dev, "timeout waiting for Tx RDC.\n");
                        ei_local->reset_8390(dev);
-                       ax_NS8390_init(dev, 1);
+                       ax_NS8390_reinit(dev);
                        break;
                }
        }
index 1cdff1d..d796684 100644 (file)
@@ -118,6 +118,7 @@ config LANTIQ_XRX200
          Support for the PMAC of the Gigabit switch (GSWIP) inside the
          Lantiq / Intel VRX200 VDSL SoC
 
+source "drivers/net/ethernet/litex/Kconfig"
 source "drivers/net/ethernet/marvell/Kconfig"
 source "drivers/net/ethernet/mediatek/Kconfig"
 source "drivers/net/ethernet/mellanox/Kconfig"
index cb3f908..aaa5078 100644 (file)
@@ -51,6 +51,7 @@ obj-$(CONFIG_JME) += jme.o
 obj-$(CONFIG_KORINA) += korina.o
 obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
 obj-$(CONFIG_LANTIQ_XRX200) += lantiq_xrx200.o
+obj-$(CONFIG_NET_VENDOR_LITEX) += litex/
 obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/
 obj-$(CONFIG_NET_VENDOR_MEDIATEK) += mediatek/
 obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
index ccad6a3..f630cac 100644 (file)
@@ -2,8 +2,8 @@
 
 config NET_VENDOR_ACTIONS
        bool "Actions Semi devices"
-       default y
-       depends on ARCH_ACTIONS
+       depends on ARCH_ACTIONS || COMPILE_TEST
+       default ARCH_ACTIONS
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
index b8e771c..c4ecf4f 100644 (file)
@@ -1179,8 +1179,8 @@ static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr)
        return owl_emac_setup_frame_xmit(netdev_priv(netdev));
 }
 
-static int owl_emac_ndo_do_ioctl(struct net_device *netdev,
-                                struct ifreq *req, int cmd)
+static int owl_emac_ndo_eth_ioctl(struct net_device *netdev,
+                                 struct ifreq *req, int cmd)
 {
        if (!netif_running(netdev))
                return -EINVAL;
@@ -1224,7 +1224,7 @@ static const struct net_device_ops owl_emac_netdev_ops = {
        .ndo_set_rx_mode        = owl_emac_ndo_set_rx_mode,
        .ndo_set_mac_address    = owl_emac_ndo_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = owl_emac_ndo_do_ioctl,
+       .ndo_eth_ioctl          = owl_emac_ndo_eth_ioctl,
        .ndo_tx_timeout         = owl_emac_ndo_tx_timeout,
        .ndo_get_stats          = owl_emac_ndo_get_stats,
 };
index 7965e5e..e0f6cc9 100644 (file)
@@ -625,7 +625,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_tx_timeout         = tx_timeout,
        .ndo_get_stats          = get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 #ifdef VLAN_SUPPORT
index 41f8821..9206331 100644 (file)
@@ -3882,7 +3882,7 @@ static const struct net_device_ops et131x_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_get_stats          = et131x_stats,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
 };
 
 static int et131x_pci_setup(struct pci_dev *pdev,
index f99ae31..037baea 100644 (file)
@@ -774,7 +774,7 @@ static const struct net_device_ops emac_netdev_ops = {
        .ndo_start_xmit         = emac_start_xmit,
        .ndo_tx_timeout         = emac_timeout,
        .ndo_set_rx_mode        = emac_set_rx_mode,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = emac_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 27dae63..13e745c 100644 (file)
@@ -357,7 +357,9 @@ static int ena_get_link_ksettings(struct net_device *netdev,
 }
 
 static int ena_get_coalesce(struct net_device *net_dev,
-                           struct ethtool_coalesce *coalesce)
+                           struct ethtool_coalesce *coalesce,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct ena_adapter *adapter = netdev_priv(net_dev);
        struct ena_com_dev *ena_dev = adapter->ena_dev;
@@ -402,7 +404,9 @@ static void ena_update_rx_rings_nonadaptive_intr_moderation(struct ena_adapter *
 }
 
 static int ena_set_coalesce(struct net_device *net_dev,
-                           struct ethtool_coalesce *coalesce)
+                           struct ethtool_coalesce *coalesce,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct ena_adapter *adapter = netdev_priv(net_dev);
        struct ena_com_dev *ena_dev = adapter->ena_dev;
index d0b0609..4786f05 100644 (file)
@@ -46,6 +46,7 @@ config AMD8111_ETH
 config LANCE
        tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
          Some LinkSys cards are of this type.
@@ -132,6 +133,7 @@ config PCMCIA_NMCLAN
 config NI65
        tristate "NI6510 support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
 
@@ -168,11 +170,11 @@ config AMD_XGBE
        tristate "AMD 10GbE Ethernet driver"
        depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM
        depends on X86 || ARM64 || COMPILE_TEST
+       depends on PTP_1588_CLOCK_OPTIONAL
        select BITREVERSE
        select CRC32
        select PHYLIB
        select AMD_XGBE_HAVE_ECC if X86
-       imply PTP_1588_CLOCK
        help
          This driver supports the AMD 10GbE Ethernet device found on an
          AMD SoC.
index 9cac5aa..92e4246 100644 (file)
@@ -1729,7 +1729,7 @@ static const struct net_device_ops amd8111e_netdev_ops = {
        .ndo_set_rx_mode        = amd8111e_set_multicast_list,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = amd8111e_set_mac_address,
-       .ndo_do_ioctl           = amd8111e_ioctl,
+       .ndo_eth_ioctl          = amd8111e_ioctl,
        .ndo_change_mtu         = amd8111e_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller     = amd8111e_poll,
index 36f54d1..9d2f49f 100644 (file)
@@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len )
 }
 
 
-struct net_device * __init atarilance_probe(int unit)
+struct net_device * __init atarilance_probe(void)
 {
        int i;
        static int found;
@@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        for( i = 0; i < N_LANCE_ADDR; ++i ) {
                if (lance_probe1( dev, &lance_addr_list[i] )) {
@@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr )
        return 0;
 }
 
-
-#ifdef MODULE
 static struct net_device *atarilance_dev;
 
 static int __init atarilance_module_init(void)
 {
-       atarilance_dev = atarilance_probe(-1);
+       atarilance_dev = atarilance_probe();
        return PTR_ERR_OR_ZERO(atarilance_dev);
 }
 
@@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void)
 }
 module_init(atarilance_module_init);
 module_exit(atarilance_module_exit);
-#endif /* MODULE */
index 19e1954..9c16362 100644 (file)
@@ -1051,7 +1051,7 @@ static const struct net_device_ops au1000_netdev_ops = {
        .ndo_stop               = au1000_close,
        .ndo_start_xmit         = au1000_tx,
        .ndo_set_rx_mode        = au1000_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_tx_timeout         = au1000_tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 2178e6b..945bf1d 100644 (file)
@@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
 MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)");
 MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)");
 
-int __init init_module(void)
+static int __init lance_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -356,6 +356,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(lance_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev)
        kfree(lp);
 }
 
-void __exit cleanup_module(void)
+static void __exit lance_cleanup_module(void)
 {
        int this_dev;
 
@@ -381,6 +382,7 @@ void __exit cleanup_module(void)
                }
        }
 }
+module_exit(lance_cleanup_module);
 #endif /* MODULE */
 MODULE_LICENSE("GPL");
 
index 3f2e4cd..da97fcc 100644 (file)
@@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = {
 };
 
 /* Initialise the one and only on-board 7990 */
-struct net_device * __init mvme147lance_probe(int unit)
+static struct net_device * __init mvme147lance_probe(void)
 {
        struct net_device *dev;
        static int called;
@@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0)
-               sprintf(dev->name, "eth%d", unit);
-
        /* Fill the dev fields */
        dev->base_addr = (unsigned long)MVME147_LANCE_BASE;
        dev->netdev_ops = &lance_netdev_ops;
@@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev)
        return 0;
 }
 
-#ifdef MODULE
 MODULE_LICENSE("GPL");
 
 static struct net_device *dev_mvme147_lance;
-int __init init_module(void)
+static int __init m147lance_init(void)
 {
-       dev_mvme147_lance = mvme147lance_probe(-1);
+       dev_mvme147_lance = mvme147lance_probe();
        return PTR_ERR_OR_ZERO(dev_mvme147_lance);
 }
+module_init(m147lance_init);
 
-void __exit cleanup_module(void)
+static void __exit m147lance_exit(void)
 {
        struct m147lance_private *lp = netdev_priv(dev_mvme147_lance);
        unregister_netdev(dev_mvme147_lance);
        free_pages(lp->ram, 3);
        free_netdev(dev_mvme147_lance);
 }
-
-#endif /* MODULE */
+module_exit(m147lance_exit);
index 5c1cfb0..b5df7ad 100644 (file)
@@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
 MODULE_PARM_DESC(io, "ni6510 I/O base address");
 MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
 
-int __init init_module(void)
+static int __init ni65_init_module(void)
 {
        dev_ni65 = ni65_probe(-1);
        return PTR_ERR_OR_ZERO(dev_ni65);
 }
+module_init(ni65_init_module);
 
-void __exit cleanup_module(void)
+static void __exit ni65_cleanup_module(void)
 {
        unregister_netdev(dev_ni65);
        cleanup_card(dev_ni65);
        free_netdev(dev_ni65);
 }
+module_exit(ni65_cleanup_module);
 #endif /* MODULE */
 
 MODULE_LICENSE("GPL");
index 4100ab0..70d76fd 100644 (file)
@@ -1572,7 +1572,7 @@ static const struct net_device_ops pcnet32_netdev_ops = {
        .ndo_tx_timeout         = pcnet32_tx_timeout,
        .ndo_get_stats          = pcnet32_get_stats,
        .ndo_set_rx_mode        = pcnet32_set_multicast_list,
-       .ndo_do_ioctl           = pcnet32_ioctl,
+       .ndo_eth_ioctl          = pcnet32_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index f8d7a93..4a845bc 100644 (file)
@@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev );
 
 /************************* End of Prototypes **************************/
 
-struct net_device * __init sun3lance_probe(int unit)
+static struct net_device * __init sun3lance_probe(void)
 {
        struct net_device *dev;
        static int found;
@@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        if (!lance_probe(dev))
                goto out;
@@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev )
 }
 
 
-#ifdef MODULE
-
 static struct net_device *sun3lance_dev;
 
-int __init init_module(void)
+static int __init sun3lance_init(void)
 {
-       sun3lance_dev = sun3lance_probe(-1);
+       sun3lance_dev = sun3lance_probe();
        return PTR_ERR_OR_ZERO(sun3lance_dev);
 }
+module_init(sun3lance_init);
 
-void __exit cleanup_module(void)
+static void __exit sun3lance_cleanup(void)
 {
        unregister_netdev(sun3lance_dev);
 #ifdef CONFIG_SUN3
@@ -942,6 +937,4 @@ void __exit cleanup_module(void)
 #endif
        free_netdev(sun3lance_dev);
 }
-
-#endif /* MODULE */
-
+module_exit(sun3lance_cleanup);
index 4f714f8..17a585a 100644 (file)
@@ -2284,7 +2284,7 @@ static const struct net_device_ops xgbe_netdev_ops = {
        .ndo_set_rx_mode        = xgbe_set_rx_mode,
        .ndo_set_mac_address    = xgbe_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = xgbe_ioctl,
+       .ndo_eth_ioctl          = xgbe_ioctl,
        .ndo_change_mtu         = xgbe_change_mtu,
        .ndo_tx_timeout         = xgbe_tx_timeout,
        .ndo_get_stats64        = xgbe_get_stats64,
index 61f39a0..bafc51c 100644 (file)
@@ -428,7 +428,9 @@ static void xgbe_set_msglevel(struct net_device *netdev, u32 msglevel)
 }
 
 static int xgbe_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct xgbe_prv_data *pdata = netdev_priv(netdev);
 
@@ -443,7 +445,9 @@ static int xgbe_get_coalesce(struct net_device *netdev,
 }
 
 static int xgbe_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct xgbe_prv_data *pdata = netdev_priv(netdev);
        struct xgbe_hw_if *hw_if = &pdata->hw_if;
index de2a934..a9ef054 100644 (file)
@@ -547,7 +547,9 @@ static int aq_ethtool_set_rxnfc(struct net_device *ndev,
 }
 
 static int aq_ethtool_get_coalesce(struct net_device *ndev,
-                                  struct ethtool_coalesce *coal)
+                                  struct ethtool_coalesce *coal,
+                                  struct kernel_ethtool_coalesce *kernel_coal,
+                                  struct netlink_ext_ack *extack)
 {
        struct aq_nic_s *aq_nic = netdev_priv(ndev);
        struct aq_nic_cfg_s *cfg;
@@ -571,7 +573,9 @@ static int aq_ethtool_get_coalesce(struct net_device *ndev,
 }
 
 static int aq_ethtool_set_coalesce(struct net_device *ndev,
-                                  struct ethtool_coalesce *coal)
+                                  struct ethtool_coalesce *coal,
+                                  struct kernel_ethtool_coalesce *kernel_coal,
+                                  struct netlink_ext_ack *extack)
 {
        struct aq_nic_s *aq_nic = netdev_priv(ndev);
        struct aq_nic_cfg_s *cfg;
index 4af0cd9..e22935c 100644 (file)
@@ -421,7 +421,7 @@ static const struct net_device_ops aq_ndev_ops = {
        .ndo_change_mtu = aq_ndev_change_mtu,
        .ndo_set_mac_address = aq_ndev_set_mac_address,
        .ndo_set_features = aq_ndev_set_features,
-       .ndo_do_ioctl = aq_ndev_ioctl,
+       .ndo_eth_ioctl = aq_ndev_ioctl,
        .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
        .ndo_setup_tc = aq_ndo_setup_tc,
index f26d037..dee9ff7 100644 (file)
@@ -119,16 +119,10 @@ static int aq_pci_func_init(struct pci_dev *pdev)
 {
        int err;
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (!err)
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+       if (err)
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (err) {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (!err)
-                       err = pci_set_consistent_dma_mask(pdev,
-                                                         DMA_BIT_MASK(32));
-       }
-       if (err != 0) {
                err = -ENOSR;
                goto err_exit;
        }
index 67b8113..38c288e 100644 (file)
@@ -844,7 +844,7 @@ static const struct net_device_ops arc_emac_netdev_ops = {
        .ndo_set_mac_address    = arc_emac_set_address,
        .ndo_get_stats          = arc_emac_stats,
        .ndo_set_rx_mode        = arc_emac_set_rx_mode,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = arc_emac_poll_controller,
 #endif
index 1ba81b1..02ae98a 100644 (file)
@@ -1851,7 +1851,7 @@ static const struct net_device_ops ag71xx_netdev_ops = {
        .ndo_open               = ag71xx_open,
        .ndo_stop               = ag71xx_stop,
        .ndo_start_xmit         = ag71xx_hard_start_xmit,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
        .ndo_tx_timeout         = ag71xx_tx_timeout,
        .ndo_change_mtu         = ag71xx_change_mtu,
        .ndo_set_mac_address    = eth_mac_addr,
index 11ef1fb..4ea157e 100644 (file)
@@ -1701,7 +1701,7 @@ static const struct net_device_ops alx_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = alx_set_mac_address,
        .ndo_change_mtu         = alx_change_mtu,
-       .ndo_do_ioctl           = alx_ioctl,
+       .ndo_eth_ioctl           = alx_ioctl,
        .ndo_tx_timeout         = alx_tx_timeout,
        .ndo_fix_features       = alx_fix_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 1c6246a..3b51b17 100644 (file)
@@ -2609,7 +2609,7 @@ static const struct net_device_ops atl1c_netdev_ops = {
        .ndo_change_mtu         = atl1c_change_mtu,
        .ndo_fix_features       = atl1c_fix_features,
        .ndo_set_features       = atl1c_set_features,
-       .ndo_do_ioctl           = atl1c_ioctl,
+       .ndo_eth_ioctl          = atl1c_ioctl,
        .ndo_tx_timeout         = atl1c_tx_timeout,
        .ndo_get_stats          = atl1c_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 2eb0a2a..753973a 100644 (file)
@@ -2247,7 +2247,7 @@ static const struct net_device_ops atl1e_netdev_ops = {
        .ndo_fix_features       = atl1e_fix_features,
        .ndo_set_features       = atl1e_set_features,
        .ndo_change_mtu         = atl1e_change_mtu,
-       .ndo_do_ioctl           = atl1e_ioctl,
+       .ndo_eth_ioctl          = atl1e_ioctl,
        .ndo_tx_timeout         = atl1e_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = atl1e_netpoll,
index c67201a..68f6c0b 100644 (file)
@@ -2885,7 +2885,7 @@ static const struct net_device_ops atl1_netdev_ops = {
        .ndo_change_mtu         = atl1_change_mtu,
        .ndo_fix_features       = atlx_fix_features,
        .ndo_set_features       = atlx_set_features,
-       .ndo_do_ioctl           = atlx_ioctl,
+       .ndo_eth_ioctl          = atlx_ioctl,
        .ndo_tx_timeout         = atlx_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = atl1_poll_controller,
index 0cc0db0..b69298d 100644 (file)
@@ -1293,7 +1293,7 @@ static const struct net_device_ops atl2_netdev_ops = {
        .ndo_change_mtu         = atl2_change_mtu,
        .ndo_fix_features       = atl2_fix_features,
        .ndo_set_features       = atl2_set_features,
-       .ndo_do_ioctl           = atl2_ioctl,
+       .ndo_eth_ioctl          = atl2_ioctl,
        .ndo_tx_timeout         = atl2_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = atl2_poll_controller,
index 1a02ca6..56e0fb0 100644 (file)
@@ -122,8 +122,8 @@ config SB1250_MAC
 config TIGON3
        tristate "Broadcom Tigon3 support"
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select PHYLIB
-       imply PTP_1588_CLOCK
        help
          This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
 
@@ -140,7 +140,7 @@ config TIGON3_HWMON
 config BNX2X
        tristate "Broadcom NetXtremeII 10Gb support"
        depends on PCI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select FW_LOADER
        select ZLIB_INFLATE
        select LIBCRC32C
@@ -206,7 +206,7 @@ config SYSTEMPORT
 config BNXT
        tristate "Broadcom NetXtreme-C/E support"
        depends on PCI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select FW_LOADER
        select LIBCRC32C
        select NET_DEVLINK
index ad2655e..fa78495 100644 (file)
@@ -2198,7 +2198,7 @@ static const struct net_device_ops b44_netdev_ops = {
        .ndo_set_rx_mode        = b44_set_rx_mode,
        .ndo_set_mac_address    = b44_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = b44_ioctl,
+       .ndo_eth_ioctl          = b44_ioctl,
        .ndo_tx_timeout         = b44_tx_timeout,
        .ndo_change_mtu         = b44_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 977f097..d568863 100644 (file)
@@ -1699,7 +1699,7 @@ static const struct net_device_ops bcm_enet_ops = {
        .ndo_start_xmit         = bcm_enet_start_xmit,
        .ndo_set_mac_address    = bcm_enet_set_mac_address,
        .ndo_set_rx_mode        = bcm_enet_set_multicast_list,
-       .ndo_do_ioctl           = bcm_enet_ioctl,
+       .ndo_eth_ioctl          = bcm_enet_ioctl,
        .ndo_change_mtu         = bcm_enet_change_mtu,
 };
 
@@ -2446,7 +2446,7 @@ static const struct net_device_ops bcm_enetsw_ops = {
        .ndo_stop               = bcm_enetsw_stop,
        .ndo_start_xmit         = bcm_enet_start_xmit,
        .ndo_change_mtu         = bcm_enet_change_mtu,
-       .ndo_do_ioctl           = bcm_enetsw_ioctl,
+       .ndo_eth_ioctl          = bcm_enetsw_ioctl,
 };
 
 
@@ -2649,7 +2649,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
        if (!res_mem || irq_rx < 0)
                return -ENODEV;
 
-       ret = 0;
        dev = alloc_etherdev(sizeof(*priv));
        if (!dev)
                return -ENOMEM;
index d9f0f0d..7fa1b69 100644 (file)
@@ -607,7 +607,9 @@ static void bcm_sysport_set_tx_coalesce(struct bcm_sysport_tx_ring *ring,
 }
 
 static int bcm_sysport_get_coalesce(struct net_device *dev,
-                                   struct ethtool_coalesce *ec)
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
        u32 reg;
@@ -627,7 +629,9 @@ static int bcm_sysport_get_coalesce(struct net_device *dev,
 }
 
 static int bcm_sysport_set_coalesce(struct net_device *dev,
-                                   struct ethtool_coalesce *ec)
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct bcm_sysport_priv *priv = netdev_priv(dev);
        struct dim_cq_moder moder;
index 075f6e1..fe4d99a 100644 (file)
@@ -1263,7 +1263,7 @@ static const struct net_device_ops bgmac_netdev_ops = {
        .ndo_set_rx_mode        = bgmac_set_rx_mode,
        .ndo_set_mac_address    = bgmac_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl           = phy_do_ioctl_running,
        .ndo_change_mtu         = bgmac_change_mtu,
 };
 
index bee6cfa..a705e26 100644 (file)
@@ -2730,7 +2730,7 @@ bnx2_alloc_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gf
        if (!page)
                return -ENOMEM;
        mapping = dma_map_page(&bp->pdev->dev, page, 0, PAGE_SIZE,
-                              PCI_DMA_FROMDEVICE);
+                              DMA_FROM_DEVICE);
        if (dma_mapping_error(&bp->pdev->dev, mapping)) {
                __free_page(page);
                return -EIO;
@@ -2753,7 +2753,7 @@ bnx2_free_rx_page(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index)
                return;
 
        dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(rx_pg, mapping),
-                      PAGE_SIZE, PCI_DMA_FROMDEVICE);
+                      PAGE_SIZE, DMA_FROM_DEVICE);
 
        __free_page(page);
        rx_pg->page = NULL;
@@ -2775,7 +2775,7 @@ bnx2_alloc_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, u16 index, gf
        mapping = dma_map_single(&bp->pdev->dev,
                                 get_l2_fhdr(data),
                                 bp->rx_buf_use_size,
-                                PCI_DMA_FROMDEVICE);
+                                DMA_FROM_DEVICE);
        if (dma_mapping_error(&bp->pdev->dev, mapping)) {
                kfree(data);
                return -EIO;
@@ -2881,7 +2881,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
                }
 
                dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
-                       skb_headlen(skb), PCI_DMA_TODEVICE);
+                       skb_headlen(skb), DMA_TO_DEVICE);
 
                tx_buf->skb = NULL;
                last = tx_buf->nr_frags;
@@ -2895,7 +2895,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
                        dma_unmap_page(&bp->pdev->dev,
                                dma_unmap_addr(tx_buf, mapping),
                                skb_frag_size(&skb_shinfo(skb)->frags[i]),
-                               PCI_DMA_TODEVICE);
+                               DMA_TO_DEVICE);
                }
 
                sw_cons = BNX2_NEXT_TX_BD(sw_cons);
@@ -3003,7 +3003,7 @@ bnx2_reuse_rx_data(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
 
        dma_sync_single_for_device(&bp->pdev->dev,
                dma_unmap_addr(cons_rx_buf, mapping),
-               BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, PCI_DMA_FROMDEVICE);
+               BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, DMA_FROM_DEVICE);
 
        rxr->rx_prod_bseq += bp->rx_buf_use_size;
 
@@ -3044,7 +3044,7 @@ error:
        }
 
        dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size,
-                        PCI_DMA_FROMDEVICE);
+                        DMA_FROM_DEVICE);
        skb = build_skb(data, 0);
        if (!skb) {
                kfree(data);
@@ -3110,7 +3110,7 @@ error:
                        }
 
                        dma_unmap_page(&bp->pdev->dev, mapping_old,
-                                      PAGE_SIZE, PCI_DMA_FROMDEVICE);
+                                      PAGE_SIZE, DMA_FROM_DEVICE);
 
                        frag_size -= frag_len;
                        skb->data_len += frag_len;
@@ -3180,7 +3180,7 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget)
 
                dma_sync_single_for_cpu(&bp->pdev->dev, dma_addr,
                        BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH,
-                       PCI_DMA_FROMDEVICE);
+                       DMA_FROM_DEVICE);
 
                next_ring_idx = BNX2_RX_RING_IDX(BNX2_NEXT_RX_BD(sw_cons));
                next_rx_buf = &rxr->rx_buf_ring[next_ring_idx];
@@ -5449,7 +5449,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
                        dma_unmap_single(&bp->pdev->dev,
                                         dma_unmap_addr(tx_buf, mapping),
                                         skb_headlen(skb),
-                                        PCI_DMA_TODEVICE);
+                                        DMA_TO_DEVICE);
 
                        tx_buf->skb = NULL;
 
@@ -5460,7 +5460,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp)
                                dma_unmap_page(&bp->pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
                                        skb_frag_size(&skb_shinfo(skb)->frags[k]),
-                                       PCI_DMA_TODEVICE);
+                                       DMA_TO_DEVICE);
                        }
                        dev_kfree_skb(skb);
                }
@@ -5491,7 +5491,7 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
                        dma_unmap_single(&bp->pdev->dev,
                                         dma_unmap_addr(rx_buf, mapping),
                                         bp->rx_buf_use_size,
-                                        PCI_DMA_FROMDEVICE);
+                                        DMA_FROM_DEVICE);
 
                        rx_buf->data = NULL;
 
@@ -5843,7 +5843,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
                packet[i] = (unsigned char) (i & 0xff);
 
        map = dma_map_single(&bp->pdev->dev, skb->data, pkt_size,
-                            PCI_DMA_TODEVICE);
+                            DMA_TO_DEVICE);
        if (dma_mapping_error(&bp->pdev->dev, map)) {
                dev_kfree_skb(skb);
                return -EIO;
@@ -5882,7 +5882,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 
        udelay(5);
 
-       dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
+       dma_unmap_single(&bp->pdev->dev, map, pkt_size, DMA_TO_DEVICE);
        dev_kfree_skb(skb);
 
        if (bnx2_get_hw_tx_cons(tx_napi) != txr->tx_prod)
@@ -5901,7 +5901,7 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 
        dma_sync_single_for_cpu(&bp->pdev->dev,
                dma_unmap_addr(rx_buf, mapping),
-               bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+               bp->rx_buf_use_size, DMA_FROM_DEVICE);
 
        if (rx_hdr->l2_fhdr_status &
                (L2_FHDR_ERRORS_BAD_CRC |
@@ -6660,7 +6660,8 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
        } else
                mss = 0;
 
-       mapping = dma_map_single(&bp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
+       mapping = dma_map_single(&bp->pdev->dev, skb->data, len,
+                                DMA_TO_DEVICE);
        if (dma_mapping_error(&bp->pdev->dev, mapping)) {
                dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
@@ -6741,7 +6742,7 @@ dma_error:
        tx_buf = &txr->tx_buf_ring[ring_prod];
        tx_buf->skb = NULL;
        dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
-                        skb_headlen(skb), PCI_DMA_TODEVICE);
+                        skb_headlen(skb), DMA_TO_DEVICE);
 
        /* unmap remaining mapped pages */
        for (i = 0; i < last_frag; i++) {
@@ -6750,7 +6751,7 @@ dma_error:
                tx_buf = &txr->tx_buf_ring[ring_prod];
                dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping),
                               skb_frag_size(&skb_shinfo(skb)->frags[i]),
-                              PCI_DMA_TODEVICE);
+                              DMA_TO_DEVICE);
        }
 
        dev_kfree_skb_any(skb);
@@ -7241,8 +7242,10 @@ bnx2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
        return rc;
 }
 
-static int
-bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+static int bnx2_get_coalesce(struct net_device *dev,
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnx2 *bp = netdev_priv(dev);
 
@@ -7263,8 +7266,10 @@ bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
        return 0;
 }
 
-static int
-bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+static int bnx2_set_coalesce(struct net_device *dev,
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnx2 *bp = netdev_priv(dev);
 
@@ -8041,21 +8046,16 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp)
 #define BNX2_VPD_LEN           128
 #define BNX2_MAX_VER_SLEN      30
 
-       data = kmalloc(256, GFP_KERNEL);
+       data = kmalloc(BNX2_VPD_LEN, GFP_KERNEL);
        if (!data)
                return;
 
-       rc = bnx2_nvram_read(bp, BNX2_VPD_NVRAM_OFFSET, data + BNX2_VPD_LEN,
-                            BNX2_VPD_LEN);
+       rc = bnx2_nvram_read(bp, BNX2_VPD_NVRAM_OFFSET, data, BNX2_VPD_LEN);
        if (rc)
                goto vpd_done;
 
-       for (i = 0; i < BNX2_VPD_LEN; i += 4) {
-               data[i] = data[i + BNX2_VPD_LEN + 3];
-               data[i + 1] = data[i + BNX2_VPD_LEN + 2];
-               data[i + 2] = data[i + BNX2_VPD_LEN + 1];
-               data[i + 3] = data[i + BNX2_VPD_LEN];
-       }
+       for (i = 0; i < BNX2_VPD_LEN; i += 4)
+               swab32s((u32 *)&data[i]);
 
        i = pci_vpd_find_tag(data, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
        if (i < 0)
@@ -8224,15 +8224,15 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
                persist_dma_mask = dma_mask = DMA_BIT_MASK(64);
 
        /* Configure DMA attributes. */
-       if (pci_set_dma_mask(pdev, dma_mask) == 0) {
+       if (dma_set_mask(&pdev->dev, dma_mask) == 0) {
                dev->features |= NETIF_F_HIGHDMA;
-               rc = pci_set_consistent_dma_mask(pdev, persist_dma_mask);
+               rc = dma_set_coherent_mask(&pdev->dev, persist_dma_mask);
                if (rc) {
                        dev_err(&pdev->dev,
                                "pci_set_consistent_dma_mask failed, aborting\n");
                        goto err_out_unmap;
                }
-       } else if ((rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) {
+       } else if ((rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
                dev_err(&pdev->dev, "System does not support DMA, aborting\n");
                goto err_out_unmap;
        }
@@ -8546,7 +8546,7 @@ static const struct net_device_ops bnx2_netdev_ops = {
        .ndo_stop               = bnx2_close,
        .ndo_get_stats64        = bnx2_get_stats64,
        .ndo_set_rx_mode        = bnx2_set_rx_mode,
-       .ndo_do_ioctl           = bnx2_ioctl,
+       .ndo_eth_ioctl          = bnx2_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = bnx2_change_mac_addr,
        .ndo_change_mtu         = bnx2_change_mtu,
index 32245bb..472a3a4 100644 (file)
@@ -1878,7 +1878,9 @@ static int bnx2x_set_eeprom(struct net_device *dev,
 }
 
 static int bnx2x_get_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
@@ -1891,7 +1893,9 @@ static int bnx2x_get_coalesce(struct net_device *dev,
 }
 
 static int bnx2x_set_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
index 2acbc73..6d98134 100644 (file)
@@ -13048,7 +13048,7 @@ static const struct net_device_ops bnx2x_netdev_ops = {
        .ndo_set_rx_mode        = bnx2x_set_rx_mode,
        .ndo_set_mac_address    = bnx2x_change_mac_addr,
        .ndo_validate_addr      = bnx2x_validate_addr,
-       .ndo_do_ioctl           = bnx2x_ioctl,
+       .ndo_eth_ioctl          = bnx2x_ioctl,
        .ndo_change_mtu         = bnx2x_change_mtu,
        .ndo_fix_features       = bnx2x_fix_features,
        .ndo_set_features       = bnx2x_set_features,
index 27943b0..f255fd0 100644 (file)
@@ -1858,7 +1858,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
 {
        int i;
        int first_queue_query_index, num_queues_req;
-       dma_addr_t cur_data_offset;
        struct stats_query_entry *cur_query_entry;
        u8 stats_count = 0;
        bool is_fcoe = false;
@@ -1879,10 +1878,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
               BNX2X_NUM_ETH_QUEUES(bp), is_fcoe, first_queue_query_index,
               first_queue_query_index + num_queues_req);
 
-       cur_data_offset = bp->fw_stats_data_mapping +
-               offsetof(struct bnx2x_fw_stats_data, queue_stats) +
-               num_queues_req * sizeof(struct per_queue_stats);
-
        cur_query_entry = &bp->fw_stats_req->
                query[first_queue_query_index + num_queues_req];
 
@@ -1933,7 +1928,6 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp)
                               cur_query_entry->funcID,
                               j, cur_query_entry->index);
                        cur_query_entry++;
-                       cur_data_offset += sizeof(struct per_queue_stats);
                        stats_count++;
 
                        /* all stats are coalesced to the leading queue */
index 2b8ae68..c6ef7ec 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
+bnxt_en-y := bnxt.o bnxt_hwrm.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_ptp.o bnxt_vfr.o bnxt_devlink.o bnxt_dim.o
 bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
 bnxt_en-$(CONFIG_DEBUG_FS) += bnxt_debugfs.o
index 8a97640..627f85e 100644 (file)
@@ -60,6 +60,7 @@
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
 #include "bnxt_ethtool.h"
@@ -276,8 +277,11 @@ static const u16 bnxt_async_events_arr[] = {
        ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY,
        ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY,
        ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION,
+       ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE,
        ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG,
        ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST,
+       ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP,
+       ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT,
 };
 
 static struct workqueue_struct *bnxt_pf_wq;
@@ -670,7 +674,7 @@ tx_dma_error:
        prod = txr->tx_prod;
        tx_buf = &txr->tx_buf_ring[prod];
        dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
-                        skb_headlen(skb), PCI_DMA_TODEVICE);
+                        skb_headlen(skb), DMA_TO_DEVICE);
        prod = NEXT_TX(prod);
 
        /* unmap remaining mapped pages */
@@ -679,7 +683,7 @@ tx_dma_error:
                tx_buf = &txr->tx_buf_ring[prod];
                dma_unmap_page(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
                               skb_frag_size(&skb_shinfo(skb)->frags[i]),
-                              PCI_DMA_TODEVICE);
+                              DMA_TO_DEVICE);
        }
 
 tx_free:
@@ -718,7 +722,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                }
 
                dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping),
-                                skb_headlen(skb), PCI_DMA_TODEVICE);
+                                skb_headlen(skb), DMA_TO_DEVICE);
                last = tx_buf->nr_frags;
 
                for (j = 0; j < last; j++) {
@@ -728,7 +732,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                                &pdev->dev,
                                dma_unmap_addr(tx_buf, mapping),
                                skb_frag_size(&skb_shinfo(skb)->frags[j]),
-                               PCI_DMA_TODEVICE);
+                               DMA_TO_DEVICE);
                }
                if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
                        if (bp->flags & BNXT_FLAG_CHIP_P5) {
@@ -901,7 +905,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
        }
 
        mapping = dma_map_page_attrs(&pdev->dev, page, offset,
-                                    BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE,
+                                    BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
                                     DMA_ATTR_WEAK_ORDERING);
        if (dma_mapping_error(&pdev->dev, mapping)) {
                __free_page(page);
@@ -1141,7 +1145,7 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp,
                }
 
                dma_unmap_page_attrs(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE,
-                                    PCI_DMA_FROMDEVICE,
+                                    DMA_FROM_DEVICE,
                                     DMA_ATTR_WEAK_ORDERING);
 
                skb->data_len += frag_len;
@@ -1649,6 +1653,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping);
                if (!skb) {
                        bnxt_abort_tpa(cpr, idx, agg_bufs);
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        return NULL;
                }
        } else {
@@ -1658,6 +1663,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC);
                if (!new_data) {
                        bnxt_abort_tpa(cpr, idx, agg_bufs);
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        return NULL;
                }
 
@@ -1673,6 +1679,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                if (!skb) {
                        kfree(data);
                        bnxt_abort_tpa(cpr, idx, agg_bufs);
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        return NULL;
                }
                skb_reserve(skb, bp->rx_offset);
@@ -1683,6 +1690,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true);
                if (!skb) {
                        /* Page reuse already handled by bnxt_rx_pages(). */
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        return NULL;
                }
        }
@@ -1886,6 +1894,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
                        if (agg_bufs)
                                bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0,
                                                       agg_bufs, false);
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        rc = -ENOMEM;
                        goto next_rx;
                }
@@ -1899,6 +1908,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
                skb = bp->rx_skb_func(bp, rxr, cons, data, data_ptr, dma_addr,
                                      payload | len);
                if (!skb) {
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        rc = -ENOMEM;
                        goto next_rx;
                }
@@ -1907,6 +1917,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
        if (agg_bufs) {
                skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false);
                if (!skb) {
+                       cpr->sw_stats.rx.rx_oom_discards += 1;
                        rc = -ENOMEM;
                        goto next_rx;
                }
@@ -2001,6 +2012,7 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
        struct rx_cmp *rxcmp;
        u16 cp_cons;
        u8 cmp_type;
+       int rc;
 
        cp_cons = RING_CMP(tmp_raw_cons);
        rxcmp = (struct rx_cmp *)
@@ -2029,7 +2041,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp,
                tpa_end1->rx_tpa_end_cmp_errors_v2 |=
                        cpu_to_le32(RX_TPA_END_CMP_ERRORS);
        }
-       return bnxt_rx_pkt(bp, cpr, raw_cons, event);
+       rc = bnxt_rx_pkt(bp, cpr, raw_cons, event);
+       if (rc && rc != -EBUSY)
+               cpr->sw_stats.rx.rx_netpoll_discards += 1;
+       return rc;
 }
 
 u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx)
@@ -2074,6 +2089,19 @@ static u16 bnxt_agg_ring_id_to_grp_idx(struct bnxt *bp, u16 ring_id)
        return INVALID_HW_RING_ID;
 }
 
+static void bnxt_event_error_report(struct bnxt *bp, u32 data1, u32 data2)
+{
+       switch (BNXT_EVENT_ERROR_REPORT_TYPE(data1)) {
+       case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL:
+               netdev_err(bp->dev, "1PPS: Received invalid signal on pin%lu from the external source. Please fix the signal and reconfigure the pin\n",
+                          BNXT_EVENT_INVALID_SIGNAL_DATA(data2));
+               break;
+       default:
+               netdev_err(bp->dev, "FW reported unknown error type\n");
+               break;
+       }
+}
+
 #define BNXT_GET_EVENT_PORT(data)      \
        ((data) &                       \
         ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
@@ -2234,6 +2262,20 @@ static int bnxt_async_event_process(struct bnxt *bp,
                }
                goto async_event_process_exit;
        }
+       case ASYNC_EVENT_CMPL_EVENT_ID_PPS_TIMESTAMP: {
+               bnxt_ptp_pps_event(bp, data1, data2);
+               goto async_event_process_exit;
+       }
+       case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT: {
+               bnxt_event_error_report(bp, data1, data2);
+               goto async_event_process_exit;
+       }
+       case ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE: {
+               u16 seq_id = le32_to_cpu(cmpl->event_data2) & 0xffff;
+
+               hwrm_update_token(bp, seq_id, BNXT_HWRM_DEFERRED);
+               goto async_event_process_exit;
+       }
        default:
                goto async_event_process_exit;
        }
@@ -2253,10 +2295,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
        switch (cmpl_type) {
        case CMPL_BASE_TYPE_HWRM_DONE:
                seq_id = le16_to_cpu(h_cmpl->sequence_id);
-               if (seq_id == bp->hwrm_intr_seq_id)
-                       bp->hwrm_intr_seq_id = (u16)~bp->hwrm_intr_seq_id;
-               else
-                       netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
+               hwrm_update_token(bp, seq_id, BNXT_HWRM_COMPLETE);
                break;
 
        case CMPL_BASE_TYPE_HWRM_FWD_REQ:
@@ -2690,7 +2729,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
                                dma_unmap_single(&pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
                                        dma_unmap_len(tx_buf, len),
-                                       PCI_DMA_TODEVICE);
+                                       DMA_TO_DEVICE);
                                xdp_return_frame(tx_buf->xdpf);
                                tx_buf->action = 0;
                                tx_buf->xdpf = NULL;
@@ -2715,7 +2754,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
                        dma_unmap_single(&pdev->dev,
                                         dma_unmap_addr(tx_buf, mapping),
                                         skb_headlen(skb),
-                                        PCI_DMA_TODEVICE);
+                                        DMA_TO_DEVICE);
 
                        last = tx_buf->nr_frags;
                        j += 2;
@@ -2727,7 +2766,7 @@ static void bnxt_free_tx_skbs(struct bnxt *bp)
                                dma_unmap_page(
                                        &pdev->dev,
                                        dma_unmap_addr(tx_buf, mapping),
-                                       skb_frag_size(frag), PCI_DMA_TODEVICE);
+                                       skb_frag_size(frag), DMA_TO_DEVICE);
                        }
                        dev_kfree_skb(skb);
                }
@@ -2794,7 +2833,7 @@ skip_rx_tpa_free:
                        continue;
 
                dma_unmap_page_attrs(&pdev->dev, rx_agg_buf->mapping,
-                                    BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE,
+                                    BNXT_RX_PAGE_SIZE, DMA_FROM_DEVICE,
                                     DMA_ATTR_WEAK_ORDERING);
 
                rx_agg_buf->page = NULL;
@@ -3176,6 +3215,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
        return 0;
 }
 
+static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr)
+{
+       kfree(cpr->cp_desc_ring);
+       cpr->cp_desc_ring = NULL;
+       kfree(cpr->cp_desc_mapping);
+       cpr->cp_desc_mapping = NULL;
+}
+
+static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n)
+{
+       cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL);
+       if (!cpr->cp_desc_ring)
+               return -ENOMEM;
+       cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping),
+                                      GFP_KERNEL);
+       if (!cpr->cp_desc_mapping)
+               return -ENOMEM;
+       return 0;
+}
+
+static void bnxt_free_all_cp_arrays(struct bnxt *bp)
+{
+       int i;
+
+       if (!bp->bnapi)
+               return;
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+
+               if (!bnapi)
+                       continue;
+               bnxt_free_cp_arrays(&bnapi->cp_ring);
+       }
+}
+
+static int bnxt_alloc_all_cp_arrays(struct bnxt *bp)
+{
+       int i, n = bp->cp_nr_pages;
+
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+               int rc;
+
+               if (!bnapi)
+                       continue;
+               rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n);
+               if (rc)
+                       return rc;
+       }
+       return 0;
+}
+
 static void bnxt_free_cp_rings(struct bnxt *bp)
 {
        int i;
@@ -3203,6 +3294,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
                        if (cpr2) {
                                ring = &cpr2->cp_ring_struct;
                                bnxt_free_ring(bp, &ring->ring_mem);
+                               bnxt_free_cp_arrays(cpr2);
                                kfree(cpr2);
                                cpr->cp_ring_arr[j] = NULL;
                        }
@@ -3221,6 +3313,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        if (!cpr)
                return NULL;
 
+       rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
+       if (rc) {
+               bnxt_free_cp_arrays(cpr);
+               kfree(cpr);
+               return NULL;
+       }
        ring = &cpr->cp_ring_struct;
        rmem = &ring->ring_mem;
        rmem->nr_pages = bp->cp_nr_pages;
@@ -3231,6 +3329,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        rc = bnxt_alloc_ring(bp, rmem);
        if (rc) {
                bnxt_free_ring(bp, rmem);
+               bnxt_free_cp_arrays(cpr);
                kfree(cpr);
                cpr = NULL;
        }
@@ -3663,9 +3762,15 @@ void bnxt_set_ring_params(struct bnxt *bp)
                if (jumbo_factor > agg_factor)
                        agg_factor = jumbo_factor;
        }
-       agg_ring_size = ring_size * agg_factor;
+       if (agg_factor) {
+               if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) {
+                       ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+                       netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n",
+                                   bp->rx_ring_size, ring_size);
+                       bp->rx_ring_size = ring_size;
+               }
+               agg_ring_size = ring_size * agg_factor;
 
-       if (agg_ring_size) {
                bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size,
                                                        RX_DESC_CNT);
                if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) {
@@ -3855,77 +3960,26 @@ out:
 
 static void bnxt_free_hwrm_resources(struct bnxt *bp)
 {
-       struct pci_dev *pdev = bp->pdev;
+       struct bnxt_hwrm_wait_token *token;
 
-       if (bp->hwrm_cmd_resp_addr) {
-               dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr,
-                                 bp->hwrm_cmd_resp_dma_addr);
-               bp->hwrm_cmd_resp_addr = NULL;
-       }
-
-       if (bp->hwrm_cmd_kong_resp_addr) {
-               dma_free_coherent(&pdev->dev, PAGE_SIZE,
-                                 bp->hwrm_cmd_kong_resp_addr,
-                                 bp->hwrm_cmd_kong_resp_dma_addr);
-               bp->hwrm_cmd_kong_resp_addr = NULL;
-       }
-}
-
-static int bnxt_alloc_kong_hwrm_resources(struct bnxt *bp)
-{
-       struct pci_dev *pdev = bp->pdev;
-
-       if (bp->hwrm_cmd_kong_resp_addr)
-               return 0;
+       dma_pool_destroy(bp->hwrm_dma_pool);
+       bp->hwrm_dma_pool = NULL;
 
-       bp->hwrm_cmd_kong_resp_addr =
-               dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
-                                  &bp->hwrm_cmd_kong_resp_dma_addr,
-                                  GFP_KERNEL);
-       if (!bp->hwrm_cmd_kong_resp_addr)
-               return -ENOMEM;
-
-       return 0;
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(token, &bp->hwrm_pending_list, node)
+               WRITE_ONCE(token->state, BNXT_HWRM_CANCELLED);
+       rcu_read_unlock();
 }
 
 static int bnxt_alloc_hwrm_resources(struct bnxt *bp)
 {
-       struct pci_dev *pdev = bp->pdev;
-
-       bp->hwrm_cmd_resp_addr = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
-                                                  &bp->hwrm_cmd_resp_dma_addr,
-                                                  GFP_KERNEL);
-       if (!bp->hwrm_cmd_resp_addr)
+       bp->hwrm_dma_pool = dma_pool_create("bnxt_hwrm", &bp->pdev->dev,
+                                           BNXT_HWRM_DMA_SIZE,
+                                           BNXT_HWRM_DMA_ALIGN, 0);
+       if (!bp->hwrm_dma_pool)
                return -ENOMEM;
 
-       return 0;
-}
-
-static void bnxt_free_hwrm_short_cmd_req(struct bnxt *bp)
-{
-       if (bp->hwrm_short_cmd_req_addr) {
-               struct pci_dev *pdev = bp->pdev;
-
-               dma_free_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
-                                 bp->hwrm_short_cmd_req_addr,
-                                 bp->hwrm_short_cmd_req_dma_addr);
-               bp->hwrm_short_cmd_req_addr = NULL;
-       }
-}
-
-static int bnxt_alloc_hwrm_short_cmd_req(struct bnxt *bp)
-{
-       struct pci_dev *pdev = bp->pdev;
-
-       if (bp->hwrm_short_cmd_req_addr)
-               return 0;
-
-       bp->hwrm_short_cmd_req_addr =
-               dma_alloc_coherent(&pdev->dev, bp->hwrm_max_ext_req_len,
-                                  &bp->hwrm_short_cmd_req_dma_addr,
-                                  GFP_KERNEL);
-       if (!bp->hwrm_short_cmd_req_addr)
-               return -ENOMEM;
+       INIT_HLIST_HEAD(&bp->hwrm_pending_list);
 
        return 0;
 }
@@ -3986,8 +4040,8 @@ static void bnxt_copy_hw_masks(u64 *mask_arr, __le64 *hw_mask_arr, int count)
 static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp,
                                    struct bnxt_stats_mem *stats)
 {
-       struct hwrm_func_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_qstats_ext_input req = {0};
+       struct hwrm_func_qstats_ext_output *resp;
+       struct hwrm_func_qstats_ext_input *req;
        __le64 *hw_masks;
        int rc;
 
@@ -3995,19 +4049,20 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp,
            !(bp->flags & BNXT_FLAG_CHIP_P5))
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QSTATS_EXT, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
-       req.flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK;
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QSTATS_EXT);
        if (rc)
-               goto qstat_exit;
+               return rc;
 
-       hw_masks = &resp->rx_ucast_pkts;
-       bnxt_copy_hw_masks(stats->hw_masks, hw_masks, stats->len / 8);
+       req->fid = cpu_to_le16(0xffff);
+       req->flags = FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK;
 
-qstat_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (!rc) {
+               hw_masks = &resp->rx_ucast_pkts;
+               bnxt_copy_hw_masks(stats->hw_masks, hw_masks, stats->len / 8);
+       }
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -4266,6 +4321,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
        bnxt_free_tx_rings(bp);
        bnxt_free_rx_rings(bp);
        bnxt_free_cp_rings(bp);
+       bnxt_free_all_cp_arrays(bp);
        bnxt_free_ntp_fltrs(bp, irq_re_init);
        if (irq_re_init) {
                bnxt_free_ring_stats(bp);
@@ -4386,6 +4442,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
                        goto alloc_mem_err;
        }
 
+       rc = bnxt_alloc_all_cp_arrays(bp);
+       if (rc)
+               goto alloc_mem_err;
+
        bnxt_init_ring_struct(bp);
 
        rc = bnxt_alloc_rx_rings(bp);
@@ -4468,313 +4528,38 @@ static void bnxt_enable_int(struct bnxt *bp)
        }
 }
 
-void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type,
-                           u16 cmpl_ring, u16 target_id)
-{
-       struct input *req = request;
-
-       req->req_type = cpu_to_le16(req_type);
-       req->cmpl_ring = cpu_to_le16(cmpl_ring);
-       req->target_id = cpu_to_le16(target_id);
-       if (bnxt_kong_hwrm_message(bp, req))
-               req->resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
-       else
-               req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
-}
-
-static int bnxt_hwrm_to_stderr(u32 hwrm_err)
-{
-       switch (hwrm_err) {
-       case HWRM_ERR_CODE_SUCCESS:
-               return 0;
-       case HWRM_ERR_CODE_RESOURCE_LOCKED:
-               return -EROFS;
-       case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
-               return -EACCES;
-       case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR:
-               return -ENOSPC;
-       case HWRM_ERR_CODE_INVALID_PARAMS:
-       case HWRM_ERR_CODE_INVALID_FLAGS:
-       case HWRM_ERR_CODE_INVALID_ENABLES:
-       case HWRM_ERR_CODE_UNSUPPORTED_TLV:
-       case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR:
-               return -EINVAL;
-       case HWRM_ERR_CODE_NO_BUFFER:
-               return -ENOMEM;
-       case HWRM_ERR_CODE_HOT_RESET_PROGRESS:
-       case HWRM_ERR_CODE_BUSY:
-               return -EAGAIN;
-       case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
-               return -EOPNOTSUPP;
-       default:
-               return -EIO;
-       }
-}
-
-static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
-                                int timeout, bool silent)
-{
-       int i, intr_process, rc, tmo_count;
-       struct input *req = msg;
-       u32 *data = msg;
-       u8 *valid;
-       u16 cp_ring_id, len = 0;
-       struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
-       u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
-       struct hwrm_short_input short_input = {0};
-       u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
-       u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
-       u16 dst = BNXT_HWRM_CHNL_CHIMP;
-
-       if (BNXT_NO_FW_ACCESS(bp) &&
-           le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
-               return -EBUSY;
-
-       if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
-               if (msg_len > bp->hwrm_max_ext_req_len ||
-                   !bp->hwrm_short_cmd_req_addr)
-                       return -EINVAL;
-       }
-
-       if (bnxt_hwrm_kong_chnl(bp, req)) {
-               dst = BNXT_HWRM_CHNL_KONG;
-               bar_offset = BNXT_GRCPF_REG_KONG_COMM;
-               doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
-               resp = bp->hwrm_cmd_kong_resp_addr;
-       }
-
-       memset(resp, 0, PAGE_SIZE);
-       cp_ring_id = le16_to_cpu(req->cmpl_ring);
-       intr_process = (cp_ring_id == INVALID_HW_RING_ID) ? 0 : 1;
-
-       req->seq_id = cpu_to_le16(bnxt_get_hwrm_seq_id(bp, dst));
-       /* currently supports only one outstanding message */
-       if (intr_process)
-               bp->hwrm_intr_seq_id = le16_to_cpu(req->seq_id);
-
-       if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
-           msg_len > BNXT_HWRM_MAX_REQ_LEN) {
-               void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
-               u16 max_msg_len;
-
-               /* Set boundary for maximum extended request length for short
-                * cmd format. If passed up from device use the max supported
-                * internal req length.
-                */
-               max_msg_len = bp->hwrm_max_ext_req_len;
-
-               memcpy(short_cmd_req, req, msg_len);
-               if (msg_len < max_msg_len)
-                       memset(short_cmd_req + msg_len, 0,
-                              max_msg_len - msg_len);
-
-               short_input.req_type = req->req_type;
-               short_input.signature =
-                               cpu_to_le16(SHORT_REQ_SIGNATURE_SHORT_CMD);
-               short_input.size = cpu_to_le16(msg_len);
-               short_input.req_addr =
-                       cpu_to_le64(bp->hwrm_short_cmd_req_dma_addr);
-
-               data = (u32 *)&short_input;
-               msg_len = sizeof(short_input);
-
-               /* Sync memory write before updating doorbell */
-               wmb();
-
-               max_req_len = BNXT_HWRM_SHORT_REQ_LEN;
-       }
-
-       /* Write request msg to hwrm channel */
-       __iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
-
-       for (i = msg_len; i < max_req_len; i += 4)
-               writel(0, bp->bar0 + bar_offset + i);
-
-       /* Ring channel doorbell */
-       writel(1, bp->bar0 + doorbell_offset);
-
-       if (!pci_is_enabled(bp->pdev))
-               return -ENODEV;
-
-       if (!timeout)
-               timeout = DFLT_HWRM_CMD_TIMEOUT;
-       /* Limit timeout to an upper limit */
-       timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT);
-       /* convert timeout to usec */
-       timeout *= 1000;
-
-       i = 0;
-       /* Short timeout for the first few iterations:
-        * number of loops = number of loops for short timeout +
-        * number of loops for standard timeout.
-        */
-       tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
-       timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
-       tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
-
-       if (intr_process) {
-               u16 seq_id = bp->hwrm_intr_seq_id;
-
-               /* Wait until hwrm response cmpl interrupt is processed */
-               while (bp->hwrm_intr_seq_id != (u16)~seq_id &&
-                      i++ < tmo_count) {
-                       /* Abort the wait for completion if the FW health
-                        * check has failed.
-                        */
-                       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
-                               return -EBUSY;
-                       /* on first few passes, just barely sleep */
-                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
-                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
-                                            HWRM_SHORT_MAX_TIMEOUT);
-                       } else {
-                               if (HWRM_WAIT_MUST_ABORT(bp, req))
-                                       break;
-                               usleep_range(HWRM_MIN_TIMEOUT,
-                                            HWRM_MAX_TIMEOUT);
-                       }
-               }
-
-               if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
-                       if (!silent)
-                               netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
-                                          le16_to_cpu(req->req_type));
-                       return -EBUSY;
-               }
-               len = le16_to_cpu(resp->resp_len);
-               valid = ((u8 *)resp) + len - 1;
-       } else {
-               int j;
-
-               /* Check if response len is updated */
-               for (i = 0; i < tmo_count; i++) {
-                       /* Abort the wait for completion if the FW health
-                        * check has failed.
-                        */
-                       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
-                               return -EBUSY;
-                       len = le16_to_cpu(resp->resp_len);
-                       if (len)
-                               break;
-                       /* on first few passes, just barely sleep */
-                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
-                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
-                                            HWRM_SHORT_MAX_TIMEOUT);
-                       } else {
-                               if (HWRM_WAIT_MUST_ABORT(bp, req))
-                                       goto timeout_abort;
-                               usleep_range(HWRM_MIN_TIMEOUT,
-                                            HWRM_MAX_TIMEOUT);
-                       }
-               }
-
-               if (i >= tmo_count) {
-timeout_abort:
-                       if (!silent)
-                               netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
-                                          HWRM_TOTAL_TIMEOUT(i),
-                                          le16_to_cpu(req->req_type),
-                                          le16_to_cpu(req->seq_id), len);
-                       return -EBUSY;
-               }
-
-               /* Last byte of resp contains valid bit */
-               valid = ((u8 *)resp) + len - 1;
-               for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
-                       /* make sure we read from updated DMA memory */
-                       dma_rmb();
-                       if (*valid)
-                               break;
-                       usleep_range(1, 5);
-               }
-
-               if (j >= HWRM_VALID_BIT_DELAY_USEC) {
-                       if (!silent)
-                               netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d v:%d\n",
-                                          HWRM_TOTAL_TIMEOUT(i),
-                                          le16_to_cpu(req->req_type),
-                                          le16_to_cpu(req->seq_id), len,
-                                          *valid);
-                       return -EBUSY;
-               }
-       }
-
-       /* Zero valid bit for compatibility.  Valid bit in an older spec
-        * may become a new field in a newer spec.  We must make sure that
-        * a new field not implemented by old spec will read zero.
-        */
-       *valid = 0;
-       rc = le16_to_cpu(resp->error_code);
-       if (rc && !silent)
-               netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
-                          le16_to_cpu(resp->req_type),
-                          le16_to_cpu(resp->seq_id), rc);
-       return bnxt_hwrm_to_stderr(rc);
-}
-
-int _hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
-{
-       return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, false);
-}
-
-int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
-                             int timeout)
-{
-       return bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true);
-}
-
-int hwrm_send_message(struct bnxt *bp, void *msg, u32 msg_len, int timeout)
-{
-       int rc;
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, msg, msg_len, timeout);
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       return rc;
-}
-
-int hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
-                            int timeout)
-{
-       int rc;
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = bnxt_hwrm_do_send_msg(bp, msg, msg_len, timeout, true);
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       return rc;
-}
-
 int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
                            bool async_only)
 {
-       struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_drv_rgtr_input req = {0};
        DECLARE_BITMAP(async_events_bmap, 256);
        u32 *events = (u32 *)async_events_bmap;
+       struct hwrm_func_drv_rgtr_output *resp;
+       struct hwrm_func_drv_rgtr_input *req;
        u32 flags;
        int rc, i;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_RGTR);
+       if (rc)
+               return rc;
 
-       req.enables =
-               cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
-                           FUNC_DRV_RGTR_REQ_ENABLES_VER |
-                           FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
+       req->enables = cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
+                                  FUNC_DRV_RGTR_REQ_ENABLES_VER |
+                                  FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
 
-       req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
+       req->os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
        flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
        if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
                flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
        if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
                flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
                         FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
-       req.flags = cpu_to_le32(flags);
-       req.ver_maj_8b = DRV_VER_MAJ;
-       req.ver_min_8b = DRV_VER_MIN;
-       req.ver_upd_8b = DRV_VER_UPD;
-       req.ver_maj = cpu_to_le16(DRV_VER_MAJ);
-       req.ver_min = cpu_to_le16(DRV_VER_MIN);
-       req.ver_upd = cpu_to_le16(DRV_VER_UPD);
+       req->flags = cpu_to_le32(flags);
+       req->ver_maj_8b = DRV_VER_MAJ;
+       req->ver_min_8b = DRV_VER_MIN;
+       req->ver_upd_8b = DRV_VER_UPD;
+       req->ver_maj = cpu_to_le16(DRV_VER_MAJ);
+       req->ver_min = cpu_to_le16(DRV_VER_MIN);
+       req->ver_upd = cpu_to_le16(DRV_VER_UPD);
 
        if (BNXT_PF(bp)) {
                u32 data[8];
@@ -4791,14 +4576,14 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
                }
 
                for (i = 0; i < 8; i++)
-                       req.vf_req_fwd[i] = cpu_to_le32(data[i]);
+                       req->vf_req_fwd[i] = cpu_to_le32(data[i]);
 
-               req.enables |=
+               req->enables |=
                        cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD);
        }
 
        if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
-               req.flags |= cpu_to_le32(
+               req->flags |= cpu_to_le32(
                        FUNC_DRV_RGTR_REQ_FLAGS_FLOW_HANDLE_64BIT_MODE);
 
        memset(async_events_bmap, 0, sizeof(async_events_bmap));
@@ -4817,57 +4602,63 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
                }
        }
        for (i = 0; i < 8; i++)
-               req.async_event_fwd[i] |= cpu_to_le32(events[i]);
+               req->async_event_fwd[i] |= cpu_to_le32(events[i]);
 
        if (async_only)
-               req.enables =
+               req->enables =
                        cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                set_bit(BNXT_STATE_DRV_REGISTERED, &bp->state);
                if (resp->flags &
                    cpu_to_le32(FUNC_DRV_RGTR_RESP_FLAGS_IF_CHANGE_SUPPORTED))
                        bp->fw_cap |= BNXT_FW_CAP_IF_CHANGE;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp)
 {
-       struct hwrm_func_drv_unrgtr_input req = {0};
+       struct hwrm_func_drv_unrgtr_input *req;
+       int rc;
 
        if (!test_and_clear_bit(BNXT_STATE_DRV_REGISTERED, &bp->state))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_UNRGTR, -1, -1);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_UNRGTR);
+       if (rc)
+               return rc;
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
 {
-       u32 rc = 0;
-       struct hwrm_tunnel_dst_port_free_input req = {0};
+       struct hwrm_tunnel_dst_port_free_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TUNNEL_DST_PORT_FREE, -1, -1);
-       req.tunnel_type = tunnel_type;
+       rc = hwrm_req_init(bp, req, HWRM_TUNNEL_DST_PORT_FREE);
+       if (rc)
+               return rc;
+
+       req->tunnel_type = tunnel_type;
 
        switch (tunnel_type) {
        case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN:
-               req.tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id);
+               req->tunnel_dst_port_id = cpu_to_le16(bp->vxlan_fw_dst_port_id);
                bp->vxlan_fw_dst_port_id = INVALID_HW_RING_ID;
                break;
        case TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE:
-               req.tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id);
+               req->tunnel_dst_port_id = cpu_to_le16(bp->nge_fw_dst_port_id);
                bp->nge_fw_dst_port_id = INVALID_HW_RING_ID;
                break;
        default:
                break;
        }
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                netdev_err(bp->dev, "hwrm_tunnel_dst_port_free failed. rc:%d\n",
                           rc);
@@ -4877,17 +4668,19 @@ static int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, u8 tunnel_type)
 static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
                                           u8 tunnel_type)
 {
-       u32 rc = 0;
-       struct hwrm_tunnel_dst_port_alloc_input req = {0};
-       struct hwrm_tunnel_dst_port_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_tunnel_dst_port_alloc_output *resp;
+       struct hwrm_tunnel_dst_port_alloc_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TUNNEL_DST_PORT_ALLOC, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_TUNNEL_DST_PORT_ALLOC);
+       if (rc)
+               return rc;
 
-       req.tunnel_type = tunnel_type;
-       req.tunnel_dst_port_val = port;
+       req->tunnel_type = tunnel_type;
+       req->tunnel_dst_port_val = port;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
                netdev_err(bp->dev, "hwrm_tunnel_dst_port_alloc failed. rc:%d\n",
                           rc);
@@ -4907,33 +4700,40 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
        }
 
 err_out:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id)
 {
-       struct hwrm_cfa_l2_set_rx_mask_input req = {0};
+       struct hwrm_cfa_l2_set_rx_mask_input *req;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_SET_RX_MASK, -1, -1);
-       req.vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_L2_SET_RX_MASK);
+       if (rc)
+               return rc;
 
-       req.num_mc_entries = cpu_to_le32(vnic->mc_list_count);
-       req.mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
-       req.mask = cpu_to_le32(vnic->rx_mask);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+       req->num_mc_entries = cpu_to_le32(vnic->mc_list_count);
+       req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping);
+       req->mask = cpu_to_le32(vnic->rx_mask);
+       return hwrm_req_send_silent(bp, req);
 }
 
 #ifdef CONFIG_RFS_ACCEL
 static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
                                            struct bnxt_ntuple_filter *fltr)
 {
-       struct hwrm_cfa_ntuple_filter_free_input req = {0};
+       struct hwrm_cfa_ntuple_filter_free_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_FREE, -1, -1);
-       req.ntuple_filter_id = fltr->filter_id;
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_FREE);
+       if (rc)
+               return rc;
+
+       req->ntuple_filter_id = fltr->filter_id;
+       return hwrm_req_send(bp, req);
 }
 
 #define BNXT_NTP_FLTR_FLAGS                                    \
@@ -4958,69 +4758,70 @@ static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp,
 static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
                                             struct bnxt_ntuple_filter *fltr)
 {
-       struct hwrm_cfa_ntuple_filter_alloc_input req = {0};
        struct hwrm_cfa_ntuple_filter_alloc_output *resp;
+       struct hwrm_cfa_ntuple_filter_alloc_input *req;
        struct flow_keys *keys = &fltr->fkeys;
        struct bnxt_vnic_info *vnic;
        u32 flags = 0;
-       int rc = 0;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_NTUPLE_FILTER_ALLOC, -1, -1);
-       req.l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
+       rc = hwrm_req_init(bp, req, HWRM_CFA_NTUPLE_FILTER_ALLOC);
+       if (rc)
+               return rc;
+
+       req->l2_filter_id = bp->vnic_info[0].fw_l2_filter_id[fltr->l2_fltr_idx];
 
        if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) {
                flags = CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DEST_RFS_RING_IDX;
-               req.dst_id = cpu_to_le16(fltr->rxq);
+               req->dst_id = cpu_to_le16(fltr->rxq);
        } else {
                vnic = &bp->vnic_info[fltr->rxq + 1];
-               req.dst_id = cpu_to_le16(vnic->fw_vnic_id);
+               req->dst_id = cpu_to_le16(vnic->fw_vnic_id);
        }
-       req.flags = cpu_to_le32(flags);
-       req.enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
+       req->flags = cpu_to_le32(flags);
+       req->enables = cpu_to_le32(BNXT_NTP_FLTR_FLAGS);
 
-       req.ethertype = htons(ETH_P_IP);
-       memcpy(req.src_macaddr, fltr->src_mac_addr, ETH_ALEN);
-       req.ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
-       req.ip_protocol = keys->basic.ip_proto;
+       req->ethertype = htons(ETH_P_IP);
+       memcpy(req->src_macaddr, fltr->src_mac_addr, ETH_ALEN);
+       req->ip_addr_type = CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
+       req->ip_protocol = keys->basic.ip_proto;
 
        if (keys->basic.n_proto == htons(ETH_P_IPV6)) {
                int i;
 
-               req.ethertype = htons(ETH_P_IPV6);
-               req.ip_addr_type =
+               req->ethertype = htons(ETH_P_IPV6);
+               req->ip_addr_type =
                        CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6;
-               *(struct in6_addr *)&req.src_ipaddr[0] =
+               *(struct in6_addr *)&req->src_ipaddr[0] =
                        keys->addrs.v6addrs.src;
-               *(struct in6_addr *)&req.dst_ipaddr[0] =
+               *(struct in6_addr *)&req->dst_ipaddr[0] =
                        keys->addrs.v6addrs.dst;
                for (i = 0; i < 4; i++) {
-                       req.src_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
-                       req.dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
+                       req->src_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
+                       req->dst_ipaddr_mask[i] = cpu_to_be32(0xffffffff);
                }
        } else {
-               req.src_ipaddr[0] = keys->addrs.v4addrs.src;
-               req.src_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
-               req.dst_ipaddr[0] = keys->addrs.v4addrs.dst;
-               req.dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
+               req->src_ipaddr[0] = keys->addrs.v4addrs.src;
+               req->src_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
+               req->dst_ipaddr[0] = keys->addrs.v4addrs.dst;
+               req->dst_ipaddr_mask[0] = cpu_to_be32(0xffffffff);
        }
        if (keys->control.flags & FLOW_DIS_ENCAPSULATION) {
-               req.enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG);
-               req.tunnel_type =
+               req->enables |= cpu_to_le32(BNXT_NTP_TUNNEL_FLTR_FLAG);
+               req->tunnel_type =
                        CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL;
        }
 
-       req.src_port = keys->ports.src;
-       req.src_port_mask = cpu_to_be16(0xffff);
-       req.dst_port = keys->ports.dst;
-       req.dst_port_mask = cpu_to_be16(0xffff);
+       req->src_port = keys->ports.src;
+       req->src_port_mask = cpu_to_be16(0xffff);
+       req->dst_port = keys->ports.dst;
+       req->dst_port_mask = cpu_to_be16(0xffff);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (!rc) {
-               resp = bnxt_get_hwrm_resp_addr(bp, &req);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (!rc)
                fltr->filter_id = resp->ntuple_filter_id;
-       }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 #endif
@@ -5028,62 +4829,62 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
 static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx,
                                     u8 *mac_addr)
 {
-       u32 rc = 0;
-       struct hwrm_cfa_l2_filter_alloc_input req = {0};
-       struct hwrm_cfa_l2_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_cfa_l2_filter_alloc_output *resp;
+       struct hwrm_cfa_l2_filter_alloc_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_ALLOC);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_FILTER_ALLOC, -1, -1);
-       req.flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX);
+       req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX);
        if (!BNXT_CHIP_TYPE_NITRO_A0(bp))
-               req.flags |=
+               req->flags |=
                        cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST);
-       req.dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id);
-       req.enables =
+       req->dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id);
+       req->enables =
                cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR |
                            CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID |
                            CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK);
-       memcpy(req.l2_addr, mac_addr, ETH_ALEN);
-       req.l2_addr_mask[0] = 0xff;
-       req.l2_addr_mask[1] = 0xff;
-       req.l2_addr_mask[2] = 0xff;
-       req.l2_addr_mask[3] = 0xff;
-       req.l2_addr_mask[4] = 0xff;
-       req.l2_addr_mask[5] = 0xff;
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       memcpy(req->l2_addr, mac_addr, ETH_ALEN);
+       req->l2_addr_mask[0] = 0xff;
+       req->l2_addr_mask[1] = 0xff;
+       req->l2_addr_mask[2] = 0xff;
+       req->l2_addr_mask[3] = 0xff;
+       req->l2_addr_mask[4] = 0xff;
+       req->l2_addr_mask[5] = 0xff;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                bp->vnic_info[vnic_id].fw_l2_filter_id[idx] =
                                                        resp->l2_filter_id;
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_clear_vnic_filter(struct bnxt *bp)
 {
+       struct hwrm_cfa_l2_filter_free_input *req;
        u16 i, j, num_of_vnics = 1; /* only vnic 0 supported */
-       int rc = 0;
+       int rc;
 
        /* Any associated ntuple filters will also be cleared by firmware. */
-       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE);
+       if (rc)
+               return rc;
+       hwrm_req_hold(bp, req);
        for (i = 0; i < num_of_vnics; i++) {
                struct bnxt_vnic_info *vnic = &bp->vnic_info[i];
 
                for (j = 0; j < vnic->uc_filter_count; j++) {
-                       struct hwrm_cfa_l2_filter_free_input req = {0};
+                       req->l2_filter_id = vnic->fw_l2_filter_id[j];
 
-                       bnxt_hwrm_cmd_hdr_init(bp, &req,
-                                              HWRM_CFA_L2_FILTER_FREE, -1, -1);
-
-                       req.l2_filter_id = vnic->fw_l2_filter_id[j];
-
-                       rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                               HWRM_CMD_TIMEOUT);
+                       rc = hwrm_req_send(bp, req);
                }
                vnic->uc_filter_count = 0;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
-
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -5091,12 +4892,15 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
 {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
        u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX;
-       struct hwrm_vnic_tpa_cfg_input req = {0};
+       struct hwrm_vnic_tpa_cfg_input *req;
+       int rc;
 
        if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_TPA_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_TPA_CFG);
+       if (rc)
+               return rc;
 
        if (tpa_flags) {
                u16 mss = bp->dev->mtu - 40;
@@ -5110,9 +4914,9 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
                if (tpa_flags & BNXT_FLAG_GRO)
                        flags |= VNIC_TPA_CFG_REQ_FLAGS_GRO;
 
-               req.flags = cpu_to_le32(flags);
+               req->flags = cpu_to_le32(flags);
 
-               req.enables =
+               req->enables =
                        cpu_to_le32(VNIC_TPA_CFG_REQ_ENABLES_MAX_AGG_SEGS |
                                    VNIC_TPA_CFG_REQ_ENABLES_MAX_AGGS |
                                    VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN);
@@ -5136,14 +4940,14 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags)
                } else {
                        segs = ilog2(nsegs);
                }
-               req.max_agg_segs = cpu_to_le16(segs);
-               req.max_aggs = cpu_to_le16(max_aggs);
+               req->max_agg_segs = cpu_to_le16(segs);
+               req->max_aggs = cpu_to_le16(max_aggs);
 
-               req.min_agg_len = cpu_to_le32(512);
+               req->min_agg_len = cpu_to_le32(512);
        }
-       req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+       req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring)
@@ -5287,86 +5091,102 @@ static void bnxt_fill_hw_rss_tbl(struct bnxt *bp, struct bnxt_vnic_info *vnic)
 static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
 {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
-       struct hwrm_vnic_rss_cfg_input req = {0};
+       struct hwrm_vnic_rss_cfg_input *req;
+       int rc;
 
        if ((bp->flags & BNXT_FLAG_CHIP_P5) ||
            vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_CFG);
+       if (rc)
+               return rc;
+
        if (set_rss) {
                bnxt_fill_hw_rss_tbl(bp, vnic);
-               req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
-               req.hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
-               req.ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr);
-               req.hash_key_tbl_addr =
+               req->hash_type = cpu_to_le32(bp->rss_hash_cfg);
+               req->hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
+               req->ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr);
+               req->hash_key_tbl_addr =
                        cpu_to_le64(vnic->rss_hash_key_dma_addr);
        }
-       req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss)
 {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
-       struct hwrm_vnic_rss_cfg_input req = {0};
+       struct hwrm_vnic_rss_cfg_input *req;
        dma_addr_t ring_tbl_map;
        u32 i, nr_ctxs;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_CFG);
+       if (rc)
+               return rc;
+
+       req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+       if (!set_rss)
+               return hwrm_req_send(bp, req);
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
-       req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
-       if (!set_rss) {
-               hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-               return 0;
-       }
        bnxt_fill_hw_rss_tbl(bp, vnic);
-       req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
-       req.hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
-       req.hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr);
+       req->hash_type = cpu_to_le32(bp->rss_hash_cfg);
+       req->hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT;
+       req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr);
        ring_tbl_map = vnic->rss_table_dma_addr;
        nr_ctxs = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings);
-       for (i = 0; i < nr_ctxs; ring_tbl_map += BNXT_RSS_TABLE_SIZE_P5, i++) {
-               int rc;
 
-               req.ring_grp_tbl_addr = cpu_to_le64(ring_tbl_map);
-               req.ring_table_pair_index = i;
-               req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]);
-               rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       hwrm_req_hold(bp, req);
+       for (i = 0; i < nr_ctxs; ring_tbl_map += BNXT_RSS_TABLE_SIZE_P5, i++) {
+               req->ring_grp_tbl_addr = cpu_to_le64(ring_tbl_map);
+               req->ring_table_pair_index = i;
+               req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]);
+               rc = hwrm_req_send(bp, req);
                if (rc)
-                       return rc;
+                       goto exit;
        }
-       return 0;
+
+exit:
+       hwrm_req_drop(bp, req);
+       return rc;
 }
 
 static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id)
 {
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
-       struct hwrm_vnic_plcmodes_cfg_input req = {0};
+       struct hwrm_vnic_plcmodes_cfg_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_PLCMODES_CFG, -1, -1);
-       req.flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT |
-                               VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
-                               VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
-       req.enables =
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_PLCMODES_CFG);
+       if (rc)
+               return rc;
+
+       req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT |
+                                VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 |
+                                VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6);
+       req->enables =
                cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID |
                            VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID);
        /* thresholds not implemented in firmware yet */
-       req.jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
-       req.hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
-       req.vnic_id = cpu_to_le32(vnic->fw_vnic_id);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh);
+       req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh);
+       req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+       return hwrm_req_send(bp, req);
 }
 
 static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id,
                                        u16 ctx_idx)
 {
-       struct hwrm_vnic_rss_cos_lb_ctx_free_input req = {0};
+       struct hwrm_vnic_rss_cos_lb_ctx_free_input *req;
+
+       if (hwrm_req_init(bp, req, HWRM_VNIC_RSS_COS_LB_CTX_FREE))
+               return;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_COS_LB_CTX_FREE, -1, -1);
-       req.rss_cos_lb_ctx_id =
+       req->rss_cos_lb_ctx_id =
                cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]);
 
-       hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       hwrm_req_send(bp, req);
        bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID;
 }
 
@@ -5387,20 +5207,20 @@ static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp)
 
 static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx)
 {
+       struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp;
+       struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req;
        int rc;
-       struct hwrm_vnic_rss_cos_lb_ctx_alloc_input req = {0};
-       struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp =
-                                               bp->hwrm_cmd_resp_addr;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC, -1,
-                              -1);
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] =
                        le16_to_cpu(resp->rss_cos_lb_ctx_id);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 
        return rc;
 }
@@ -5414,47 +5234,50 @@ static u32 bnxt_get_roce_vnic_mode(struct bnxt *bp)
 
 int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 {
-       unsigned int ring = 0, grp_idx;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
-       struct hwrm_vnic_cfg_input req = {0};
+       struct hwrm_vnic_cfg_input *req;
+       unsigned int ring = 0, grp_idx;
        u16 def_vlan = 0;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_CFG);
+       if (rc)
+               return rc;
 
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0];
 
-               req.default_rx_ring_id =
+               req->default_rx_ring_id =
                        cpu_to_le16(rxr->rx_ring_struct.fw_ring_id);
-               req.default_cmpl_ring_id =
+               req->default_cmpl_ring_id =
                        cpu_to_le16(bnxt_cp_ring_for_rx(bp, rxr));
-               req.enables =
+               req->enables =
                        cpu_to_le32(VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID |
                                    VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID);
                goto vnic_mru;
        }
-       req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP);
+       req->enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP);
        /* Only RSS support for now TBD: COS & LB */
        if (vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID) {
-               req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
-               req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
+               req->rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]);
+               req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
                                           VNIC_CFG_REQ_ENABLES_MRU);
        } else if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) {
-               req.rss_rule =
+               req->rss_rule =
                        cpu_to_le16(bp->vnic_info[0].fw_rss_cos_lb_ctx[0]);
-               req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
+               req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE |
                                           VNIC_CFG_REQ_ENABLES_MRU);
-               req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE);
+               req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE);
        } else {
-               req.rss_rule = cpu_to_le16(0xffff);
+               req->rss_rule = cpu_to_le16(0xffff);
        }
 
        if (BNXT_CHIP_TYPE_NITRO_A0(bp) &&
            (vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID)) {
-               req.cos_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[1]);
-               req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_COS_RULE);
+               req->cos_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[1]);
+               req->enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_COS_RULE);
        } else {
-               req.cos_rule = cpu_to_le16(0xffff);
+               req->cos_rule = cpu_to_le16(0xffff);
        }
 
        if (vnic->flags & BNXT_VNIC_RSS_FLAG)
@@ -5465,34 +5288,36 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
                ring = bp->rx_nr_rings - 1;
 
        grp_idx = bp->rx_ring[ring].bnapi->index;
-       req.dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
-       req.lb_rule = cpu_to_le16(0xffff);
+       req->dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
+       req->lb_rule = cpu_to_le16(0xffff);
 vnic_mru:
-       req.mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN);
+       req->mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN);
 
-       req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
+       req->vnic_id = cpu_to_le16(vnic->fw_vnic_id);
 #ifdef CONFIG_BNXT_SRIOV
        if (BNXT_VF(bp))
                def_vlan = bp->vf.vlan;
 #endif
        if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan)
-               req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
+               req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
        if (!vnic_id && bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
-               req.flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
+               req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, u16 vnic_id)
 {
        if (bp->vnic_info[vnic_id].fw_vnic_id != INVALID_HW_RING_ID) {
-               struct hwrm_vnic_free_input req = {0};
+               struct hwrm_vnic_free_input *req;
 
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_FREE, -1, -1);
-               req.vnic_id =
+               if (hwrm_req_init(bp, req, HWRM_VNIC_FREE))
+                       return;
+
+               req->vnic_id =
                        cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id);
 
-               hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               hwrm_req_send(bp, req);
                bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID;
        }
 }
@@ -5509,11 +5334,15 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id,
                                unsigned int start_rx_ring_idx,
                                unsigned int nr_rings)
 {
-       int rc = 0;
        unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings;
-       struct hwrm_vnic_alloc_input req = {0};
-       struct hwrm_vnic_alloc_output *resp = bp->hwrm_cmd_resp_addr;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
+       struct hwrm_vnic_alloc_output *resp;
+       struct hwrm_vnic_alloc_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_ALLOC);
+       if (rc)
+               return rc;
 
        if (bp->flags & BNXT_FLAG_CHIP_P5)
                goto vnic_no_ring_grps;
@@ -5533,22 +5362,20 @@ vnic_no_ring_grps:
        for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++)
                vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID;
        if (vnic_id == 0)
-               req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
+               req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_ALLOC, -1, -1);
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                vnic->fw_vnic_id = le32_to_cpu(resp->vnic_id);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
 {
-       struct hwrm_vnic_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_vnic_qcaps_input req = {0};
+       struct hwrm_vnic_qcaps_output *resp;
+       struct hwrm_vnic_qcaps_input *req;
        int rc;
 
        bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats);
@@ -5556,9 +5383,12 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
        if (bp->hwrm_spec_code < 0x10600)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_QCAPS, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_VNIC_QCAPS);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                u32 flags = le32_to_cpu(resp->flags);
 
@@ -5584,92 +5414,96 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp)
                                bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp)
 {
+       struct hwrm_ring_grp_alloc_output *resp;
+       struct hwrm_ring_grp_alloc_input *req;
+       int rc;
        u16 i;
-       u32 rc = 0;
 
        if (bp->flags & BNXT_FLAG_CHIP_P5)
                return 0;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = hwrm_req_init(bp, req, HWRM_RING_GRP_ALLOC);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
        for (i = 0; i < bp->rx_nr_rings; i++) {
-               struct hwrm_ring_grp_alloc_input req = {0};
-               struct hwrm_ring_grp_alloc_output *resp =
-                                       bp->hwrm_cmd_resp_addr;
                unsigned int grp_idx = bp->rx_ring[i].bnapi->index;
 
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_GRP_ALLOC, -1, -1);
+               req->cr = cpu_to_le16(bp->grp_info[grp_idx].cp_fw_ring_id);
+               req->rr = cpu_to_le16(bp->grp_info[grp_idx].rx_fw_ring_id);
+               req->ar = cpu_to_le16(bp->grp_info[grp_idx].agg_fw_ring_id);
+               req->sc = cpu_to_le16(bp->grp_info[grp_idx].fw_stats_ctx);
 
-               req.cr = cpu_to_le16(bp->grp_info[grp_idx].cp_fw_ring_id);
-               req.rr = cpu_to_le16(bp->grp_info[grp_idx].rx_fw_ring_id);
-               req.ar = cpu_to_le16(bp->grp_info[grp_idx].agg_fw_ring_id);
-               req.sc = cpu_to_le16(bp->grp_info[grp_idx].fw_stats_ctx);
+               rc = hwrm_req_send(bp, req);
 
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
                if (rc)
                        break;
 
                bp->grp_info[grp_idx].fw_grp_id =
                        le32_to_cpu(resp->ring_group_id);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static void bnxt_hwrm_ring_grp_free(struct bnxt *bp)
 {
+       struct hwrm_ring_grp_free_input *req;
        u16 i;
-       struct hwrm_ring_grp_free_input req = {0};
 
        if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5))
                return;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_GRP_FREE, -1, -1);
+       if (hwrm_req_init(bp, req, HWRM_RING_GRP_FREE))
+               return;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       hwrm_req_hold(bp, req);
        for (i = 0; i < bp->cp_nr_rings; i++) {
                if (bp->grp_info[i].fw_grp_id == INVALID_HW_RING_ID)
                        continue;
-               req.ring_group_id =
+               req->ring_group_id =
                        cpu_to_le32(bp->grp_info[i].fw_grp_id);
 
-               _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               hwrm_req_send(bp, req);
                bp->grp_info[i].fw_grp_id = INVALID_HW_RING_ID;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 }
 
 static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
                                    struct bnxt_ring_struct *ring,
                                    u32 ring_type, u32 map_index)
 {
-       int rc = 0, err = 0;
-       struct hwrm_ring_alloc_input req = {0};
-       struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_ring_alloc_output *resp;
+       struct hwrm_ring_alloc_input *req;
        struct bnxt_ring_mem_info *rmem = &ring->ring_mem;
        struct bnxt_ring_grp_info *grp_info;
+       int rc, err = 0;
        u16 ring_id;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_ALLOC, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_RING_ALLOC);
+       if (rc)
+               goto exit;
 
-       req.enables = 0;
+       req->enables = 0;
        if (rmem->nr_pages > 1) {
-               req.page_tbl_addr = cpu_to_le64(rmem->pg_tbl_map);
+               req->page_tbl_addr = cpu_to_le64(rmem->pg_tbl_map);
                /* Page size is in log2 units */
-               req.page_size = BNXT_PAGE_SHIFT;
-               req.page_tbl_depth = 1;
+               req->page_size = BNXT_PAGE_SHIFT;
+               req->page_tbl_depth = 1;
        } else {
-               req.page_tbl_addr =  cpu_to_le64(rmem->dma_arr[0]);
+               req->page_tbl_addr =  cpu_to_le64(rmem->dma_arr[0]);
        }
-       req.fbo = 0;
+       req->fbo = 0;
        /* Association of ring index with doorbell index and MSIX number */
-       req.logical_id = cpu_to_le16(map_index);
+       req->logical_id = cpu_to_le16(map_index);
 
        switch (ring_type) {
        case HWRM_RING_ALLOC_TX: {
@@ -5677,67 +5511,67 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
 
                txr = container_of(ring, struct bnxt_tx_ring_info,
                                   tx_ring_struct);
-               req.ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
+               req->ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
                /* Association of transmit ring with completion ring */
                grp_info = &bp->grp_info[ring->grp_idx];
-               req.cmpl_ring_id = cpu_to_le16(bnxt_cp_ring_for_tx(bp, txr));
-               req.length = cpu_to_le32(bp->tx_ring_mask + 1);
-               req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
-               req.queue_id = cpu_to_le16(ring->queue_id);
+               req->cmpl_ring_id = cpu_to_le16(bnxt_cp_ring_for_tx(bp, txr));
+               req->length = cpu_to_le32(bp->tx_ring_mask + 1);
+               req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+               req->queue_id = cpu_to_le16(ring->queue_id);
                break;
        }
        case HWRM_RING_ALLOC_RX:
-               req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
-               req.length = cpu_to_le32(bp->rx_ring_mask + 1);
+               req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+               req->length = cpu_to_le32(bp->rx_ring_mask + 1);
                if (bp->flags & BNXT_FLAG_CHIP_P5) {
                        u16 flags = 0;
 
                        /* Association of rx ring with stats context */
                        grp_info = &bp->grp_info[ring->grp_idx];
-                       req.rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
-                       req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
-                       req.enables |= cpu_to_le32(
+                       req->rx_buf_size = cpu_to_le16(bp->rx_buf_use_size);
+                       req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+                       req->enables |= cpu_to_le32(
                                RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
                        if (NET_IP_ALIGN == 2)
                                flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD;
-                       req.flags = cpu_to_le16(flags);
+                       req->flags = cpu_to_le16(flags);
                }
                break;
        case HWRM_RING_ALLOC_AGG:
                if (bp->flags & BNXT_FLAG_CHIP_P5) {
-                       req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
+                       req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
                        /* Association of agg ring with rx ring */
                        grp_info = &bp->grp_info[ring->grp_idx];
-                       req.rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
-                       req.rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
-                       req.stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
-                       req.enables |= cpu_to_le32(
+                       req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
+                       req->rx_buf_size = cpu_to_le16(BNXT_RX_PAGE_SIZE);
+                       req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+                       req->enables |= cpu_to_le32(
                                RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID |
                                RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
                } else {
-                       req.ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+                       req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
                }
-               req.length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
+               req->length = cpu_to_le32(bp->rx_agg_ring_mask + 1);
                break;
        case HWRM_RING_ALLOC_CMPL:
-               req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
-               req.length = cpu_to_le32(bp->cp_ring_mask + 1);
+               req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+               req->length = cpu_to_le32(bp->cp_ring_mask + 1);
                if (bp->flags & BNXT_FLAG_CHIP_P5) {
                        /* Association of cp ring with nq */
                        grp_info = &bp->grp_info[map_index];
-                       req.nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
-                       req.cq_handle = cpu_to_le64(ring->handle);
-                       req.enables |= cpu_to_le32(
+                       req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id);
+                       req->cq_handle = cpu_to_le64(ring->handle);
+                       req->enables |= cpu_to_le32(
                                RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID);
                } else if (bp->flags & BNXT_FLAG_USING_MSIX) {
-                       req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+                       req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
                }
                break;
        case HWRM_RING_ALLOC_NQ:
-               req.ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
-               req.length = cpu_to_le32(bp->cp_ring_mask + 1);
+               req->ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
+               req->length = cpu_to_le32(bp->cp_ring_mask + 1);
                if (bp->flags & BNXT_FLAG_USING_MSIX)
-                       req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+                       req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
                break;
        default:
                netdev_err(bp->dev, "hwrm alloc invalid ring type %d\n",
@@ -5745,12 +5579,13 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp,
                return -1;
        }
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        err = le16_to_cpu(resp->error_code);
        ring_id = le16_to_cpu(resp->ring_id);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 
+exit:
        if (rc || err) {
                netdev_err(bp->dev, "hwrm_ring_alloc type %d failed. rc:%x err:%x\n",
                           ring_type, rc, err);
@@ -5765,23 +5600,28 @@ static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx)
        int rc;
 
        if (BNXT_PF(bp)) {
-               struct hwrm_func_cfg_input req = {0};
+               struct hwrm_func_cfg_input *req;
+
+               rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+               if (rc)
+                       return rc;
 
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-               req.fid = cpu_to_le16(0xffff);
-               req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
-               req.async_event_cr = cpu_to_le16(idx);
-               rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               req->fid = cpu_to_le16(0xffff);
+               req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
+               req->async_event_cr = cpu_to_le16(idx);
+               return hwrm_req_send(bp, req);
        } else {
-               struct hwrm_func_vf_cfg_input req = {0};
+               struct hwrm_func_vf_cfg_input *req;
 
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
-               req.enables =
+               rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG);
+               if (rc)
+                       return rc;
+
+               req->enables =
                        cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
-               req.async_event_cr = cpu_to_le16(idx);
-               rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               req->async_event_cr = cpu_to_le16(idx);
+               return hwrm_req_send(bp, req);
        }
-       return rc;
 }
 
 static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
@@ -5952,23 +5792,27 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp,
                                   struct bnxt_ring_struct *ring,
                                   u32 ring_type, int cmpl_ring_id)
 {
+       struct hwrm_ring_free_output *resp;
+       struct hwrm_ring_free_input *req;
+       u16 error_code = 0;
        int rc;
-       struct hwrm_ring_free_input req = {0};
-       struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
-       u16 error_code;
 
        if (BNXT_NO_FW_ACCESS(bp))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1);
-       req.ring_type = ring_type;
-       req.ring_id = cpu_to_le16(ring->fw_ring_id);
+       rc = hwrm_req_init(bp, req, HWRM_RING_FREE);
+       if (rc)
+               goto exit;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       error_code = le16_to_cpu(resp->error_code);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       req->cmpl_ring = cpu_to_le16(cmpl_ring_id);
+       req->ring_type = ring_type;
+       req->ring_id = cpu_to_le16(ring->fw_ring_id);
 
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       error_code = le16_to_cpu(resp->error_code);
+       hwrm_req_drop(bp, req);
+exit:
        if (rc || error_code) {
                netdev_err(bp->dev, "hwrm_ring_free type %d failed. rc:%x err:%x\n",
                           ring_type, rc, error_code);
@@ -6083,20 +5927,23 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
 
 static int bnxt_hwrm_get_rings(struct bnxt *bp)
 {
-       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
        struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-       struct hwrm_func_qcfg_input req = {0};
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
        int rc;
 
        if (bp->hwrm_spec_code < 0x10601)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               hwrm_req_drop(bp, req);
                return rc;
        }
 
@@ -6130,39 +5977,45 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
                hw_resc->resv_cp_rings = cp;
                hw_resc->resv_stat_ctxs = stats;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return 0;
 }
 
-/* Caller must hold bp->hwrm_cmd_lock */
 int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings)
 {
-       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_qcfg_input req = {0};
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
        int rc;
 
        if (bp->hwrm_spec_code < 0x10601)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-       req.fid = cpu_to_le16(fid);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(fid);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                *tx_rings = le16_to_cpu(resp->alloc_tx_rings);
 
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static bool bnxt_rfs_supported(struct bnxt *bp);
 
-static void
-__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
-                            int tx_rings, int rx_rings, int ring_grps,
-                            int cp_rings, int stats, int vnics)
+static struct hwrm_func_cfg_input *
+__bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+                            int ring_grps, int cp_rings, int stats, int vnics)
 {
+       struct hwrm_func_cfg_input *req;
        u32 enables = 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_CFG, -1, -1);
+       if (hwrm_req_init(bp, req, HWRM_FUNC_CFG))
+               return NULL;
+
        req->fid = cpu_to_le16(0xffff);
        enables |= tx_rings ? FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
        req->num_tx_rings = cpu_to_le16(tx_rings);
@@ -6203,17 +6056,19 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, struct hwrm_func_cfg_input *req,
                req->num_vnics = cpu_to_le16(vnics);
        }
        req->enables = cpu_to_le32(enables);
+       return req;
 }
 
-static void
-__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
-                            struct hwrm_func_vf_cfg_input *req, int tx_rings,
-                            int rx_rings, int ring_grps, int cp_rings,
-                            int stats, int vnics)
+static struct hwrm_func_vf_cfg_input *
+__bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
+                            int ring_grps, int cp_rings, int stats, int vnics)
 {
+       struct hwrm_func_vf_cfg_input *req;
        u32 enables = 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, req, HWRM_FUNC_VF_CFG, -1, -1);
+       if (hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG))
+               return NULL;
+
        enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
        enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
                              FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
@@ -6245,21 +6100,27 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp,
        req->num_vnics = cpu_to_le16(vnics);
 
        req->enables = cpu_to_le32(enables);
+       return req;
 }
 
 static int
 bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                           int ring_grps, int cp_rings, int stats, int vnics)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
        int rc;
 
-       __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-                                    cp_rings, stats, vnics);
-       if (!req.enables)
+       req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
+                                          cp_rings, stats, vnics);
+       if (!req)
+               return -ENOMEM;
+
+       if (!req->enables) {
+               hwrm_req_drop(bp, req);
                return 0;
+       }
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                return rc;
 
@@ -6273,7 +6134,7 @@ static int
 bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                           int ring_grps, int cp_rings, int stats, int vnics)
 {
-       struct hwrm_func_vf_cfg_input req = {0};
+       struct hwrm_func_vf_cfg_input *req;
        int rc;
 
        if (!BNXT_NEW_RM(bp)) {
@@ -6281,9 +6142,12 @@ bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                return 0;
        }
 
-       __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-                                    cp_rings, stats, vnics);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
+                                          cp_rings, stats, vnics);
+       if (!req)
+               return -ENOMEM;
+
+       rc = hwrm_req_send(bp, req);
        if (rc)
                return rc;
 
@@ -6484,14 +6348,14 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                                    int ring_grps, int cp_rings, int stats,
                                    int vnics)
 {
-       struct hwrm_func_vf_cfg_input req = {0};
+       struct hwrm_func_vf_cfg_input *req;
        u32 flags;
 
        if (!BNXT_NEW_RM(bp))
                return 0;
 
-       __bnxt_hwrm_reserve_vf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-                                    cp_rings, stats, vnics);
+       req = __bnxt_hwrm_reserve_vf_rings(bp, tx_rings, rx_rings, ring_grps,
+                                          cp_rings, stats, vnics);
        flags = FUNC_VF_CFG_REQ_FLAGS_TX_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_RX_ASSETS_TEST |
                FUNC_VF_CFG_REQ_FLAGS_CMPL_ASSETS_TEST |
@@ -6501,20 +6365,19 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
        if (!(bp->flags & BNXT_FLAG_CHIP_P5))
                flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
 
-       req.flags = cpu_to_le32(flags);
-       return hwrm_send_message_silent(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+       req->flags = cpu_to_le32(flags);
+       return hwrm_req_send_silent(bp, req);
 }
 
 static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                                    int ring_grps, int cp_rings, int stats,
                                    int vnics)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
        u32 flags;
 
-       __bnxt_hwrm_reserve_pf_rings(bp, &req, tx_rings, rx_rings, ring_grps,
-                                    cp_rings, stats, vnics);
+       req = __bnxt_hwrm_reserve_pf_rings(bp, tx_rings, rx_rings, ring_grps,
+                                          cp_rings, stats, vnics);
        flags = FUNC_CFG_REQ_FLAGS_TX_ASSETS_TEST;
        if (BNXT_NEW_RM(bp)) {
                flags |= FUNC_CFG_REQ_FLAGS_RX_ASSETS_TEST |
@@ -6528,9 +6391,8 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings,
                        flags |= FUNC_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST;
        }
 
-       req.flags = cpu_to_le32(flags);
-       return hwrm_send_message_silent(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+       req->flags = cpu_to_le32(flags);
+       return hwrm_req_send_silent(bp, req);
 }
 
 static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
@@ -6551,9 +6413,9 @@ static int bnxt_hwrm_check_rings(struct bnxt *bp, int tx_rings, int rx_rings,
 
 static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
 {
-       struct hwrm_ring_aggint_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
        struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
-       struct hwrm_ring_aggint_qcaps_input req = {0};
+       struct hwrm_ring_aggint_qcaps_output *resp;
+       struct hwrm_ring_aggint_qcaps_input *req;
        int rc;
 
        coal_cap->cmpl_params = BNXT_LEGACY_COAL_CMPL_PARAMS;
@@ -6569,9 +6431,11 @@ static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
        if (bp->hwrm_spec_code < 0x10902)
                return;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_AGGINT_QCAPS, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (hwrm_req_init(bp, req, HWRM_RING_AGGINT_QCAPS))
+               return;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (!rc) {
                coal_cap->cmpl_params = le32_to_cpu(resp->cmpl_params);
                coal_cap->nq_params = le32_to_cpu(resp->nq_params);
@@ -6591,7 +6455,7 @@ static void bnxt_hwrm_coal_params_qcaps(struct bnxt *bp)
                        le16_to_cpu(resp->num_cmpl_aggr_int_max);
                coal_cap->timer_units = le16_to_cpu(resp->timer_units);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 }
 
 static u16 bnxt_usec_to_coal_tmr(struct bnxt *bp, u16 usec)
@@ -6659,37 +6523,40 @@ static void bnxt_hwrm_set_coal_params(struct bnxt *bp,
        req->enables |= cpu_to_le16(BNXT_COAL_CMPL_ENABLES);
 }
 
-/* Caller holds bp->hwrm_cmd_lock */
 static int __bnxt_hwrm_set_coal_nq(struct bnxt *bp, struct bnxt_napi *bnapi,
                                   struct bnxt_coal *hw_coal)
 {
-       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req = {0};
+       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req;
        struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
        struct bnxt_coal_cap *coal_cap = &bp->coal_cap;
        u32 nq_params = coal_cap->nq_params;
        u16 tmr;
+       int rc;
 
        if (!(nq_params & RING_AGGINT_QCAPS_RESP_NQ_PARAMS_INT_LAT_TMR_MIN))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS,
-                              -1, -1);
-       req.ring_id = cpu_to_le16(cpr->cp_ring_struct.fw_ring_id);
-       req.flags =
+       rc = hwrm_req_init(bp, req, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+       if (rc)
+               return rc;
+
+       req->ring_id = cpu_to_le16(cpr->cp_ring_struct.fw_ring_id);
+       req->flags =
                cpu_to_le16(RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_IS_NQ);
 
        tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks) / 2;
        tmr = clamp_t(u16, tmr, 1, coal_cap->int_lat_tmr_min_max);
-       req.int_lat_tmr_min = cpu_to_le16(tmr);
-       req.enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
-       return _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->int_lat_tmr_min = cpu_to_le16(tmr);
+       req->enables |= cpu_to_le16(BNXT_COAL_CMPL_MIN_TMR_ENABLE);
+       return hwrm_req_send(bp, req);
 }
 
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
 {
-       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0};
+       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx;
        struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
        struct bnxt_coal coal;
+       int rc;
 
        /* Tick values in micro seconds.
         * 1 coal_buf x bufs_per_record = 1 completion record.
@@ -6702,48 +6569,53 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi)
        if (!bnapi->rx_ring)
                return -ENODEV;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
-                              HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
+       rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_set_coal_params(bp, &coal, &req_rx);
+       bnxt_hwrm_set_coal_params(bp, &coal, req_rx);
 
-       req_rx.ring_id = cpu_to_le16(bnxt_cp_ring_for_rx(bp, bnapi->rx_ring));
+       req_rx->ring_id = cpu_to_le16(bnxt_cp_ring_for_rx(bp, bnapi->rx_ring));
 
-       return hwrm_send_message(bp, &req_rx, sizeof(req_rx),
-                                HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req_rx);
 }
 
 int bnxt_hwrm_set_coal(struct bnxt *bp)
 {
-       int i, rc = 0;
-       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0},
-                                                          req_tx = {0}, *req;
+       struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx,
+                                                          *req;
+       int i, rc;
+
+       rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req_rx,
-                              HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
-       bnxt_hwrm_cmd_hdr_init(bp, &req_tx,
-                              HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1);
+       rc = hwrm_req_init(bp, req_tx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
+       if (rc) {
+               hwrm_req_drop(bp, req_rx);
+               return rc;
+       }
 
-       bnxt_hwrm_set_coal_params(bp, &bp->rx_coal, &req_rx);
-       bnxt_hwrm_set_coal_params(bp, &bp->tx_coal, &req_tx);
+       bnxt_hwrm_set_coal_params(bp, &bp->rx_coal, req_rx);
+       bnxt_hwrm_set_coal_params(bp, &bp->tx_coal, req_tx);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       hwrm_req_hold(bp, req_rx);
+       hwrm_req_hold(bp, req_tx);
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_coal *hw_coal;
                u16 ring_id;
 
-               req = &req_rx;
+               req = req_rx;
                if (!bnapi->rx_ring) {
                        ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
-                       req = &req_tx;
+                       req = req_tx;
                } else {
                        ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring);
                }
                req->ring_id = cpu_to_le16(ring_id);
 
-               rc = _hwrm_send_message(bp, req, sizeof(*req),
-                                       HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
                if (rc)
                        break;
 
@@ -6751,11 +6623,10 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
                        continue;
 
                if (bnapi->rx_ring && bnapi->tx_ring) {
-                       req = &req_tx;
+                       req = req_tx;
                        ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring);
                        req->ring_id = cpu_to_le16(ring_id);
-                       rc = _hwrm_send_message(bp, req, sizeof(*req),
-                                               HWRM_CMD_TIMEOUT);
+                       rc = hwrm_req_send(bp, req);
                        if (rc)
                                break;
                }
@@ -6765,14 +6636,15 @@ int bnxt_hwrm_set_coal(struct bnxt *bp)
                        hw_coal = &bp->tx_coal;
                __bnxt_hwrm_set_coal_nq(bp, bnapi, hw_coal);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req_rx);
+       hwrm_req_drop(bp, req_tx);
        return rc;
 }
 
 static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp)
 {
-       struct hwrm_stat_ctx_clr_stats_input req0 = {0};
-       struct hwrm_stat_ctx_free_input req = {0};
+       struct hwrm_stat_ctx_clr_stats_input *req0 = NULL;
+       struct hwrm_stat_ctx_free_input *req;
        int i;
 
        if (!bp->bnapi)
@@ -6781,53 +6653,60 @@ static void bnxt_hwrm_stat_ctx_free(struct bnxt *bp)
        if (BNXT_CHIP_TYPE_NITRO_A0(bp))
                return;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req0, HWRM_STAT_CTX_CLR_STATS, -1, -1);
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_FREE, -1, -1);
-
-       mutex_lock(&bp->hwrm_cmd_lock);
+       if (hwrm_req_init(bp, req, HWRM_STAT_CTX_FREE))
+               return;
+       if (BNXT_FW_MAJ(bp) <= 20) {
+               if (hwrm_req_init(bp, req0, HWRM_STAT_CTX_CLR_STATS)) {
+                       hwrm_req_drop(bp, req);
+                       return;
+               }
+               hwrm_req_hold(bp, req0);
+       }
+       hwrm_req_hold(bp, req);
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 
                if (cpr->hw_stats_ctx_id != INVALID_STATS_CTX_ID) {
-                       req.stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id);
-                       if (BNXT_FW_MAJ(bp) <= 20) {
-                               req0.stat_ctx_id = req.stat_ctx_id;
-                               _hwrm_send_message(bp, &req0, sizeof(req0),
-                                                  HWRM_CMD_TIMEOUT);
+                       req->stat_ctx_id = cpu_to_le32(cpr->hw_stats_ctx_id);
+                       if (req0) {
+                               req0->stat_ctx_id = req->stat_ctx_id;
+                               hwrm_req_send(bp, req0);
                        }
-                       _hwrm_send_message(bp, &req, sizeof(req),
-                                          HWRM_CMD_TIMEOUT);
+                       hwrm_req_send(bp, req);
 
                        cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
+       if (req0)
+               hwrm_req_drop(bp, req0);
 }
 
 static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
 {
-       int rc = 0, i;
-       struct hwrm_stat_ctx_alloc_input req = {0};
-       struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_stat_ctx_alloc_output *resp;
+       struct hwrm_stat_ctx_alloc_input *req;
+       int rc, i;
 
        if (BNXT_CHIP_TYPE_NITRO_A0(bp))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_STAT_CTX_ALLOC);
+       if (rc)
+               return rc;
 
-       req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
-       req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
+       req->stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size);
+       req->update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       resp = hwrm_req_hold(bp, req);
        for (i = 0; i < bp->cp_nr_rings; i++) {
                struct bnxt_napi *bnapi = bp->bnapi[i];
                struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 
-               req.stats_dma_addr = cpu_to_le64(cpr->stats.hw_stats_map);
+               req->stats_dma_addr = cpu_to_le64(cpr->stats.hw_stats_map);
 
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
                if (rc)
                        break;
 
@@ -6835,22 +6714,25 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
 
                bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 {
-       struct hwrm_func_qcfg_input req = {0};
-       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
        u32 min_db_offset = 0;
        u16 flags;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto func_qcfg_exit;
 
@@ -6910,7 +6792,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
                bp->db_size = pci_resource_len(bp->pdev, 2);
 
 func_qcfg_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -6949,17 +6831,19 @@ static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_info *ctx,
 
 static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
 {
-       struct hwrm_func_backing_store_qcaps_input req = {0};
-       struct hwrm_func_backing_store_qcaps_output *resp =
-               bp->hwrm_cmd_resp_addr;
+       struct hwrm_func_backing_store_qcaps_output *resp;
+       struct hwrm_func_backing_store_qcaps_input *req;
        int rc;
 
        if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_QCAPS, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (!rc) {
                struct bnxt_ctx_pg_info *ctx_pg;
                struct bnxt_ctx_mem_info *ctx;
@@ -7024,7 +6908,7 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
                rc = 0;
        }
 ctx_err:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -7055,15 +6939,17 @@ static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
 
 static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
 {
-       struct hwrm_func_backing_store_cfg_input req = {0};
+       struct hwrm_func_backing_store_cfg_input *req;
        struct bnxt_ctx_mem_info *ctx = bp->ctx;
        struct bnxt_ctx_pg_info *ctx_pg;
-       u32 req_len = sizeof(req);
+       void **__req = (void **)&req;
+       u32 req_len = sizeof(*req);
        __le32 *num_entries;
        __le64 *pg_dir;
        u32 flags = 0;
        u8 *pg_attr;
        u32 ena;
+       int rc;
        int i;
 
        if (!ctx)
@@ -7071,90 +6957,93 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables)
 
        if (req_len > bp->hwrm_max_ext_req_len)
                req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1);
-       req.enables = cpu_to_le32(enables);
+       rc = __hwrm_req_init(bp, __req, HWRM_FUNC_BACKING_STORE_CFG, req_len);
+       if (rc)
+               return rc;
 
+       req->enables = cpu_to_le32(enables);
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) {
                ctx_pg = &ctx->qp_mem;
-               req.qp_num_entries = cpu_to_le32(ctx_pg->entries);
-               req.qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
-               req.qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
-               req.qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
+               req->qp_num_entries = cpu_to_le32(ctx_pg->entries);
+               req->qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries);
+               req->qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries);
+               req->qp_entry_size = cpu_to_le16(ctx->qp_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.qpc_pg_size_qpc_lvl,
-                                     &req.qpc_page_dir);
+                                     &req->qpc_pg_size_qpc_lvl,
+                                     &req->qpc_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) {
                ctx_pg = &ctx->srq_mem;
-               req.srq_num_entries = cpu_to_le32(ctx_pg->entries);
-               req.srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
-               req.srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
+               req->srq_num_entries = cpu_to_le32(ctx_pg->entries);
+               req->srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries);
+               req->srq_entry_size = cpu_to_le16(ctx->srq_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.srq_pg_size_srq_lvl,
-                                     &req.srq_page_dir);
+                                     &req->srq_pg_size_srq_lvl,
+                                     &req->srq_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) {
                ctx_pg = &ctx->cq_mem;
-               req.cq_num_entries = cpu_to_le32(ctx_pg->entries);
-               req.cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
-               req.cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
-               bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req.cq_pg_size_cq_lvl,
-                                     &req.cq_page_dir);
+               req->cq_num_entries = cpu_to_le32(ctx_pg->entries);
+               req->cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries);
+               req->cq_entry_size = cpu_to_le16(ctx->cq_entry_size);
+               bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
+                                     &req->cq_pg_size_cq_lvl,
+                                     &req->cq_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) {
                ctx_pg = &ctx->vnic_mem;
-               req.vnic_num_vnic_entries =
+               req->vnic_num_vnic_entries =
                        cpu_to_le16(ctx->vnic_max_vnic_entries);
-               req.vnic_num_ring_table_entries =
+               req->vnic_num_ring_table_entries =
                        cpu_to_le16(ctx->vnic_max_ring_table_entries);
-               req.vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
+               req->vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.vnic_pg_size_vnic_lvl,
-                                     &req.vnic_page_dir);
+                                     &req->vnic_pg_size_vnic_lvl,
+                                     &req->vnic_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) {
                ctx_pg = &ctx->stat_mem;
-               req.stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
-               req.stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
+               req->stat_num_entries = cpu_to_le32(ctx->stat_max_entries);
+               req->stat_entry_size = cpu_to_le16(ctx->stat_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.stat_pg_size_stat_lvl,
-                                     &req.stat_page_dir);
+                                     &req->stat_pg_size_stat_lvl,
+                                     &req->stat_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) {
                ctx_pg = &ctx->mrav_mem;
-               req.mrav_num_entries = cpu_to_le32(ctx_pg->entries);
+               req->mrav_num_entries = cpu_to_le32(ctx_pg->entries);
                if (ctx->mrav_num_entries_units)
                        flags |=
                        FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT;
-               req.mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
+               req->mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.mrav_pg_size_mrav_lvl,
-                                     &req.mrav_page_dir);
+                                     &req->mrav_pg_size_mrav_lvl,
+                                     &req->mrav_page_dir);
        }
        if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) {
                ctx_pg = &ctx->tim_mem;
-               req.tim_num_entries = cpu_to_le32(ctx_pg->entries);
-               req.tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
+               req->tim_num_entries = cpu_to_le32(ctx_pg->entries);
+               req->tim_entry_size = cpu_to_le16(ctx->tim_entry_size);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem,
-                                     &req.tim_pg_size_tim_lvl,
-                                     &req.tim_page_dir);
+                                     &req->tim_pg_size_tim_lvl,
+                                     &req->tim_page_dir);
        }
-       for (i = 0, num_entries = &req.tqm_sp_num_entries,
-            pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl,
-            pg_dir = &req.tqm_sp_page_dir,
+       for (i = 0, num_entries = &req->tqm_sp_num_entries,
+            pg_attr = &req->tqm_sp_pg_size_tqm_sp_lvl,
+            pg_dir = &req->tqm_sp_page_dir,
             ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP;
             i < BNXT_MAX_TQM_RINGS;
             i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) {
                if (!(enables & ena))
                        continue;
 
-               req.tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
+               req->tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size);
                ctx_pg = ctx->tqm_mem[i];
                *num_entries = cpu_to_le32(ctx_pg->entries);
                bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir);
        }
-       req.flags = cpu_to_le32(flags);
-       return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT);
+       req->flags = cpu_to_le32(flags);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
@@ -7434,17 +7323,18 @@ skip_rdma:
 
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
 {
-       struct hwrm_func_resource_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_resource_qcaps_input req = {0};
+       struct hwrm_func_resource_qcaps_output *resp;
+       struct hwrm_func_resource_qcaps_input *req;
        struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_RESOURCE_QCAPS, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_RESOURCE_QCAPS);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message_silent(bp, &req, sizeof(req),
-                                      HWRM_CMD_TIMEOUT);
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (rc)
                goto hwrm_func_resc_qcaps_exit;
 
@@ -7485,15 +7375,14 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
                        pf->vf_resv_strategy = BNXT_VF_RESV_STRATEGY_MAXIMAL;
        }
 hwrm_func_resc_qcaps_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
-/* bp->hwrm_cmd_lock already held. */
 static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
 {
-       struct hwrm_port_mac_ptp_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_port_mac_ptp_qcfg_input req = {0};
+       struct hwrm_port_mac_ptp_qcfg_output *resp;
+       struct hwrm_port_mac_ptp_qcfg_input *req;
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        u8 flags;
        int rc;
@@ -7503,21 +7392,27 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
                goto no_ptp;
        }
 
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_PTP_QCFG, -1, -1);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_PTP_QCFG);
        if (rc)
                goto no_ptp;
 
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (rc)
+               goto exit;
+
        flags = resp->flags;
        if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) {
                rc = -ENODEV;
-               goto no_ptp;
+               goto exit;
        }
        if (!ptp) {
                ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
-               if (!ptp)
-                       return -ENOMEM;
+               if (!ptp) {
+                       rc = -ENOMEM;
+                       goto exit;
+               }
                ptp->bp = bp;
                bp->ptp_cfg = ptp;
        }
@@ -7529,11 +7424,18 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
                ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER;
        } else {
                rc = -ENODEV;
-               goto no_ptp;
+               goto exit;
        }
-       return 0;
+       rc = bnxt_ptp_init(bp);
+       if (rc)
+               netdev_warn(bp->dev, "PTP initialization failed.\n");
+exit:
+       hwrm_req_drop(bp, req);
+       if (!rc)
+               return 0;
 
 no_ptp:
+       bnxt_ptp_clear(bp);
        kfree(ptp);
        bp->ptp_cfg = NULL;
        return rc;
@@ -7541,17 +7443,19 @@ no_ptp:
 
 static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 {
-       int rc = 0;
-       struct hwrm_func_qcaps_input req = {0};
-       struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_func_qcaps_output *resp;
+       struct hwrm_func_qcaps_input *req;
        struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
        u32 flags, flags_ext;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->fid = cpu_to_le16(0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto hwrm_func_qcaps_exit;
 
@@ -7576,6 +7480,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
        flags_ext = le32_to_cpu(resp->flags_ext);
        if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED)
                bp->fw_cap |= BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED;
+       if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_PTP_PPS_SUPPORTED))
+               bp->fw_cap |= BNXT_FW_CAP_PTP_PPS;
 
        bp->tx_push_thresh = 0;
        if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) &&
@@ -7613,6 +7519,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
                if (flags & FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED) {
                        __bnxt_hwrm_ptp_qcfg(bp);
                } else {
+                       bnxt_ptp_clear(bp);
                        kfree(bp->ptp_cfg);
                        bp->ptp_cfg = NULL;
                }
@@ -7626,7 +7533,7 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
        }
 
 hwrm_func_qcaps_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -7657,19 +7564,20 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 
 static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
 {
-       struct hwrm_cfa_adv_flow_mgnt_qcaps_input req = {0};
        struct hwrm_cfa_adv_flow_mgnt_qcaps_output *resp;
-       int rc = 0;
+       struct hwrm_cfa_adv_flow_mgnt_qcaps_input *req;
        u32 flags;
+       int rc;
 
        if (!(bp->fw_cap & BNXT_FW_CAP_CFA_ADV_FLOW))
                return 0;
 
-       resp = bp->hwrm_cmd_resp_addr;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_ADV_FLOW_MGNT_QCAPS);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto hwrm_cfa_adv_qcaps_exit;
 
@@ -7679,7 +7587,7 @@ static int bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2;
 
 hwrm_cfa_adv_qcaps_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -7822,17 +7730,20 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
 
 static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 {
-       struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
        struct bnxt_fw_health *fw_health = bp->fw_health;
-       struct hwrm_error_recovery_qcfg_input req = {0};
+       struct hwrm_error_recovery_qcfg_output *resp;
+       struct hwrm_error_recovery_qcfg_input *req;
        int rc, i;
 
        if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_ERROR_RECOVERY_QCFG, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_ERROR_RECOVERY_QCFG);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto err_recovery_out;
        fw_health->flags = le32_to_cpu(resp->flags);
@@ -7874,7 +7785,7 @@ static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
                        resp->delay_after_reset[i];
        }
 err_recovery_out:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        if (!rc)
                rc = bnxt_map_fw_health_regs(bp);
        if (rc)
@@ -7884,12 +7795,16 @@ err_recovery_out:
 
 static int bnxt_hwrm_func_reset(struct bnxt *bp)
 {
-       struct hwrm_func_reset_input req = {0};
+       struct hwrm_func_reset_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_RESET, -1, -1);
-       req.enables = 0;
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_RESET);
+       if (rc)
+               return rc;
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_RESET_TIMEOUT);
+       req->enables = 0;
+       hwrm_req_timeout(bp, req, HWRM_RESET_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static void bnxt_nvm_cfg_ver_get(struct bnxt *bp)
@@ -7904,16 +7819,18 @@ static void bnxt_nvm_cfg_ver_get(struct bnxt *bp)
 
 static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
 {
-       int rc = 0;
-       struct hwrm_queue_qportcfg_input req = {0};
-       struct hwrm_queue_qportcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_queue_qportcfg_output *resp;
+       struct hwrm_queue_qportcfg_input *req;
        u8 i, j, *qptr;
        bool no_rdma;
+       int rc = 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_QPORTCFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_QPORTCFG);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto qportcfg_exit;
 
@@ -7947,35 +7864,48 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
                bp->max_lltc = bp->max_tc;
 
 qportcfg_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
-static int __bnxt_hwrm_ver_get(struct bnxt *bp, bool silent)
+static int bnxt_hwrm_poll(struct bnxt *bp)
 {
-       struct hwrm_ver_get_input req = {0};
+       struct hwrm_ver_get_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VER_GET, -1, -1);
-       req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
-       req.hwrm_intf_min = HWRM_VERSION_MINOR;
-       req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+       rc = hwrm_req_init(bp, req, HWRM_VER_GET);
+       if (rc)
+               return rc;
+
+       req->hwrm_intf_maj = HWRM_VERSION_MAJOR;
+       req->hwrm_intf_min = HWRM_VERSION_MINOR;
+       req->hwrm_intf_upd = HWRM_VERSION_UPDATE;
 
-       rc = bnxt_hwrm_do_send_msg(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT,
-                                  silent);
+       hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT);
+       rc = hwrm_req_send(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_ver_get(struct bnxt *bp)
 {
-       struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_ver_get_output *resp;
+       struct hwrm_ver_get_input *req;
        u16 fw_maj, fw_min, fw_bld, fw_rsv;
        u32 dev_caps_cfg, hwrm_ver;
        int rc, len;
 
+       rc = hwrm_req_init(bp, req, HWRM_VER_GET);
+       if (rc)
+               return rc;
+
+       hwrm_req_flags(bp, req, BNXT_HWRM_FULL_WAIT);
        bp->hwrm_max_req_len = HWRM_MAX_REQ_LEN;
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = __bnxt_hwrm_ver_get(bp, false);
+       req->hwrm_intf_maj = HWRM_VERSION_MAJOR;
+       req->hwrm_intf_min = HWRM_VERSION_MINOR;
+       req->hwrm_intf_upd = HWRM_VERSION_UPDATE;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto hwrm_ver_get_exit;
 
@@ -8067,29 +7997,33 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp)
                bp->fw_cap |= BNXT_FW_CAP_CFA_ADV_FLOW;
 
 hwrm_ver_get_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 int bnxt_hwrm_fw_set_time(struct bnxt *bp)
 {
-       struct hwrm_fw_set_time_input req = {0};
+       struct hwrm_fw_set_time_input *req;
        struct tm tm;
        time64_t now = ktime_get_real_seconds();
+       int rc;
 
        if ((BNXT_VF(bp) && bp->hwrm_spec_code < 0x10901) ||
            bp->hwrm_spec_code < 0x10400)
                return -EOPNOTSUPP;
 
        time64_to_tm(now, 0, &tm);
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1);
-       req.year = cpu_to_le16(1900 + tm.tm_year);
-       req.month = 1 + tm.tm_mon;
-       req.day = tm.tm_mday;
-       req.hour = tm.tm_hour;
-       req.minute = tm.tm_min;
-       req.second = tm.tm_sec;
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FW_SET_TIME);
+       if (rc)
+               return rc;
+
+       req->year = cpu_to_le16(1900 + tm.tm_year);
+       req->month = 1 + tm.tm_mon;
+       req->day = tm.tm_mday;
+       req->hour = tm.tm_hour;
+       req->minute = tm.tm_min;
+       req->second = tm.tm_sec;
+       return hwrm_req_send(bp, req);
 }
 
 static void bnxt_add_one_ctr(u64 hw, u64 *sw, u64 mask)
@@ -8177,8 +8111,9 @@ static void bnxt_accumulate_all_stats(struct bnxt *bp)
 
 static int bnxt_hwrm_port_qstats(struct bnxt *bp, u8 flags)
 {
+       struct hwrm_port_qstats_input *req;
        struct bnxt_pf_info *pf = &bp->pf;
-       struct hwrm_port_qstats_input req = {0};
+       int rc;
 
        if (!(bp->flags & BNXT_FLAG_PORT_STATS))
                return 0;
@@ -8186,20 +8121,24 @@ static int bnxt_hwrm_port_qstats(struct bnxt *bp, u8 flags)
        if (flags && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))
                return -EOPNOTSUPP;
 
-       req.flags = flags;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS, -1, -1);
-       req.port_id = cpu_to_le16(pf->port_id);
-       req.tx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map +
+       rc = hwrm_req_init(bp, req, HWRM_PORT_QSTATS);
+       if (rc)
+               return rc;
+
+       req->flags = flags;
+       req->port_id = cpu_to_le16(pf->port_id);
+       req->tx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map +
                                            BNXT_TX_PORT_STATS_BYTE_OFFSET);
-       req.rx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->rx_stat_host_addr = cpu_to_le64(bp->port_stats.hw_stats_map);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
 {
-       struct hwrm_port_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
-       struct hwrm_port_qstats_ext_input req = {0};
+       struct hwrm_queue_pri2cos_qcfg_output *resp_qc;
+       struct hwrm_queue_pri2cos_qcfg_input *req_qc;
+       struct hwrm_port_qstats_ext_output *resp_qs;
+       struct hwrm_port_qstats_ext_input *req_qs;
        struct bnxt_pf_info *pf = &bp->pf;
        u32 tx_stat_size;
        int rc;
@@ -8210,46 +8149,53 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
        if (flags && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS_EXT, -1, -1);
-       req.flags = flags;
-       req.port_id = cpu_to_le16(pf->port_id);
-       req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
-       req.rx_stat_host_addr = cpu_to_le64(bp->rx_port_stats_ext.hw_stats_map);
+       rc = hwrm_req_init(bp, req_qs, HWRM_PORT_QSTATS_EXT);
+       if (rc)
+               return rc;
+
+       req_qs->flags = flags;
+       req_qs->port_id = cpu_to_le16(pf->port_id);
+       req_qs->rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
+       req_qs->rx_stat_host_addr = cpu_to_le64(bp->rx_port_stats_ext.hw_stats_map);
        tx_stat_size = bp->tx_port_stats_ext.hw_stats ?
                       sizeof(struct tx_port_stats_ext) : 0;
-       req.tx_stat_size = cpu_to_le16(tx_stat_size);
-       req.tx_stat_host_addr = cpu_to_le64(bp->tx_port_stats_ext.hw_stats_map);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req_qs->tx_stat_size = cpu_to_le16(tx_stat_size);
+       req_qs->tx_stat_host_addr = cpu_to_le64(bp->tx_port_stats_ext.hw_stats_map);
+       resp_qs = hwrm_req_hold(bp, req_qs);
+       rc = hwrm_req_send(bp, req_qs);
        if (!rc) {
-               bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8;
+               bp->fw_rx_stats_ext_size =
+                       le16_to_cpu(resp_qs->rx_stat_size) / 8;
                bp->fw_tx_stats_ext_size = tx_stat_size ?
-                       le16_to_cpu(resp->tx_stat_size) / 8 : 0;
+                       le16_to_cpu(resp_qs->tx_stat_size) / 8 : 0;
        } else {
                bp->fw_rx_stats_ext_size = 0;
                bp->fw_tx_stats_ext_size = 0;
        }
+       hwrm_req_drop(bp, req_qs);
+
        if (flags)
-               goto qstats_done;
+               return rc;
 
        if (bp->fw_tx_stats_ext_size <=
            offsetof(struct tx_port_stats_ext, pfc_pri0_tx_duration_us) / 8) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
                bp->pri2cos_valid = 0;
                return rc;
        }
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req2, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
-       req2.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+       rc = hwrm_req_init(bp, req_qc, HWRM_QUEUE_PRI2COS_QCFG);
+       if (rc)
+               return rc;
+
+       req_qc->flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
 
-       rc = _hwrm_send_message(bp, &req2, sizeof(req2), HWRM_CMD_TIMEOUT);
+       resp_qc = hwrm_req_hold(bp, req_qc);
+       rc = hwrm_req_send(bp, req_qc);
        if (!rc) {
-               struct hwrm_queue_pri2cos_qcfg_output *resp2;
                u8 *pri2cos;
                int i, j;
 
-               resp2 = bp->hwrm_cmd_resp_addr;
-               pri2cos = &resp2->pri0_cos_queue_id;
+               pri2cos = &resp_qc->pri0_cos_queue_id;
                for (i = 0; i < 8; i++) {
                        u8 queue_id = pri2cos[i];
                        u8 queue_idx;
@@ -8258,17 +8204,18 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp, u8 flags)
                        queue_idx = queue_id % 10;
                        if (queue_idx > BNXT_MAX_QUEUE) {
                                bp->pri2cos_valid = false;
-                               goto qstats_done;
+                               hwrm_req_drop(bp, req_qc);
+                               return rc;
                        }
                        for (j = 0; j < bp->max_q; j++) {
                                if (bp->q_ids[j] == queue_id)
                                        bp->pri2cos_idx[i] = queue_idx;
                        }
                }
-               bp->pri2cos_valid = 1;
+               bp->pri2cos_valid = true;
        }
-qstats_done:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req_qc);
+
        return rc;
 }
 
@@ -8343,35 +8290,46 @@ static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path,
 
 static int bnxt_hwrm_set_br_mode(struct bnxt *bp, u16 br_mode)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
+       u8 evb_mode;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
        if (br_mode == BRIDGE_MODE_VEB)
-               req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
+               evb_mode = FUNC_CFG_REQ_EVB_MODE_VEB;
        else if (br_mode == BRIDGE_MODE_VEPA)
-               req.evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
+               evb_mode = FUNC_CFG_REQ_EVB_MODE_VEPA;
        else
                return -EINVAL;
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_EVB_MODE);
+       req->evb_mode = evb_mode;
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_set_cache_line_size(struct bnxt *bp, int size)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
+       int rc;
 
        if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10803)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE);
-       req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64;
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(0xffff);
+       req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_CACHE_LINESIZE);
+       req->options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_64;
        if (size == 128)
-               req.options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
+               req->options = FUNC_CFG_REQ_OPTIONS_CACHE_LINESIZE_SIZE_128;
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id)
@@ -9319,18 +9277,20 @@ static bool bnxt_phy_qcaps_no_speed(struct hwrm_port_phy_qcaps_output *resp)
 
 static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
 {
-       int rc = 0;
-       struct hwrm_port_phy_qcaps_input req = {0};
-       struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
        struct bnxt_link_info *link_info = &bp->link_info;
+       struct hwrm_port_phy_qcaps_output *resp;
+       struct hwrm_port_phy_qcaps_input *req;
+       int rc = 0;
 
        if (bp->hwrm_spec_code < 0x10201)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCAPS);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc)
                goto hwrm_phy_qcaps_exit;
 
@@ -9368,7 +9328,7 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
        bp->port_count = resp->port_cnt;
 
 hwrm_phy_qcaps_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -9381,19 +9341,21 @@ static bool bnxt_support_dropped(u16 advertising, u16 supported)
 
 int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 {
-       int rc = 0;
        struct bnxt_link_info *link_info = &bp->link_info;
-       struct hwrm_port_phy_qcfg_input req = {0};
-       struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_port_phy_qcfg_output *resp;
+       struct hwrm_port_phy_qcfg_input *req;
        u8 link_up = link_info->link_up;
        bool support_changed = false;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCFG);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               hwrm_req_drop(bp, req);
                return rc;
        }
 
@@ -9488,7 +9450,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
                /* alwasy link down if not require to update link state */
                link_info->link_up = 0;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 
        if (!BNXT_PHY_CFG_ABLE(bp))
                return 0;
@@ -9598,18 +9560,20 @@ static void bnxt_hwrm_set_link_common(struct bnxt *bp, struct hwrm_port_phy_cfg_
 
 int bnxt_hwrm_set_pause(struct bnxt *bp)
 {
-       struct hwrm_port_phy_cfg_input req = {0};
+       struct hwrm_port_phy_cfg_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
-       bnxt_hwrm_set_pause_common(bp, &req);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+       if (rc)
+               return rc;
+
+       bnxt_hwrm_set_pause_common(bp, req);
 
        if ((bp->link_info.autoneg & BNXT_AUTONEG_FLOW_CTRL) ||
            bp->link_info.force_link_chng)
-               bnxt_hwrm_set_link_common(bp, &req);
+               bnxt_hwrm_set_link_common(bp, req);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send(bp, req);
        if (!rc && !(bp->link_info.autoneg & BNXT_AUTONEG_FLOW_CTRL)) {
                /* since changing of pause setting doesn't trigger any link
                 * change event, the driver needs to update the current pause
@@ -9622,7 +9586,6 @@ int bnxt_hwrm_set_pause(struct bnxt *bp)
                        bnxt_report_link(bp);
        }
        bp->link_info.force_link_chng = false;
-       mutex_unlock(&bp->hwrm_cmd_lock);
        return rc;
 }
 
@@ -9651,22 +9614,27 @@ static void bnxt_hwrm_set_eee(struct bnxt *bp,
 
 int bnxt_hwrm_set_link_setting(struct bnxt *bp, bool set_pause, bool set_eee)
 {
-       struct hwrm_port_phy_cfg_input req = {0};
+       struct hwrm_port_phy_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
        if (set_pause)
-               bnxt_hwrm_set_pause_common(bp, &req);
+               bnxt_hwrm_set_pause_common(bp, req);
 
-       bnxt_hwrm_set_link_common(bp, &req);
+       bnxt_hwrm_set_link_common(bp, req);
 
        if (set_eee)
-               bnxt_hwrm_set_eee(bp, &req);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               bnxt_hwrm_set_eee(bp, req);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
 {
-       struct hwrm_port_phy_cfg_input req = {0};
+       struct hwrm_port_phy_cfg_input *req;
+       int rc;
 
        if (!BNXT_SINGLE_PF(bp))
                return 0;
@@ -9675,9 +9643,12 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
            !(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
-       req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+       if (rc)
+               return rc;
+
+       req->flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_fw_init_one(struct bnxt *bp);
@@ -9703,16 +9674,14 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
                int retry = 0, rc;
                u32 sts;
 
-               mutex_lock(&bp->hwrm_cmd_lock);
                do {
                        sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
-                       rc = __bnxt_hwrm_ver_get(bp, true);
+                       rc = bnxt_hwrm_poll(bp);
                        if (!BNXT_FW_IS_BOOTING(sts) &&
                            !BNXT_FW_IS_RECOVERING(sts))
                                break;
                        retry++;
                } while (rc == -EBUSY && retry < BNXT_FW_RETRY);
-               mutex_unlock(&bp->hwrm_cmd_lock);
 
                if (!BNXT_FW_IS_HEALTHY(sts)) {
                        netdev_err(bp->dev,
@@ -9732,8 +9701,8 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
 
 static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
-       struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_drv_if_change_input req = {0};
+       struct hwrm_func_drv_if_change_output *resp;
+       struct hwrm_func_drv_if_change_input *req;
        bool fw_reset = !bp->irq_tbl;
        bool resc_reinit = false;
        int rc, retry = 0;
@@ -9742,29 +9711,34 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
        if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_IF_CHANGE, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_DRV_IF_CHANGE);
+       if (rc)
+               return rc;
+
        if (up)
-               req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
-       mutex_lock(&bp->hwrm_cmd_lock);
+               req->flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP);
+       resp = hwrm_req_hold(bp, req);
+
+       hwrm_req_flags(bp, req, BNXT_HWRM_FULL_WAIT);
        while (retry < BNXT_FW_IF_RETRY) {
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
                if (rc != -EAGAIN)
                        break;
 
                msleep(50);
                retry++;
        }
-       if (!rc)
-               flags = le32_to_cpu(resp->flags);
-       mutex_unlock(&bp->hwrm_cmd_lock);
 
-       if (rc == -EAGAIN)
+       if (rc == -EAGAIN) {
+               hwrm_req_drop(bp, req);
                return rc;
-       if (rc && up) {
+       } else if (!rc) {
+               flags = le32_to_cpu(resp->flags);
+       } else if (up) {
                rc = bnxt_try_recover_fw(bp);
                fw_reset = true;
        }
+       hwrm_req_drop(bp, req);
        if (rc)
                return rc;
 
@@ -9833,8 +9807,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 
 static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
 {
-       struct hwrm_port_led_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_port_led_qcaps_input req = {0};
+       struct hwrm_port_led_qcaps_output *resp;
+       struct hwrm_port_led_qcaps_input *req;
        struct bnxt_pf_info *pf = &bp->pf;
        int rc;
 
@@ -9842,12 +9816,15 @@ static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
        if (BNXT_VF(bp) || bp->hwrm_spec_code < 0x10601)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_LED_QCAPS, -1, -1);
-       req.port_id = cpu_to_le16(pf->port_id);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_LED_QCAPS);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(pf->port_id);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               hwrm_req_drop(bp, req);
                return rc;
        }
        if (resp->num_leds > 0 && resp->num_leds < BNXT_MAX_LED) {
@@ -9867,52 +9844,64 @@ static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
                        }
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return 0;
 }
 
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp)
 {
-       struct hwrm_wol_filter_alloc_input req = {0};
-       struct hwrm_wol_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_wol_filter_alloc_output *resp;
+       struct hwrm_wol_filter_alloc_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_ALLOC, -1, -1);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       req.wol_type = WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT;
-       req.enables = cpu_to_le32(WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS);
-       memcpy(req.mac_address, bp->dev->dev_addr, ETH_ALEN);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_ALLOC);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       req->wol_type = WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT;
+       req->enables = cpu_to_le32(WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS);
+       memcpy(req->mac_address, bp->dev->dev_addr, ETH_ALEN);
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                bp->wol_filter_id = resp->wol_filter_id;
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp)
 {
-       struct hwrm_wol_filter_free_input req = {0};
+       struct hwrm_wol_filter_free_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_FREE, -1, -1);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       req.enables = cpu_to_le32(WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID);
-       req.wol_filter_id = bp->wol_filter_id;
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_FREE);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       req->enables = cpu_to_le32(WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID);
+       req->wol_filter_id = bp->wol_filter_id;
+
+       return hwrm_req_send(bp, req);
 }
 
 static u16 bnxt_hwrm_get_wol_fltrs(struct bnxt *bp, u16 handle)
 {
-       struct hwrm_wol_filter_qcfg_input req = {0};
-       struct hwrm_wol_filter_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_wol_filter_qcfg_output *resp;
+       struct hwrm_wol_filter_qcfg_input *req;
        u16 next_handle = 0;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_QCFG, -1, -1);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       req.handle = cpu_to_le16(handle);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_WOL_FILTER_QCFG);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       req->handle = cpu_to_le16(handle);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                next_handle = le16_to_cpu(resp->next_handle);
                if (next_handle != 0) {
@@ -9923,7 +9912,7 @@ static u16 bnxt_hwrm_get_wol_fltrs(struct bnxt *bp, u16 handle)
                        }
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return next_handle;
 }
 
@@ -9944,19 +9933,20 @@ static void bnxt_get_wol_settings(struct bnxt *bp)
 static ssize_t bnxt_show_temp(struct device *dev,
                              struct device_attribute *devattr, char *buf)
 {
-       struct hwrm_temp_monitor_query_input req = {0};
        struct hwrm_temp_monitor_query_output *resp;
+       struct hwrm_temp_monitor_query_input *req;
        struct bnxt *bp = dev_get_drvdata(dev);
        u32 len = 0;
        int rc;
 
-       resp = bp->hwrm_cmd_resp_addr;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY);
+       if (rc)
+               return rc;
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        if (rc)
                return rc;
        return len;
@@ -9979,12 +9969,13 @@ static void bnxt_hwmon_close(struct bnxt *bp)
 
 static void bnxt_hwmon_open(struct bnxt *bp)
 {
-       struct hwrm_temp_monitor_query_input req = {0};
+       struct hwrm_temp_monitor_query_input *req;
        struct pci_dev *pdev = bp->pdev;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1);
-       rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_TEMP_MONITOR_QUERY);
+       if (!rc)
+               rc = hwrm_req_send_silent(bp, req);
        if (rc == -EACCES || rc == -EOPNOTSUPP) {
                bnxt_hwmon_close(bp);
                return;
@@ -10209,7 +10200,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        bnxt_tx_enable(bp);
        mod_timer(&bp->timer, jiffies + bp->current_interval);
        /* Poll link status and check for SFP+ module status */
+       mutex_lock(&bp->link_lock);
        bnxt_get_port_module_status(bp);
+       mutex_unlock(&bp->link_lock);
 
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
@@ -10316,15 +10309,9 @@ static int bnxt_open(struct net_device *dev)
        if (rc)
                return rc;
 
-       if (bnxt_ptp_init(bp)) {
-               netdev_warn(dev, "PTP initialization failed.\n");
-               kfree(bp->ptp_cfg);
-               bp->ptp_cfg = NULL;
-       }
        rc = __bnxt_open_nic(bp, true, true);
        if (rc) {
                bnxt_hwrm_if_change(bp, false);
-               bnxt_ptp_clear(bp);
        } else {
                if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) {
                        if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
@@ -10415,7 +10402,6 @@ static int bnxt_close(struct net_device *dev)
 {
        struct bnxt *bp = netdev_priv(dev);
 
-       bnxt_ptp_clear(bp);
        bnxt_hwmon_close(bp);
        bnxt_close_nic(bp, true, true);
        bnxt_hwrm_shutdown_link(bp);
@@ -10426,53 +10412,60 @@ static int bnxt_close(struct net_device *dev)
 static int bnxt_hwrm_port_phy_read(struct bnxt *bp, u16 phy_addr, u16 reg,
                                   u16 *val)
 {
-       struct hwrm_port_phy_mdio_read_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_port_phy_mdio_read_input req = {0};
+       struct hwrm_port_phy_mdio_read_output *resp;
+       struct hwrm_port_phy_mdio_read_input *req;
        int rc;
 
        if (bp->hwrm_spec_code < 0x10a00)
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_READ, -1, -1);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       req.phy_addr = phy_addr;
-       req.reg_addr = cpu_to_le16(reg & 0x1f);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_MDIO_READ);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       req->phy_addr = phy_addr;
+       req->reg_addr = cpu_to_le16(reg & 0x1f);
        if (mdio_phy_id_is_c45(phy_addr)) {
-               req.cl45_mdio = 1;
-               req.phy_addr = mdio_phy_id_prtad(phy_addr);
-               req.dev_addr = mdio_phy_id_devad(phy_addr);
-               req.reg_addr = cpu_to_le16(reg);
+               req->cl45_mdio = 1;
+               req->phy_addr = mdio_phy_id_prtad(phy_addr);
+               req->dev_addr = mdio_phy_id_devad(phy_addr);
+               req->reg_addr = cpu_to_le16(reg);
        }
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                *val = le16_to_cpu(resp->reg_data);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_port_phy_write(struct bnxt *bp, u16 phy_addr, u16 reg,
                                    u16 val)
 {
-       struct hwrm_port_phy_mdio_write_input req = {0};
+       struct hwrm_port_phy_mdio_write_input *req;
+       int rc;
 
        if (bp->hwrm_spec_code < 0x10a00)
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_MDIO_WRITE, -1, -1);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
-       req.phy_addr = phy_addr;
-       req.reg_addr = cpu_to_le16(reg & 0x1f);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_MDIO_WRITE);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(bp->pf.port_id);
+       req->phy_addr = phy_addr;
+       req->reg_addr = cpu_to_le16(reg & 0x1f);
        if (mdio_phy_id_is_c45(phy_addr)) {
-               req.cl45_mdio = 1;
-               req.phy_addr = mdio_phy_id_prtad(phy_addr);
-               req.dev_addr = mdio_phy_id_devad(phy_addr);
-               req.reg_addr = cpu_to_le16(reg);
+               req->cl45_mdio = 1;
+               req->phy_addr = mdio_phy_id_prtad(phy_addr);
+               req->dev_addr = mdio_phy_id_devad(phy_addr);
+               req->reg_addr = cpu_to_le16(reg);
        }
-       req.reg_data = cpu_to_le16(val);
+       req->reg_data = cpu_to_le16(val);
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 /* rtnl_lock held */
@@ -10551,6 +10544,10 @@ static void bnxt_get_ring_stats(struct bnxt *bp,
                stats->multicast += BNXT_GET_RING_STATS64(sw, rx_mcast_pkts);
 
                stats->tx_dropped += BNXT_GET_RING_STATS64(sw, tx_error_pkts);
+
+               stats->rx_dropped +=
+                       cpr->sw_stats.rx.rx_netpoll_discards +
+                       cpr->sw_stats.rx.rx_oom_discards;
        }
 }
 
@@ -10565,6 +10562,7 @@ static void bnxt_add_prev_stats(struct bnxt *bp,
        stats->tx_bytes += prev_stats->tx_bytes;
        stats->rx_missed_errors += prev_stats->rx_missed_errors;
        stats->multicast += prev_stats->multicast;
+       stats->rx_dropped += prev_stats->rx_dropped;
        stats->tx_dropped += prev_stats->tx_dropped;
 }
 
@@ -10709,6 +10707,7 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
 {
        struct net_device *dev = bp->dev;
        struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+       struct hwrm_cfa_l2_filter_free_input *req;
        struct netdev_hw_addr *ha;
        int i, off = 0, rc;
        bool uc_update;
@@ -10720,19 +10719,16 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
        if (!uc_update)
                goto skip_uc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE);
+       if (rc)
+               return rc;
+       hwrm_req_hold(bp, req);
        for (i = 1; i < vnic->uc_filter_count; i++) {
-               struct hwrm_cfa_l2_filter_free_input req = {0};
-
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_FILTER_FREE, -1,
-                                      -1);
-
-               req.l2_filter_id = vnic->fw_l2_filter_id[i];
+               req->l2_filter_id = vnic->fw_l2_filter_id[i];
 
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 
        vnic->uc_filter_count = 1;
 
@@ -11084,22 +11080,30 @@ static netdev_features_t bnxt_features_check(struct sk_buff *skb,
 int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
                         u32 *reg_buf)
 {
-       struct hwrm_dbg_read_direct_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_dbg_read_direct_input req = {0};
+       struct hwrm_dbg_read_direct_output *resp;
+       struct hwrm_dbg_read_direct_input *req;
        __le32 *dbg_reg_buf;
        dma_addr_t mapping;
        int rc, i;
 
-       dbg_reg_buf = dma_alloc_coherent(&bp->pdev->dev, num_words * 4,
-                                        &mapping, GFP_KERNEL);
-       if (!dbg_reg_buf)
-               return -ENOMEM;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_READ_DIRECT, -1, -1);
-       req.host_dest_addr = cpu_to_le64(mapping);
-       req.read_addr = cpu_to_le32(reg_off + CHIMP_REG_VIEW_ADDR);
-       req.read_len32 = cpu_to_le32(num_words);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_DBG_READ_DIRECT);
+       if (rc)
+               return rc;
+
+       dbg_reg_buf = hwrm_req_dma_slice(bp, req, num_words * 4,
+                                        &mapping);
+       if (!dbg_reg_buf) {
+               rc = -ENOMEM;
+               goto dbg_rd_reg_exit;
+       }
+
+       req->host_dest_addr = cpu_to_le64(mapping);
+
+       resp = hwrm_req_hold(bp, req);
+       req->read_addr = cpu_to_le32(reg_off + CHIMP_REG_VIEW_ADDR);
+       req->read_len32 = cpu_to_le32(num_words);
+
+       rc = hwrm_req_send(bp, req);
        if (rc || resp->error_code) {
                rc = -EIO;
                goto dbg_rd_reg_exit;
@@ -11108,28 +11112,30 @@ int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
                reg_buf[i] = le32_to_cpu(dbg_reg_buf[i]);
 
 dbg_rd_reg_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       dma_free_coherent(&bp->pdev->dev, num_words * 4, dbg_reg_buf, mapping);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type,
                                       u32 ring_id, u32 *prod, u32 *cons)
 {
-       struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_dbg_ring_info_get_input req = {0};
+       struct hwrm_dbg_ring_info_get_output *resp;
+       struct hwrm_dbg_ring_info_get_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1);
-       req.ring_type = ring_type;
-       req.fw_ring_id = cpu_to_le32(ring_id);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_DBG_RING_INFO_GET);
+       if (rc)
+               return rc;
+
+       req->ring_type = ring_type;
+       req->fw_ring_id = cpu_to_le32(ring_id);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                *prod = le32_to_cpu(resp->producer_index);
                *cons = le32_to_cpu(resp->consumer_index);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -11187,18 +11193,22 @@ static void bnxt_dbg_dump_states(struct bnxt *bp)
 static int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int ring_nr)
 {
        struct bnxt_rx_ring_info *rxr = &bp->rx_ring[ring_nr];
-       struct hwrm_ring_reset_input req = {0};
+       struct hwrm_ring_reset_input *req;
        struct bnxt_napi *bnapi = rxr->bnapi;
        struct bnxt_cp_ring_info *cpr;
        u16 cp_ring_id;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_RING_RESET);
+       if (rc)
+               return rc;
 
        cpr = &bnapi->cp_ring;
        cp_ring_id = cpr->cp_ring_struct.fw_ring_id;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_RESET, cp_ring_id, -1);
-       req.ring_type = RING_RESET_REQ_RING_TYPE_RX_RING_GRP;
-       req.ring_id = cpu_to_le16(bp->grp_info[bnapi->index].fw_grp_id);
-       return hwrm_send_message_silent(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+       req->cmpl_ring = cpu_to_le16(cp_ring_id);
+       req->ring_type = RING_RESET_REQ_RING_TYPE_RX_RING_GRP;
+       req->ring_id = cpu_to_le16(bp->grp_info[bnapi->index].fw_grp_id);
+       return hwrm_req_send_silent(bp, req);
 }
 
 static void bnxt_reset_task(struct bnxt *bp, bool silent)
@@ -11405,7 +11415,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
                bnxt_clear_int_mode(bp);
                pci_disable_device(bp->pdev);
        }
-       bnxt_ptp_clear(bp);
        __bnxt_close_nic(bp, true, false);
        bnxt_vf_reps_free(bp);
        bnxt_clear_int_mode(bp);
@@ -11441,13 +11450,20 @@ static bool is_bnxt_fw_ok(struct bnxt *bp)
 static void bnxt_force_fw_reset(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
        u32 wait_dsecs;
 
        if (!test_bit(BNXT_STATE_OPEN, &bp->state) ||
            test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
                return;
 
-       set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+       if (ptp) {
+               spin_lock_bh(&ptp->ptp_lock);
+               set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+               spin_unlock_bh(&ptp->ptp_lock);
+       } else {
+               set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+       }
        bnxt_fw_reset_close(bp);
        wait_dsecs = fw_health->master_func_wait_dsecs;
        if (fw_health->master) {
@@ -11503,9 +11519,16 @@ void bnxt_fw_reset(struct bnxt *bp)
        bnxt_rtnl_lock_sp(bp);
        if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
            !test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) {
+               struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
                int n = 0, tmo;
 
-               set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+               if (ptp) {
+                       spin_lock_bh(&ptp->ptp_lock);
+                       set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+                       spin_unlock_bh(&ptp->ptp_lock);
+               } else {
+                       set_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+               }
                if (bp->pf.active_vfs &&
                    !test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
                        n = bnxt_get_registered_vfs(bp);
@@ -11614,12 +11637,15 @@ static void bnxt_init_ethtool_link_settings(struct bnxt *bp)
 static void bnxt_fw_echo_reply(struct bnxt *bp)
 {
        struct bnxt_fw_health *fw_health = bp->fw_health;
-       struct hwrm_func_echo_response_input req = {0};
+       struct hwrm_func_echo_response_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_ECHO_RESPONSE, -1, -1);
-       req.event_data1 = cpu_to_le32(fw_health->echo_req_data1);
-       req.event_data2 = cpu_to_le32(fw_health->echo_req_data2);
-       hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_ECHO_RESPONSE);
+       if (rc)
+               return;
+       req->event_data1 = cpu_to_le32(fw_health->echo_req_data1);
+       req->event_data2 = cpu_to_le32(fw_health->echo_req_data2);
+       hwrm_req_send(bp, req);
 }
 
 static void bnxt_sp_task(struct work_struct *work)
@@ -11824,18 +11850,6 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp)
                        return rc;
        }
 
-       if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
-               rc = bnxt_alloc_kong_hwrm_resources(bp);
-               if (rc)
-                       bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
-       }
-
-       if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
-           bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
-               rc = bnxt_alloc_hwrm_short_cmd_req(bp);
-               if (rc)
-                       return rc;
-       }
        bnxt_nvm_cfg_ver_get(bp);
 
        rc = bnxt_hwrm_func_reset(bp);
@@ -12010,14 +12024,16 @@ static void bnxt_reset_all(struct bnxt *bp)
                for (i = 0; i < fw_health->fw_reset_seq_cnt; i++)
                        bnxt_fw_reset_writel(bp, i);
        } else if (fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) {
-               struct hwrm_fw_reset_input req = {0};
-
-               bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
-               req.resp_addr = cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr);
-               req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
-               req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
-               req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
-               rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               struct hwrm_fw_reset_input *req;
+
+               rc = hwrm_req_init(bp, req, HWRM_FW_RESET);
+               if (!rc) {
+                       req->target_id = cpu_to_le16(HWRM_TARGET_ID_KONG);
+                       req->embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
+                       req->selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
+                       req->flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
+                       rc = hwrm_req_send(bp, req);
+               }
                if (rc != -ENODEV)
                        netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc);
        }
@@ -12144,7 +12160,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                fallthrough;
        case BNXT_FW_RESET_STATE_POLL_FW:
                bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT;
-               rc = __bnxt_hwrm_ver_get(bp, true);
+               rc = bnxt_hwrm_poll(bp);
                if (rc) {
                        if (bnxt_fw_reset_timeout(bp)) {
                                netdev_err(bp->dev, "Firmware reset aborted\n");
@@ -12177,6 +12193,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                bnxt_reenable_sriov(bp);
                bnxt_vf_reps_alloc(bp);
                bnxt_vf_reps_open(bp);
+               bnxt_ptp_reapply_pps(bp);
                bnxt_dl_health_recovery_done(bp);
                bnxt_dl_health_status_update(bp, true);
                rtnl_unlock();
@@ -12708,7 +12725,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
        .ndo_stop               = bnxt_close,
        .ndo_get_stats64        = bnxt_get_stats64,
        .ndo_set_rx_mode        = bnxt_set_rx_mode,
-       .ndo_do_ioctl           = bnxt_ioctl,
+       .ndo_eth_ioctl          = bnxt_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = bnxt_change_mac_addr,
        .ndo_change_mtu         = bnxt_change_mtu,
@@ -12747,6 +12764,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
        if (BNXT_PF(bp))
                devlink_port_type_clear(&bp->dl_port);
 
+       bnxt_ptp_clear(bp);
        pci_disable_pcie_error_reporting(pdev);
        unregister_netdev(dev);
        clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
@@ -12762,7 +12780,6 @@ static void bnxt_remove_one(struct pci_dev *pdev)
        bnxt_clear_int_mode(bp);
        bnxt_hwrm_func_drv_unrgtr(bp);
        bnxt_free_hwrm_resources(bp);
-       bnxt_free_hwrm_short_cmd_req(bp);
        bnxt_ethtool_free(bp);
        bnxt_dcb_free(bp);
        kfree(bp->edev);
@@ -12800,8 +12817,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
        if (!fw_dflt)
                return 0;
 
+       mutex_lock(&bp->link_lock);
        rc = bnxt_update_link(bp, false);
        if (rc) {
+               mutex_unlock(&bp->link_lock);
                netdev_err(bp->dev, "Probe phy can't update link (rc: %x)\n",
                           rc);
                return rc;
@@ -12814,6 +12833,7 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
                link_info->support_auto_speeds = link_info->support_speeds;
 
        bnxt_init_ethtool_link_settings(bp);
+       mutex_unlock(&bp->link_lock);
        return 0;
 }
 
@@ -13085,6 +13105,12 @@ static void bnxt_vpd_read_info(struct bnxt *bp)
                goto exit;
        }
 
+       i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
+       if (i < 0) {
+               netdev_err(bp->dev, "VPD READ-Only not found\n");
+               goto exit;
+       }
+
        ro_size = pci_vpd_lrdt_size(&vpd_data[i]);
        i += PCI_VPD_LRDT_TAG_SIZE;
        if (i + ro_size > vpd_size)
@@ -13356,9 +13382,9 @@ init_err_cleanup:
 
 init_err_pci_clean:
        bnxt_hwrm_func_drv_unrgtr(bp);
-       bnxt_free_hwrm_short_cmd_req(bp);
        bnxt_free_hwrm_resources(bp);
        bnxt_ethtool_free(bp);
+       bnxt_ptp_clear(bp);
        kfree(bp->ptp_cfg);
        bp->ptp_cfg = NULL;
        kfree(bp->fw_health);
index ba4e0fc..a8212dc 100644 (file)
@@ -496,6 +496,16 @@ struct rx_tpa_end_cmp_ext {
        !!((data1) &                                                    \
           ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
 
+#define BNXT_EVENT_ERROR_REPORT_TYPE(data1)                            \
+       (((data1) &                                                     \
+         ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK) >>\
+        ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT)
+
+#define BNXT_EVENT_INVALID_SIGNAL_DATA(data2)                          \
+       (((data2) &                                                     \
+         ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_MASK) >>\
+        ASYNC_EVENT_CMPL_ERROR_REPORT_INVALID_SIGNAL_EVENT_DATA2_PIN_ID_SFT)
+
 struct nqe_cn {
        __le16  type;
        #define NQ_CN_TYPE_MASK           0x3fUL
@@ -586,15 +596,17 @@ struct nqe_cn {
 #define MAX_TPA_SEGS_P5        0x3f
 
 #if (BNXT_PAGE_SHIFT == 16)
-#define MAX_RX_PAGES   1
+#define MAX_RX_PAGES_AGG_ENA   1
+#define MAX_RX_PAGES   4
 #define MAX_RX_AGG_PAGES       4
 #define MAX_TX_PAGES   1
-#define MAX_CP_PAGES   8
+#define MAX_CP_PAGES   16
 #else
-#define MAX_RX_PAGES   8
+#define MAX_RX_PAGES_AGG_ENA   8
+#define MAX_RX_PAGES   32
 #define MAX_RX_AGG_PAGES       32
 #define MAX_TX_PAGES   8
-#define MAX_CP_PAGES   64
+#define MAX_CP_PAGES   128
 #endif
 
 #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd))
@@ -612,6 +624,7 @@ struct nqe_cn {
 #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT)
 
 #define BNXT_MAX_RX_DESC_CNT           (RX_DESC_CNT * MAX_RX_PAGES - 1)
+#define BNXT_MAX_RX_DESC_CNT_JUM_ENA   (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1)
 #define BNXT_MAX_RX_JUM_DESC_CNT       (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
 #define BNXT_MAX_TX_DESC_CNT           (TX_DESC_CNT * MAX_TX_PAGES - 1)
 
@@ -656,37 +669,7 @@ struct nqe_cn {
 #define RING_CMP(idx)          ((idx) & bp->cp_ring_mask)
 #define NEXT_CMP(idx)          RING_CMP(ADV_RAW_CMP(idx, 1))
 
-#define BNXT_HWRM_MAX_REQ_LEN          (bp->hwrm_max_req_len)
-#define BNXT_HWRM_SHORT_REQ_LEN                sizeof(struct hwrm_short_input)
 #define DFLT_HWRM_CMD_TIMEOUT          500
-#define HWRM_CMD_MAX_TIMEOUT           40000
-#define SHORT_HWRM_CMD_TIMEOUT         20
-#define HWRM_CMD_TIMEOUT               (bp->hwrm_cmd_timeout)
-#define HWRM_RESET_TIMEOUT             ((HWRM_CMD_TIMEOUT) * 4)
-#define HWRM_COREDUMP_TIMEOUT          ((HWRM_CMD_TIMEOUT) * 12)
-#define BNXT_HWRM_REQ_MAX_SIZE         128
-#define BNXT_HWRM_REQS_PER_PAGE                (BNXT_PAGE_SIZE /       \
-                                        BNXT_HWRM_REQ_MAX_SIZE)
-#define HWRM_SHORT_MIN_TIMEOUT         3
-#define HWRM_SHORT_MAX_TIMEOUT         10
-#define HWRM_SHORT_TIMEOUT_COUNTER     5
-
-#define HWRM_MIN_TIMEOUT               25
-#define HWRM_MAX_TIMEOUT               40
-
-#define HWRM_WAIT_MUST_ABORT(bp, req)                                  \
-       (le16_to_cpu((req)->req_type) != HWRM_VER_GET &&                \
-        !bnxt_is_fw_healthy(bp))
-
-#define HWRM_TOTAL_TIMEOUT(n)  (((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?  \
-       ((n) * HWRM_SHORT_MIN_TIMEOUT) :                                \
-       (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +          \
-        ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
-
-#define HWRM_VALID_BIT_DELAY_USEC      150
-
-#define BNXT_HWRM_CHNL_CHIMP   0
-#define BNXT_HWRM_CHNL_KONG    1
 
 #define BNXT_RX_EVENT          1
 #define BNXT_AGG_EVENT         2
@@ -926,6 +909,8 @@ struct bnxt_rx_sw_stats {
        u64                     rx_l4_csum_errors;
        u64                     rx_resets;
        u64                     rx_buf_errors;
+       u64                     rx_oom_discards;
+       u64                     rx_netpoll_discards;
 };
 
 struct bnxt_cmn_sw_stats {
@@ -963,11 +948,11 @@ struct bnxt_cp_ring_info {
        struct dim              dim;
 
        union {
-               struct tx_cmp   *cp_desc_ring[MAX_CP_PAGES];
-               struct nqe_cn   *nq_desc_ring[MAX_CP_PAGES];
+               struct tx_cmp   **cp_desc_ring;
+               struct nqe_cn   **nq_desc_ring;
        };
 
-       dma_addr_t              cp_desc_mapping[MAX_CP_PAGES];
+       dma_addr_t              *cp_desc_mapping;
 
        struct bnxt_stats_mem   stats;
        u32                     hw_stats_ctx_id;
@@ -1888,19 +1873,15 @@ struct bnxt {
        #define BNXT_FW_CAP_VLAN_RX_STRIP               0x01000000
        #define BNXT_FW_CAP_VLAN_TX_INSERT              0x02000000
        #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED      0x04000000
+       #define BNXT_FW_CAP_PTP_PPS                     0x10000000
        #define BNXT_FW_CAP_RING_MONITOR                0x40000000
 
 #define BNXT_NEW_RM(bp)                ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
        u32                     hwrm_spec_code;
        u16                     hwrm_cmd_seq;
        u16                     hwrm_cmd_kong_seq;
-       u16                     hwrm_intr_seq_id;
-       void                    *hwrm_short_cmd_req_addr;
-       dma_addr_t              hwrm_short_cmd_req_dma_addr;
-       void                    *hwrm_cmd_resp_addr;
-       dma_addr_t              hwrm_cmd_resp_dma_addr;
-       void                    *hwrm_cmd_kong_resp_addr;
-       dma_addr_t              hwrm_cmd_kong_resp_dma_addr;
+       struct dma_pool         *hwrm_dma_pool;
+       struct hlist_head       hwrm_pending_list;
 
        struct rtnl_link_stats64        net_stats_prev;
        struct bnxt_stats_mem   port_stats;
@@ -2000,7 +1981,7 @@ struct bnxt {
        struct mutex            sriov_lock;
 #endif
 
-#if BITS_PER_LONG == 32
+#ifndef writeq
        /* ensure atomic 64-bit doorbell writes on 32-bit systems. */
        spinlock_t              db_lock;
 #endif
@@ -2129,7 +2110,7 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
                ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask);
 }
 
-#if BITS_PER_LONG == 32
+#ifndef writeq
 #define writeq(val64, db)                      \
 do {                                           \
        spin_lock(&bp->db_lock);                \
@@ -2171,63 +2152,6 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
        }
 }
 
-static inline bool bnxt_cfa_hwrm_message(u16 req_type)
-{
-       switch (req_type) {
-       case HWRM_CFA_ENCAP_RECORD_ALLOC:
-       case HWRM_CFA_ENCAP_RECORD_FREE:
-       case HWRM_CFA_DECAP_FILTER_ALLOC:
-       case HWRM_CFA_DECAP_FILTER_FREE:
-       case HWRM_CFA_EM_FLOW_ALLOC:
-       case HWRM_CFA_EM_FLOW_FREE:
-       case HWRM_CFA_EM_FLOW_CFG:
-       case HWRM_CFA_FLOW_ALLOC:
-       case HWRM_CFA_FLOW_FREE:
-       case HWRM_CFA_FLOW_INFO:
-       case HWRM_CFA_FLOW_FLUSH:
-       case HWRM_CFA_FLOW_STATS:
-       case HWRM_CFA_METER_PROFILE_ALLOC:
-       case HWRM_CFA_METER_PROFILE_FREE:
-       case HWRM_CFA_METER_PROFILE_CFG:
-       case HWRM_CFA_METER_INSTANCE_ALLOC:
-       case HWRM_CFA_METER_INSTANCE_FREE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
-{
-       return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
-               bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)));
-}
-
-static inline bool bnxt_hwrm_kong_chnl(struct bnxt *bp, struct input *req)
-{
-       return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
-               req->resp_addr == cpu_to_le64(bp->hwrm_cmd_kong_resp_dma_addr));
-}
-
-static inline void *bnxt_get_hwrm_resp_addr(struct bnxt *bp, void *req)
-{
-       if (bnxt_hwrm_kong_chnl(bp, (struct input *)req))
-               return bp->hwrm_cmd_kong_resp_addr;
-       else
-               return bp->hwrm_cmd_resp_addr;
-}
-
-static inline u16 bnxt_get_hwrm_seq_id(struct bnxt *bp, u16 dst)
-{
-       u16 seq_id;
-
-       if (dst == BNXT_HWRM_CHNL_CHIMP)
-               seq_id = bp->hwrm_cmd_seq++;
-       else
-               seq_id = bp->hwrm_cmd_kong_seq++;
-       return seq_id;
-}
-
 extern const u16 bnxt_lhint_arr[];
 
 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
@@ -2237,11 +2161,6 @@ u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
 void bnxt_set_tpa_flags(struct bnxt *bp);
 void bnxt_set_ring_params(struct bnxt *);
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
-void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16);
-int _hwrm_send_message(struct bnxt *, void *, u32, int);
-int _hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 len, int timeout);
-int hwrm_send_message(struct bnxt *, void *, u32, int);
-int hwrm_send_message_silent(struct bnxt *, void *, u32, int);
 int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap,
                            int bmap_size, bool async_only);
 int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings);
index 8a68df4..228a5db 100644 (file)
@@ -18,6 +18,7 @@
 #include <rdma/ib_verbs.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_dcb.h"
 
 #ifdef CONFIG_BNXT_DCB
@@ -38,38 +39,43 @@ static int bnxt_queue_to_tc(struct bnxt *bp, u8 queue_id)
 
 static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
 {
-       struct hwrm_queue_pri2cos_cfg_input req = {0};
+       struct hwrm_queue_pri2cos_cfg_input *req;
        u8 *pri2cos;
-       int i;
+       int rc, i;
+
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_PRI2COS_CFG);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_CFG, -1, -1);
-       req.flags = cpu_to_le32(QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR |
-                               QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN);
+       req->flags = cpu_to_le32(QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR |
+                                QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN);
 
-       pri2cos = &req.pri0_cos_queue_id;
+       pri2cos = &req->pri0_cos_queue_id;
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
                u8 qidx;
 
-               req.enables |= cpu_to_le32(
+               req->enables |= cpu_to_le32(
                        QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
 
                qidx = bp->tc_to_qidx[ets->prio_tc[i]];
                pri2cos[i] = bp->q_info[qidx].queue_id;
        }
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 {
-       struct hwrm_queue_pri2cos_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_queue_pri2cos_qcfg_input req = {0};
-       int rc = 0;
+       struct hwrm_queue_pri2cos_qcfg_output *resp;
+       struct hwrm_queue_pri2cos_qcfg_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
-       req.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_PRI2COS_QCFG);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                u8 *pri2cos = &resp->pri0_cos_queue_id;
                int i;
@@ -83,23 +89,26 @@ static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
                                ets->prio_tc[i] = tc;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
                                      u8 max_tc)
 {
-       struct hwrm_queue_cos2bw_cfg_input req = {0};
+       struct hwrm_queue_cos2bw_cfg_input *req;
        struct bnxt_cos2bw_cfg cos2bw;
        void *data;
-       int i;
+       int rc, i;
+
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_COS2BW_CFG);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
        for (i = 0; i < max_tc; i++) {
                u8 qidx = bp->tc_to_qidx[i];
 
-               req.enables |= cpu_to_le32(
+               req->enables |= cpu_to_le32(
                        QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID <<
                        qidx);
 
@@ -120,30 +129,32 @@ static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
                                cpu_to_le32((ets->tc_tx_bw[i] * 100) |
                                            BW_VALUE_UNIT_PERCENT1_100);
                }
-               data = &req.unused_0 + qidx * (sizeof(cos2bw) - 4);
+               data = &req->unused_0 + qidx * (sizeof(cos2bw) - 4);
                memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
                if (qidx == 0) {
-                       req.queue_id0 = cos2bw.queue_id;
-                       req.unused_0 = 0;
+                       req->queue_id0 = cos2bw.queue_id;
+                       req->unused_0 = 0;
                }
        }
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
 {
-       struct hwrm_queue_cos2bw_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_queue_cos2bw_qcfg_input req = {0};
+       struct hwrm_queue_cos2bw_qcfg_output *resp;
+       struct hwrm_queue_cos2bw_qcfg_input *req;
        struct bnxt_cos2bw_cfg cos2bw;
        void *data;
        int rc, i;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_QCFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_COS2BW_QCFG);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               hwrm_req_drop(bp, req);
                return rc;
        }
 
@@ -167,7 +178,7 @@ static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
                        ets->tc_tx_bw[tc] = cos2bw.bw_weight;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return 0;
 }
 
@@ -229,11 +240,12 @@ static int bnxt_queue_remap(struct bnxt *bp, unsigned int lltc_mask)
 
 static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
 {
-       struct hwrm_queue_pfcenable_cfg_input req = {0};
+       struct hwrm_queue_pfcenable_cfg_input *req;
        struct ieee_ets *my_ets = bp->ieee_ets;
        unsigned int tc_mask = 0, pri_mask = 0;
        u8 i, pri, lltc_count = 0;
        bool need_q_remap = false;
+       int rc;
 
        if (!my_ets)
                return -EINVAL;
@@ -266,38 +278,43 @@ static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
        if (need_q_remap)
                bnxt_queue_remap(bp, tc_mask);
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
-       req.flags = cpu_to_le32(pri_mask);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCENABLE_CFG);
+       if (rc)
+               return rc;
+
+       req->flags = cpu_to_le32(pri_mask);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_queue_pfc_qcfg(struct bnxt *bp, struct ieee_pfc *pfc)
 {
-       struct hwrm_queue_pfcenable_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_queue_pfcenable_qcfg_input req = {0};
+       struct hwrm_queue_pfcenable_qcfg_output *resp;
+       struct hwrm_queue_pfcenable_qcfg_input *req;
        u8 pri_mask;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_QCFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCENABLE_QCFG);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               hwrm_req_drop(bp, req);
                return rc;
        }
 
        pri_mask = le32_to_cpu(resp->flags);
        pfc->pfc_en = pri_mask;
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return 0;
 }
 
 static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
                                  bool add)
 {
-       struct hwrm_fw_set_structured_data_input set = {0};
-       struct hwrm_fw_get_structured_data_input get = {0};
+       struct hwrm_fw_set_structured_data_input *set;
+       struct hwrm_fw_get_structured_data_input *get;
        struct hwrm_struct_data_dcbx_app *fw_app;
        struct hwrm_struct_hdr *data;
        dma_addr_t mapping;
@@ -307,19 +324,26 @@ static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
        if (bp->hwrm_spec_code < 0x10601)
                return 0;
 
+       rc = hwrm_req_init(bp, get, HWRM_FW_GET_STRUCTURED_DATA);
+       if (rc)
+               return rc;
+
+       hwrm_req_hold(bp, get);
+       hwrm_req_alloc_flags(bp, get, GFP_KERNEL | __GFP_ZERO);
+
        n = IEEE_8021QAZ_MAX_TCS;
        data_len = sizeof(*data) + sizeof(*fw_app) * n;
-       data = dma_alloc_coherent(&bp->pdev->dev, data_len, &mapping,
-                                 GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
+       data = hwrm_req_dma_slice(bp, get, data_len, &mapping);
+       if (!data) {
+               rc = -ENOMEM;
+               goto set_app_exit;
+       }
 
-       bnxt_hwrm_cmd_hdr_init(bp, &get, HWRM_FW_GET_STRUCTURED_DATA, -1, -1);
-       get.dest_data_addr = cpu_to_le64(mapping);
-       get.structure_id = cpu_to_le16(STRUCT_HDR_STRUCT_ID_DCBX_APP);
-       get.subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
-       get.count = 0;
-       rc = hwrm_send_message(bp, &get, sizeof(get), HWRM_CMD_TIMEOUT);
+       get->dest_data_addr = cpu_to_le64(mapping);
+       get->structure_id = cpu_to_le16(STRUCT_HDR_STRUCT_ID_DCBX_APP);
+       get->subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
+       get->count = 0;
+       rc = hwrm_req_send(bp, get);
        if (rc)
                goto set_app_exit;
 
@@ -365,44 +389,49 @@ static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
        data->len = cpu_to_le16(sizeof(*fw_app) * n);
        data->subtype = cpu_to_le16(HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL);
 
-       bnxt_hwrm_cmd_hdr_init(bp, &set, HWRM_FW_SET_STRUCTURED_DATA, -1, -1);
-       set.src_data_addr = cpu_to_le64(mapping);
-       set.data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
-       set.hdr_cnt = 1;
-       rc = hwrm_send_message(bp, &set, sizeof(set), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, set, HWRM_FW_SET_STRUCTURED_DATA);
+       if (rc)
+               goto set_app_exit;
+
+       set->src_data_addr = cpu_to_le64(mapping);
+       set->data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
+       set->hdr_cnt = 1;
+       rc = hwrm_req_send(bp, set);
 
 set_app_exit:
-       dma_free_coherent(&bp->pdev->dev, data_len, data, mapping);
+       hwrm_req_drop(bp, get); /* dropping get request and associated slice */
        return rc;
 }
 
 static int bnxt_hwrm_queue_dscp_qcaps(struct bnxt *bp)
 {
-       struct hwrm_queue_dscp_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_queue_dscp_qcaps_input req = {0};
+       struct hwrm_queue_dscp_qcaps_output *resp;
+       struct hwrm_queue_dscp_qcaps_input *req;
        int rc;
 
        bp->max_dscp_value = 0;
        if (bp->hwrm_spec_code < 0x10800 || BNXT_VF(bp))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP_QCAPS, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_DSCP_QCAPS);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (!rc) {
                bp->max_dscp_value = (1 << resp->num_dscp_bits) - 1;
                if (bp->max_dscp_value < 0x3f)
                        bp->max_dscp_value = 0;
        }
-
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
 static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
                                        bool add)
 {
-       struct hwrm_queue_dscp2pri_cfg_input req = {0};
+       struct hwrm_queue_dscp2pri_cfg_input *req;
        struct bnxt_dscp2pri_entry *dscp2pri;
        dma_addr_t mapping;
        int rc;
@@ -410,23 +439,25 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
        if (bp->hwrm_spec_code < 0x10800)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP2PRI_CFG, -1, -1);
-       dscp2pri = dma_alloc_coherent(&bp->pdev->dev, sizeof(*dscp2pri),
-                                     &mapping, GFP_KERNEL);
-       if (!dscp2pri)
+       rc = hwrm_req_init(bp, req, HWRM_QUEUE_DSCP2PRI_CFG);
+       if (rc)
+               return rc;
+
+       dscp2pri = hwrm_req_dma_slice(bp, req, sizeof(*dscp2pri), &mapping);
+       if (!dscp2pri) {
+               hwrm_req_drop(bp, req);
                return -ENOMEM;
+       }
 
-       req.src_data_addr = cpu_to_le64(mapping);
+       req->src_data_addr = cpu_to_le64(mapping);
        dscp2pri->dscp = app->protocol;
        if (add)
                dscp2pri->mask = 0x3f;
        else
                dscp2pri->mask = 0;
        dscp2pri->pri = app->priority;
-       req.entry_cnt = cpu_to_le16(1);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       dma_free_coherent(&bp->pdev->dev, sizeof(*dscp2pri), dscp2pri,
-                         mapping);
+       req->entry_cnt = cpu_to_le16(1);
+       rc = hwrm_req_send(bp, req);
        return rc;
 }
 
index 64381be..1423cc6 100644 (file)
@@ -12,6 +12,7 @@
 #include <net/devlink.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_vfr.h"
 #include "bnxt_devlink.h"
 #include "bnxt_ethtool.h"
@@ -354,28 +355,34 @@ static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
 static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp,
                                     union devlink_param_value *nvm_cfg_ver)
 {
-       struct hwrm_nvm_get_variable_input req = {0};
+       struct hwrm_nvm_get_variable_input *req;
        union bnxt_nvm_data *data;
        dma_addr_t data_dma_addr;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
-       data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
-                                 &data_dma_addr, GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
+       rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE);
+       if (rc)
+               return rc;
+
+       data = hwrm_req_dma_slice(bp, req, sizeof(*data), &data_dma_addr);
+       if (!data) {
+               rc = -ENOMEM;
+               goto exit;
+       }
 
-       req.dest_data_addr = cpu_to_le64(data_dma_addr);
-       req.data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS);
-       req.option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER);
+       hwrm_req_hold(bp, req);
+       req->dest_data_addr = cpu_to_le64(data_dma_addr);
+       req->data_len = cpu_to_le16(BNXT_NVM_CFG_VER_BITS);
+       req->option_num = cpu_to_le16(NVM_OFF_NVM_CFG_VER);
 
-       rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send_silent(bp, req);
        if (!rc)
                bnxt_copy_from_nvm_data(nvm_cfg_ver, data,
                                        BNXT_NVM_CFG_VER_BITS,
                                        BNXT_NVM_CFG_VER_BYTES);
 
-       dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
+exit:
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -562,17 +569,20 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 }
 
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
-                            int msg_len, union devlink_param_value *val)
+                            union devlink_param_value *val)
 {
        struct hwrm_nvm_get_variable_input *req = msg;
        struct bnxt_dl_nvm_param nvm_param;
+       struct hwrm_err_output *resp;
        union bnxt_nvm_data *data;
        dma_addr_t data_dma_addr;
        int idx = 0, rc, i;
 
        /* Get/Set NVM CFG parameter is supported only on PFs */
-       if (BNXT_VF(bp))
+       if (BNXT_VF(bp)) {
+               hwrm_req_drop(bp, req);
                return -EPERM;
+       }
 
        for (i = 0; i < ARRAY_SIZE(nvm_params); i++) {
                if (nvm_params[i].id == param_id) {
@@ -581,18 +591,22 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
                }
        }
 
-       if (i == ARRAY_SIZE(nvm_params))
+       if (i == ARRAY_SIZE(nvm_params)) {
+               hwrm_req_drop(bp, req);
                return -EOPNOTSUPP;
+       }
 
        if (nvm_param.dir_type == BNXT_NVM_PORT_CFG)
                idx = bp->pf.port_id;
        else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
                idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
 
-       data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
-                                 &data_dma_addr, GFP_KERNEL);
-       if (!data)
+       data = hwrm_req_dma_slice(bp, req, sizeof(*data), &data_dma_addr);
+
+       if (!data) {
+               hwrm_req_drop(bp, req);
                return -ENOMEM;
+       }
 
        req->dest_data_addr = cpu_to_le64(data_dma_addr);
        req->data_len = cpu_to_le16(nvm_param.nvm_num_bits);
@@ -601,26 +615,24 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
        if (idx)
                req->dimensions = cpu_to_le16(1);
 
+       resp = hwrm_req_hold(bp, req);
        if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) {
                bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits,
                                      nvm_param.dl_num_bytes);
-               rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, msg);
        } else {
-               rc = hwrm_send_message_silent(bp, msg, msg_len,
-                                             HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send_silent(bp, msg);
                if (!rc) {
                        bnxt_copy_from_nvm_data(val, data,
                                                nvm_param.nvm_num_bits,
                                                nvm_param.dl_num_bytes);
                } else {
-                       struct hwrm_err_output *resp = bp->hwrm_cmd_resp_addr;
-
                        if (resp->cmd_err ==
                                NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST)
                                rc = -EOPNOTSUPP;
                }
        }
-       dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
+       hwrm_req_drop(bp, req);
        if (rc == -EACCES)
                netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
        return rc;
@@ -629,15 +641,17 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
 static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
                                 struct devlink_param_gset_ctx *ctx)
 {
-       struct hwrm_nvm_get_variable_input req = {0};
        struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+       struct hwrm_nvm_get_variable_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_VARIABLE, -1, -1);
-       rc = bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
-       if (!rc)
-               if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
-                       ctx->val.vbool = !ctx->val.vbool;
+       rc = hwrm_req_init(bp, req, HWRM_NVM_GET_VARIABLE);
+       if (rc)
+               return rc;
+
+       rc = bnxt_hwrm_nvm_req(bp, id, req, &ctx->val);
+       if (!rc && id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
+               ctx->val.vbool = !ctx->val.vbool;
 
        return rc;
 }
@@ -645,15 +659,18 @@ static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
 static int bnxt_dl_nvm_param_set(struct devlink *dl, u32 id,
                                 struct devlink_param_gset_ctx *ctx)
 {
-       struct hwrm_nvm_set_variable_input req = {0};
        struct bnxt *bp = bnxt_get_bp_from_dl(dl);
+       struct hwrm_nvm_set_variable_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_SET_VARIABLE, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_SET_VARIABLE);
+       if (rc)
+               return rc;
 
        if (id == BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK)
                ctx->val.vbool = !ctx->val.vbool;
 
-       return bnxt_hwrm_nvm_req(bp, id, &req, sizeof(req), &ctx->val);
+       return bnxt_hwrm_nvm_req(bp, id, req, &ctx->val);
 }
 
 static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
@@ -743,14 +760,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp)
 
 int bnxt_dl_register(struct bnxt *bp)
 {
+       const struct devlink_ops *devlink_ops;
        struct devlink_port_attrs attrs = {};
        struct devlink *dl;
        int rc;
 
        if (BNXT_PF(bp))
-               dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_dl_ops;
        else
-               dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_vf_dl_ops;
+
+       dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev);
        if (!dl) {
                netdev_warn(bp->dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -763,7 +783,7 @@ int bnxt_dl_register(struct bnxt *bp)
            bp->hwrm_spec_code > 0x10803)
                bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 
-       rc = devlink_register(dl, &bp->pdev->dev);
+       rc = devlink_register(dl);
        if (rc) {
                netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
                goto err_dl_free;
index 786ca51..b056e3c 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/timecounter.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_xdp.h"
 #include "bnxt_ptp.h"
 #include "bnxt_ethtool.h"
@@ -49,7 +50,9 @@ static void bnxt_set_msglevel(struct net_device *dev, u32 value)
 }
 
 static int bnxt_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = netdev_priv(dev);
        struct bnxt_coal *hw_coal;
@@ -79,7 +82,9 @@ static int bnxt_get_coalesce(struct net_device *dev,
 }
 
 static int bnxt_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnxt *bp = netdev_priv(dev);
        bool update_stats = false;
@@ -303,6 +308,7 @@ static const char * const bnxt_cmn_sw_stats_str[] = {
 enum {
        RX_TOTAL_DISCARDS,
        TX_TOTAL_DISCARDS,
+       RX_NETPOLL_DISCARDS,
 };
 
 static struct {
@@ -311,6 +317,7 @@ static struct {
 } bnxt_sw_func_stats[] = {
        {0, "rx_total_discard_pkts"},
        {0, "tx_total_discard_pkts"},
+       {0, "rx_total_netpoll_discards"},
 };
 
 #define NUM_RING_RX_SW_STATS           ARRAY_SIZE(bnxt_rx_sw_stats_str)
@@ -599,6 +606,8 @@ skip_tpa_ring_stats:
                        BNXT_GET_RING_STATS64(sw_stats, rx_discard_pkts);
                bnxt_sw_func_stats[TX_TOTAL_DISCARDS].counter +=
                        BNXT_GET_RING_STATS64(sw_stats, tx_discard_pkts);
+               bnxt_sw_func_stats[RX_NETPOLL_DISCARDS].counter +=
+                       cpr->sw_stats.rx.rx_netpoll_discards;
        }
 
        for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++, j++)
@@ -768,8 +777,13 @@ static void bnxt_get_ringparam(struct net_device *dev,
 {
        struct bnxt *bp = netdev_priv(dev);
 
-       ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
-       ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+               ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       } else {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
+               ering->rx_jumbo_max_pending = 0;
+       }
        ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
 
        ering->rx_pending = bp->rx_ring_size;
@@ -1352,7 +1366,7 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
                          void *_p)
 {
        struct pcie_ctx_hw_stats *hw_pcie_stats;
-       struct hwrm_pcie_qstats_input req = {0};
+       struct hwrm_pcie_qstats_input *req;
        struct bnxt *bp = netdev_priv(dev);
        dma_addr_t hw_pcie_stats_addr;
        int rc;
@@ -1363,18 +1377,21 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
        if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED))
                return;
 
-       hw_pcie_stats = dma_alloc_coherent(&bp->pdev->dev,
-                                          sizeof(*hw_pcie_stats),
-                                          &hw_pcie_stats_addr, GFP_KERNEL);
-       if (!hw_pcie_stats)
+       if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS))
                return;
 
+       hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats),
+                                          &hw_pcie_stats_addr);
+       if (!hw_pcie_stats) {
+               hwrm_req_drop(bp, req);
+               return;
+       }
+
        regs->version = 1;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PCIE_QSTATS, -1, -1);
-       req.pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
-       req.pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       hwrm_req_hold(bp, req); /* hold on to slice */
+       req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats));
+       req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                __le64 *src = (__le64 *)hw_pcie_stats;
                u64 *dst = (u64 *)(_p + BNXT_PXP_REG_LEN);
@@ -1383,9 +1400,7 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs,
                for (i = 0; i < sizeof(*hw_pcie_stats) / sizeof(__le64); i++)
                        dst[i] = le64_to_cpu(src[i]);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       dma_free_coherent(&bp->pdev->dev, sizeof(*hw_pcie_stats), hw_pcie_stats,
-                         hw_pcie_stats_addr);
+       hwrm_req_drop(bp, req);
 }
 
 static void bnxt_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -1965,7 +1980,7 @@ static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info,
 static int bnxt_set_fecparam(struct net_device *dev,
                             struct ethtool_fecparam *fecparam)
 {
-       struct hwrm_port_phy_cfg_input req = {0};
+       struct hwrm_port_phy_cfg_input *req;
        struct bnxt *bp = netdev_priv(dev);
        struct bnxt_link_info *link_info;
        u32 new_cfg, fec = fecparam->fec;
@@ -1997,9 +2012,11 @@ static int bnxt_set_fecparam(struct net_device *dev,
        }
 
 apply_fec:
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
-       req.flags = cpu_to_le32(new_cfg | PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+       if (rc)
+               return rc;
+       req->flags = cpu_to_le32(new_cfg | PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
+       rc = hwrm_req_send(bp, req);
        /* update current settings */
        if (!rc) {
                mutex_lock(&bp->link_lock);
@@ -2093,19 +2110,22 @@ static u32 bnxt_get_link(struct net_device *dev)
 int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
                               struct hwrm_nvm_get_dev_info_output *nvm_dev_info)
 {
-       struct hwrm_nvm_get_dev_info_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_nvm_get_dev_info_input req = {0};
+       struct hwrm_nvm_get_dev_info_output *resp;
+       struct hwrm_nvm_get_dev_info_input *req;
        int rc;
 
        if (BNXT_VF(bp))
                return -EOPNOTSUPP;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DEV_INFO);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                memcpy(nvm_dev_info, resp, sizeof(*resp));
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -2118,77 +2138,67 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
                                u16 ext, u16 *index, u32 *item_length,
                                u32 *data_length);
 
-static int __bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
-                             u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
-                             u32 dir_item_len, const u8 *data,
-                             size_t data_len)
+static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
+                           u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
+                           u32 dir_item_len, const u8 *data,
+                           size_t data_len)
 {
        struct bnxt *bp = netdev_priv(dev);
+       struct hwrm_nvm_write_input *req;
        int rc;
-       struct hwrm_nvm_write_input req = {0};
-       dma_addr_t dma_handle;
-       u8 *kmem = NULL;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_WRITE, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_WRITE);
+       if (rc)
+               return rc;
 
-       req.dir_type = cpu_to_le16(dir_type);
-       req.dir_ordinal = cpu_to_le16(dir_ordinal);
-       req.dir_ext = cpu_to_le16(dir_ext);
-       req.dir_attr = cpu_to_le16(dir_attr);
-       req.dir_item_length = cpu_to_le32(dir_item_len);
        if (data_len && data) {
-               req.dir_data_length = cpu_to_le32(data_len);
+               dma_addr_t dma_handle;
+               u8 *kmem;
 
-               kmem = dma_alloc_coherent(&bp->pdev->dev, data_len, &dma_handle,
-                                         GFP_KERNEL);
-               if (!kmem)
+               kmem = hwrm_req_dma_slice(bp, req, data_len, &dma_handle);
+               if (!kmem) {
+                       hwrm_req_drop(bp, req);
                        return -ENOMEM;
+               }
+
+               req->dir_data_length = cpu_to_le32(data_len);
 
                memcpy(kmem, data, data_len);
-               req.host_src_addr = cpu_to_le64(dma_handle);
+               req->host_src_addr = cpu_to_le64(dma_handle);
        }
 
-       rc = _hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT);
-       if (kmem)
-               dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle);
+       hwrm_req_timeout(bp, req, FLASH_NVRAM_TIMEOUT);
+       req->dir_type = cpu_to_le16(dir_type);
+       req->dir_ordinal = cpu_to_le16(dir_ordinal);
+       req->dir_ext = cpu_to_le16(dir_ext);
+       req->dir_attr = cpu_to_le16(dir_attr);
+       req->dir_item_length = cpu_to_le32(dir_item_len);
+       rc = hwrm_req_send(bp, req);
 
        if (rc == -EACCES)
                bnxt_print_admin_err(bp);
        return rc;
 }
 
-static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
-                           u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
-                           const u8 *data, size_t data_len)
-{
-       struct bnxt *bp = netdev_priv(dev);
-       int rc;
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = __bnxt_flash_nvram(dev, dir_type, dir_ordinal, dir_ext, dir_attr,
-                               0, data, data_len);
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       return rc;
-}
-
 static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
                                    u8 self_reset, u8 flags)
 {
-       struct hwrm_fw_reset_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
+       struct hwrm_fw_reset_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FW_RESET);
+       if (rc)
+               return rc;
 
-       req.embedded_proc_type = proc_type;
-       req.selfrst_status = self_reset;
-       req.flags = flags;
+       req->embedded_proc_type = proc_type;
+       req->selfrst_status = self_reset;
+       req->flags = flags;
 
        if (proc_type == FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP) {
-               rc = hwrm_send_message_silent(bp, &req, sizeof(req),
-                                             HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send_silent(bp, req);
        } else {
-               rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
                if (rc == -EACCES)
                        bnxt_print_admin_err(bp);
        }
@@ -2326,7 +2336,7 @@ static int bnxt_flash_firmware(struct net_device *dev,
                return -EINVAL;
        }
        rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
-                             0, 0, fw_data, fw_size);
+                             0, 0, 0, fw_data, fw_size);
        if (rc == 0)    /* Firmware update successful */
                rc = bnxt_firmware_reset(dev, dir_type);
 
@@ -2379,7 +2389,7 @@ static int bnxt_flash_microcode(struct net_device *dev,
                return -EINVAL;
        }
        rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
-                             0, 0, fw_data, fw_size);
+                             0, 0, 0, fw_data, fw_size);
 
        return rc;
 }
@@ -2445,7 +2455,7 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
                rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size);
        else
                rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
-                                     0, 0, fw->data, fw->size);
+                                     0, 0, 0, fw->data, fw->size);
        release_firmware(fw);
        return rc;
 }
@@ -2457,21 +2467,23 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
 int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw,
                                   u32 install_type)
 {
-       struct hwrm_nvm_install_update_input install = {0};
-       struct hwrm_nvm_install_update_output resp = {0};
-       struct hwrm_nvm_modify_input modify = {0};
+       struct hwrm_nvm_install_update_input *install;
+       struct hwrm_nvm_install_update_output *resp;
+       struct hwrm_nvm_modify_input *modify;
        struct bnxt *bp = netdev_priv(dev);
        bool defrag_attempted = false;
        dma_addr_t dma_handle;
        u8 *kmem = NULL;
        u32 modify_len;
        u32 item_len;
-       int rc = 0;
        u16 index;
+       int rc;
 
        bnxt_hwrm_fw_set_time(bp);
 
-       bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1);
+       rc = hwrm_req_init(bp, modify, HWRM_NVM_MODIFY);
+       if (rc)
+               return rc;
 
        /* Try allocating a large DMA buffer first.  Older fw will
         * cause excessive NVRAM erases when using small blocks.
@@ -2479,22 +2491,33 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
        modify_len = roundup_pow_of_two(fw->size);
        modify_len = min_t(u32, modify_len, BNXT_PKG_DMA_SIZE);
        while (1) {
-               kmem = dma_alloc_coherent(&bp->pdev->dev, modify_len,
-                                         &dma_handle, GFP_KERNEL);
+               kmem = hwrm_req_dma_slice(bp, modify, modify_len, &dma_handle);
                if (!kmem && modify_len > PAGE_SIZE)
                        modify_len /= 2;
                else
                        break;
        }
-       if (!kmem)
+       if (!kmem) {
+               hwrm_req_drop(bp, modify);
                return -ENOMEM;
+       }
 
-       modify.host_src_addr = cpu_to_le64(dma_handle);
+       rc = hwrm_req_init(bp, install, HWRM_NVM_INSTALL_UPDATE);
+       if (rc) {
+               hwrm_req_drop(bp, modify);
+               return rc;
+       }
 
-       bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1);
+       hwrm_req_timeout(bp, modify, FLASH_PACKAGE_TIMEOUT);
+       hwrm_req_timeout(bp, install, INSTALL_PACKAGE_TIMEOUT);
+
+       hwrm_req_hold(bp, modify);
+       modify->host_src_addr = cpu_to_le64(dma_handle);
+
+       resp = hwrm_req_hold(bp, install);
        if ((install_type & 0xffff) == 0)
                install_type >>= 16;
-       install.install_type = cpu_to_le32(install_type);
+       install->install_type = cpu_to_le32(install_type);
 
        do {
                u32 copied = 0, len = modify_len;
@@ -2514,76 +2537,69 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware
                        break;
                }
 
-               modify.dir_idx = cpu_to_le16(index);
+               modify->dir_idx = cpu_to_le16(index);
 
                if (fw->size > modify_len)
-                       modify.flags = BNXT_NVM_MORE_FLAG;
+                       modify->flags = BNXT_NVM_MORE_FLAG;
                while (copied < fw->size) {
                        u32 balance = fw->size - copied;
 
                        if (balance <= modify_len) {
                                len = balance;
                                if (copied)
-                                       modify.flags |= BNXT_NVM_LAST_FLAG;
+                                       modify->flags |= BNXT_NVM_LAST_FLAG;
                        }
                        memcpy(kmem, fw->data + copied, len);
-                       modify.len = cpu_to_le32(len);
-                       modify.offset = cpu_to_le32(copied);
-                       rc = hwrm_send_message(bp, &modify, sizeof(modify),
-                                              FLASH_PACKAGE_TIMEOUT);
+                       modify->len = cpu_to_le32(len);
+                       modify->offset = cpu_to_le32(copied);
+                       rc = hwrm_req_send(bp, modify);
                        if (rc)
                                goto pkg_abort;
                        copied += len;
                }
-               mutex_lock(&bp->hwrm_cmd_lock);
-               rc = _hwrm_send_message_silent(bp, &install, sizeof(install),
-                                              INSTALL_PACKAGE_TIMEOUT);
-               memcpy(&resp, bp->hwrm_cmd_resp_addr, sizeof(resp));
+
+               rc = hwrm_req_send_silent(bp, install);
 
                if (defrag_attempted) {
                        /* We have tried to defragment already in the previous
                         * iteration. Return with the result for INSTALL_UPDATE
                         */
-                       mutex_unlock(&bp->hwrm_cmd_lock);
                        break;
                }
 
-               if (rc && ((struct hwrm_err_output *)&resp)->cmd_err ==
+               if (rc && ((struct hwrm_err_output *)resp)->cmd_err ==
                    NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
-                       install.flags =
+                       install->flags =
                                cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
 
-                       rc = _hwrm_send_message_silent(bp, &install,
-                                                      sizeof(install),
-                                                      INSTALL_PACKAGE_TIMEOUT);
-                       memcpy(&resp, bp->hwrm_cmd_resp_addr, sizeof(resp));
+                       rc = hwrm_req_send_silent(bp, install);
 
-                       if (rc && ((struct hwrm_err_output *)&resp)->cmd_err ==
+                       if (rc && ((struct hwrm_err_output *)resp)->cmd_err ==
                            NVM_INSTALL_UPDATE_CMD_ERR_CODE_NO_SPACE) {
                                /* FW has cleared NVM area, driver will create
                                 * UPDATE directory and try the flash again
                                 */
                                defrag_attempted = true;
-                               install.flags = 0;
-                               rc = __bnxt_flash_nvram(bp->dev,
-                                                       BNX_DIR_TYPE_UPDATE,
-                                                       BNX_DIR_ORDINAL_FIRST,
-                                                       0, 0, item_len, NULL,
-                                                       0);
+                               install->flags = 0;
+                               rc = bnxt_flash_nvram(bp->dev,
+                                                     BNX_DIR_TYPE_UPDATE,
+                                                     BNX_DIR_ORDINAL_FIRST,
+                                                     0, 0, item_len, NULL, 0);
                        } else if (rc) {
                                netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc);
                        }
                } else if (rc) {
                        netdev_err(dev, "HWRM_NVM_INSTALL_UPDATE failure rc :%x\n", rc);
                }
-               mutex_unlock(&bp->hwrm_cmd_lock);
        } while (defrag_attempted && !rc);
 
 pkg_abort:
-       dma_free_coherent(&bp->pdev->dev, modify_len, kmem, dma_handle);
-       if (resp.result) {
+       hwrm_req_drop(bp, modify);
+       hwrm_req_drop(bp, install);
+
+       if (resp->result) {
                netdev_err(dev, "PKG install error = %d, problem_item = %d\n",
-                          (s8)resp.result, (int)resp.problem_item);
+                          (s8)resp->result, (int)resp->problem_item);
                rc = -ENOPKG;
        }
        if (rc == -EACCES)
@@ -2629,20 +2645,22 @@ static int bnxt_flash_device(struct net_device *dev,
 
 static int nvm_get_dir_info(struct net_device *dev, u32 *entries, u32 *length)
 {
+       struct hwrm_nvm_get_dir_info_output *output;
+       struct hwrm_nvm_get_dir_info_input *req;
        struct bnxt *bp = netdev_priv(dev);
        int rc;
-       struct hwrm_nvm_get_dir_info_input req = {0};
-       struct hwrm_nvm_get_dir_info_output *output = bp->hwrm_cmd_resp_addr;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DIR_INFO, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DIR_INFO);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       output = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                *entries = le32_to_cpu(output->entries);
                *length = le32_to_cpu(output->entry_length);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -2668,7 +2686,7 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
        u8 *buf;
        size_t buflen;
        dma_addr_t dma_handle;
-       struct hwrm_nvm_get_dir_entries_input req = {0};
+       struct hwrm_nvm_get_dir_entries_input *req;
 
        rc = nvm_get_dir_info(dev, &dir_entries, &entry_length);
        if (rc != 0)
@@ -2686,20 +2704,23 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
        len -= 2;
        memset(data, 0xff, len);
 
+       rc = hwrm_req_init(bp, req, HWRM_NVM_GET_DIR_ENTRIES);
+       if (rc)
+               return rc;
+
        buflen = dir_entries * entry_length;
-       buf = dma_alloc_coherent(&bp->pdev->dev, buflen, &dma_handle,
-                                GFP_KERNEL);
+       buf = hwrm_req_dma_slice(bp, req, buflen, &dma_handle);
        if (!buf) {
-               netdev_err(dev, "dma_alloc_coherent failure, length = %u\n",
-                          (unsigned)buflen);
+               hwrm_req_drop(bp, req);
                return -ENOMEM;
        }
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DIR_ENTRIES, -1, -1);
-       req.host_dest_addr = cpu_to_le64(dma_handle);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->host_dest_addr = cpu_to_le64(dma_handle);
+
+       hwrm_req_hold(bp, req); /* hold the slice */
+       rc = hwrm_req_send(bp, req);
        if (rc == 0)
                memcpy(data, buf, len > buflen ? buflen : len);
-       dma_free_coherent(&bp->pdev->dev, buflen, buf, dma_handle);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -2710,28 +2731,31 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
        int rc;
        u8 *buf;
        dma_addr_t dma_handle;
-       struct hwrm_nvm_read_input req = {0};
+       struct hwrm_nvm_read_input *req;
 
        if (!length)
                return -EINVAL;
 
-       buf = dma_alloc_coherent(&bp->pdev->dev, length, &dma_handle,
-                                GFP_KERNEL);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_READ);
+       if (rc)
+               return rc;
+
+       buf = hwrm_req_dma_slice(bp, req, length, &dma_handle);
        if (!buf) {
-               netdev_err(dev, "dma_alloc_coherent failure, length = %u\n",
-                          (unsigned)length);
+               hwrm_req_drop(bp, req);
                return -ENOMEM;
        }
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_READ, -1, -1);
-       req.host_dest_addr = cpu_to_le64(dma_handle);
-       req.dir_idx = cpu_to_le16(index);
-       req.offset = cpu_to_le32(offset);
-       req.len = cpu_to_le32(length);
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->host_dest_addr = cpu_to_le64(dma_handle);
+       req->dir_idx = cpu_to_le16(index);
+       req->offset = cpu_to_le32(offset);
+       req->len = cpu_to_le32(length);
+
+       hwrm_req_hold(bp, req); /* hold the slice */
+       rc = hwrm_req_send(bp, req);
        if (rc == 0)
                memcpy(data, buf, length);
-       dma_free_coherent(&bp->pdev->dev, length, buf, dma_handle);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -2739,20 +2763,23 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
                                u16 ext, u16 *index, u32 *item_length,
                                u32 *data_length)
 {
+       struct hwrm_nvm_find_dir_entry_output *output;
+       struct hwrm_nvm_find_dir_entry_input *req;
        struct bnxt *bp = netdev_priv(dev);
        int rc;
-       struct hwrm_nvm_find_dir_entry_input req = {0};
-       struct hwrm_nvm_find_dir_entry_output *output = bp->hwrm_cmd_resp_addr;
-
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_FIND_DIR_ENTRY, -1, -1);
-       req.enables = 0;
-       req.dir_idx = 0;
-       req.dir_type = cpu_to_le16(type);
-       req.dir_ordinal = cpu_to_le16(ordinal);
-       req.dir_ext = cpu_to_le16(ext);
-       req.opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ;
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+       rc = hwrm_req_init(bp, req, HWRM_NVM_FIND_DIR_ENTRY);
+       if (rc)
+               return rc;
+
+       req->enables = 0;
+       req->dir_idx = 0;
+       req->dir_type = cpu_to_le16(type);
+       req->dir_ordinal = cpu_to_le16(ordinal);
+       req->dir_ext = cpu_to_le16(ext);
+       req->opt_ordinal = NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ;
+       output = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (rc == 0) {
                if (index)
                        *index = le16_to_cpu(output->dir_idx);
@@ -2761,7 +2788,7 @@ static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
                if (data_length)
                        *data_length = le32_to_cpu(output->dir_data_length);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -2856,12 +2883,16 @@ static int bnxt_get_eeprom(struct net_device *dev,
 
 static int bnxt_erase_nvram_directory(struct net_device *dev, u8 index)
 {
+       struct hwrm_nvm_erase_dir_entry_input *req;
        struct bnxt *bp = netdev_priv(dev);
-       struct hwrm_nvm_erase_dir_entry_input req = {0};
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_ERASE_DIR_ENTRY, -1, -1);
-       req.dir_idx = cpu_to_le16(index);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_NVM_ERASE_DIR_ENTRY);
+       if (rc)
+               return rc;
+
+       req->dir_idx = cpu_to_le16(index);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_set_eeprom(struct net_device *dev,
@@ -2901,7 +2932,7 @@ static int bnxt_set_eeprom(struct net_device *dev,
        ordinal = eeprom->offset >> 16;
        attr = eeprom->offset & 0xffff;
 
-       return bnxt_flash_nvram(dev, type, ordinal, ext, attr, data,
+       return bnxt_flash_nvram(dev, type, ordinal, ext, attr, 0, data,
                                eeprom->len);
 }
 
@@ -2989,31 +3020,33 @@ static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr,
                                            u16 page_number, u16 start_addr,
                                            u16 data_length, u8 *buf)
 {
-       struct hwrm_port_phy_i2c_read_input req = {0};
-       struct hwrm_port_phy_i2c_read_output *output = bp->hwrm_cmd_resp_addr;
+       struct hwrm_port_phy_i2c_read_output *output;
+       struct hwrm_port_phy_i2c_read_input *req;
        int rc, byte_offset = 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_I2C_READ, -1, -1);
-       req.i2c_slave_addr = i2c_addr;
-       req.page_number = cpu_to_le16(page_number);
-       req.port_id = cpu_to_le16(bp->pf.port_id);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_I2C_READ);
+       if (rc)
+               return rc;
+
+       output = hwrm_req_hold(bp, req);
+       req->i2c_slave_addr = i2c_addr;
+       req->page_number = cpu_to_le16(page_number);
+       req->port_id = cpu_to_le16(bp->pf.port_id);
        do {
                u16 xfer_size;
 
                xfer_size = min_t(u16, data_length, BNXT_MAX_PHY_I2C_RESP_SIZE);
                data_length -= xfer_size;
-               req.page_offset = cpu_to_le16(start_addr + byte_offset);
-               req.data_length = xfer_size;
-               req.enables = cpu_to_le32(start_addr + byte_offset ?
+               req->page_offset = cpu_to_le16(start_addr + byte_offset);
+               req->data_length = xfer_size;
+               req->enables = cpu_to_le32(start_addr + byte_offset ?
                                 PORT_PHY_I2C_READ_REQ_ENABLES_PAGE_OFFSET : 0);
-               mutex_lock(&bp->hwrm_cmd_lock);
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
                if (!rc)
                        memcpy(buf + byte_offset, output->data, xfer_size);
-               mutex_unlock(&bp->hwrm_cmd_lock);
                byte_offset += xfer_size;
        } while (!rc && data_length > 0);
+       hwrm_req_drop(bp, req);
 
        return rc;
 }
@@ -3122,13 +3155,13 @@ static int bnxt_nway_reset(struct net_device *dev)
 static int bnxt_set_phys_id(struct net_device *dev,
                            enum ethtool_phys_id_state state)
 {
-       struct hwrm_port_led_cfg_input req = {0};
+       struct hwrm_port_led_cfg_input *req;
        struct bnxt *bp = netdev_priv(dev);
        struct bnxt_pf_info *pf = &bp->pf;
        struct bnxt_led_cfg *led_cfg;
        u8 led_state;
        __le16 duration;
-       int i;
+       int rc, i;
 
        if (!bp->num_leds || BNXT_VF(bp))
                return -EOPNOTSUPP;
@@ -3142,27 +3175,35 @@ static int bnxt_set_phys_id(struct net_device *dev,
        } else {
                return -EINVAL;
        }
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_LED_CFG, -1, -1);
-       req.port_id = cpu_to_le16(pf->port_id);
-       req.num_leds = bp->num_leds;
-       led_cfg = (struct bnxt_led_cfg *)&req.led0_id;
+       rc = hwrm_req_init(bp, req, HWRM_PORT_LED_CFG);
+       if (rc)
+               return rc;
+
+       req->port_id = cpu_to_le16(pf->port_id);
+       req->num_leds = bp->num_leds;
+       led_cfg = (struct bnxt_led_cfg *)&req->led0_id;
        for (i = 0; i < bp->num_leds; i++, led_cfg++) {
-               req.enables |= BNXT_LED_DFLT_ENABLES(i);
+               req->enables |= BNXT_LED_DFLT_ENABLES(i);
                led_cfg->led_id = bp->leds[i].led_id;
                led_cfg->led_state = led_state;
                led_cfg->led_blink_on = duration;
                led_cfg->led_blink_off = duration;
                led_cfg->led_group_id = bp->leds[i].led_group_id;
        }
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_selftest_irq(struct bnxt *bp, u16 cmpl_ring)
 {
-       struct hwrm_selftest_irq_input req = {0};
+       struct hwrm_selftest_irq_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_SELFTEST_IRQ);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_IRQ, cmpl_ring, -1);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->cmpl_ring = cpu_to_le16(cmpl_ring);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_test_irq(struct bnxt *bp)
@@ -3182,31 +3223,37 @@ static int bnxt_test_irq(struct bnxt *bp)
 
 static int bnxt_hwrm_mac_loopback(struct bnxt *bp, bool enable)
 {
-       struct hwrm_port_mac_cfg_input req = {0};
+       struct hwrm_port_mac_cfg_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
 
-       req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_LPBK);
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_LPBK);
        if (enable)
-               req.lpbk = PORT_MAC_CFG_REQ_LPBK_LOCAL;
+               req->lpbk = PORT_MAC_CFG_REQ_LPBK_LOCAL;
        else
-               req.lpbk = PORT_MAC_CFG_REQ_LPBK_NONE;
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               req->lpbk = PORT_MAC_CFG_REQ_LPBK_NONE;
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds)
 {
-       struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_port_phy_qcaps_input req = {0};
+       struct hwrm_port_phy_qcaps_output *resp;
+       struct hwrm_port_phy_qcaps_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_QCAPS);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                *force_speeds = le16_to_cpu(resp->supported_speeds_force_mode);
 
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -3241,7 +3288,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
        req->force_link_speed = cpu_to_le16(fw_speed);
        req->flags |= cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE |
                                  PORT_PHY_CFG_REQ_FLAGS_RESET_PHY);
-       rc = hwrm_send_message(bp, req, sizeof(*req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send(bp, req);
        req->flags = 0;
        req->force_link_speed = cpu_to_le16(0);
        return rc;
@@ -3249,21 +3296,29 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
 
 static int bnxt_hwrm_phy_loopback(struct bnxt *bp, bool enable, bool ext)
 {
-       struct hwrm_port_phy_cfg_input req = {0};
+       struct hwrm_port_phy_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_PORT_PHY_CFG);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
+       /* prevent bnxt_disable_an_for_lpbk() from consuming the request */
+       hwrm_req_hold(bp, req);
 
        if (enable) {
-               bnxt_disable_an_for_lpbk(bp, &req);
+               bnxt_disable_an_for_lpbk(bp, req);
                if (ext)
-                       req.lpbk = PORT_PHY_CFG_REQ_LPBK_EXTERNAL;
+                       req->lpbk = PORT_PHY_CFG_REQ_LPBK_EXTERNAL;
                else
-                       req.lpbk = PORT_PHY_CFG_REQ_LPBK_LOCAL;
+                       req->lpbk = PORT_PHY_CFG_REQ_LPBK_LOCAL;
        } else {
-               req.lpbk = PORT_PHY_CFG_REQ_LPBK_NONE;
+               req->lpbk = PORT_PHY_CFG_REQ_LPBK_NONE;
        }
-       req.enables = cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_LPBK);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->enables = cpu_to_le32(PORT_PHY_CFG_REQ_ENABLES_LPBK);
+       rc = hwrm_req_send(bp, req);
+       hwrm_req_drop(bp, req);
+       return rc;
 }
 
 static int bnxt_rx_loopback(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
@@ -3361,7 +3416,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
                data[i] = (u8)(i & 0xff);
 
        map = dma_map_single(&bp->pdev->dev, skb->data, pkt_size,
-                            PCI_DMA_TODEVICE);
+                            DMA_TO_DEVICE);
        if (dma_mapping_error(&bp->pdev->dev, map)) {
                dev_kfree_skb(skb);
                return -EIO;
@@ -3374,24 +3429,28 @@ static int bnxt_run_loopback(struct bnxt *bp)
        bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
        rc = bnxt_poll_loopback(bp, cpr, pkt_size);
 
-       dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
+       dma_unmap_single(&bp->pdev->dev, map, pkt_size, DMA_TO_DEVICE);
        dev_kfree_skb(skb);
        return rc;
 }
 
 static int bnxt_run_fw_tests(struct bnxt *bp, u8 test_mask, u8 *test_results)
 {
-       struct hwrm_selftest_exec_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_selftest_exec_input req = {0};
+       struct hwrm_selftest_exec_output *resp;
+       struct hwrm_selftest_exec_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_EXEC, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       resp->test_success = 0;
-       req.flags = test_mask;
-       rc = _hwrm_send_message(bp, &req, sizeof(req), bp->test_info->timeout);
+       rc = hwrm_req_init(bp, req, HWRM_SELFTEST_EXEC);
+       if (rc)
+               return rc;
+
+       hwrm_req_timeout(bp, req, bp->test_info->timeout);
+       req->flags = test_mask;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        *test_results = resp->test_success;
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -3550,32 +3609,34 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
        return 0;
 }
 
-static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
+static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg,
                                  struct bnxt_hwrm_dbg_dma_info *info)
 {
-       struct hwrm_dbg_cmn_output *cmn_resp = bp->hwrm_cmd_resp_addr;
        struct hwrm_dbg_cmn_input *cmn_req = msg;
        __le16 *seq_ptr = msg + info->seq_off;
+       struct hwrm_dbg_cmn_output *cmn_resp;
        u16 seq = 0, len, segs_off;
-       void *resp = cmn_resp;
        dma_addr_t dma_handle;
+       void *dma_buf, *resp;
        int rc, off = 0;
-       void *dma_buf;
 
-       dma_buf = dma_alloc_coherent(&bp->pdev->dev, info->dma_len, &dma_handle,
-                                    GFP_KERNEL);
-       if (!dma_buf)
+       dma_buf = hwrm_req_dma_slice(bp, msg, info->dma_len, &dma_handle);
+       if (!dma_buf) {
+               hwrm_req_drop(bp, msg);
                return -ENOMEM;
+       }
+
+       hwrm_req_timeout(bp, msg, HWRM_COREDUMP_TIMEOUT);
+       cmn_resp = hwrm_req_hold(bp, msg);
+       resp = cmn_resp;
 
        segs_off = offsetof(struct hwrm_dbg_coredump_list_output,
                            total_segments);
        cmn_req->host_dest_addr = cpu_to_le64(dma_handle);
        cmn_req->host_buf_len = cpu_to_le32(info->dma_len);
-       mutex_lock(&bp->hwrm_cmd_lock);
        while (1) {
                *seq_ptr = cpu_to_le16(seq);
-               rc = _hwrm_send_message(bp, msg, msg_len,
-                                       HWRM_COREDUMP_TIMEOUT);
+               rc = hwrm_req_send(bp, msg);
                if (rc)
                        break;
 
@@ -3619,26 +3680,27 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
                seq++;
                off += len;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       dma_free_coherent(&bp->pdev->dev, info->dma_len, dma_buf, dma_handle);
+       hwrm_req_drop(bp, msg);
        return rc;
 }
 
 static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
                                       struct bnxt_coredump *coredump)
 {
-       struct hwrm_dbg_coredump_list_input req = {0};
        struct bnxt_hwrm_dbg_dma_info info = {NULL};
+       struct hwrm_dbg_coredump_list_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_LIST, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_LIST);
+       if (rc)
+               return rc;
 
        info.dma_len = COREDUMP_LIST_BUF_LEN;
        info.seq_off = offsetof(struct hwrm_dbg_coredump_list_input, seq_no);
        info.data_len_off = offsetof(struct hwrm_dbg_coredump_list_output,
                                     data_len);
 
-       rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
        if (!rc) {
                coredump->data = info.dest_buf;
                coredump->data_size = info.dest_buf_size;
@@ -3650,26 +3712,34 @@ static int bnxt_hwrm_dbg_coredump_list(struct bnxt *bp,
 static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
                                           u16 segment_id)
 {
-       struct hwrm_dbg_coredump_initiate_input req = {0};
+       struct hwrm_dbg_coredump_initiate_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_INITIATE);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_INITIATE, -1, -1);
-       req.component_id = cpu_to_le16(component_id);
-       req.segment_id = cpu_to_le16(segment_id);
+       hwrm_req_timeout(bp, req, HWRM_COREDUMP_TIMEOUT);
+       req->component_id = cpu_to_le16(component_id);
+       req->segment_id = cpu_to_le16(segment_id);
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_COREDUMP_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
                                           u16 segment_id, u32 *seg_len,
                                           void *buf, u32 buf_len, u32 offset)
 {
-       struct hwrm_dbg_coredump_retrieve_input req = {0};
+       struct hwrm_dbg_coredump_retrieve_input *req;
        struct bnxt_hwrm_dbg_dma_info info = {NULL};
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_COREDUMP_RETRIEVE, -1, -1);
-       req.component_id = cpu_to_le16(component_id);
-       req.segment_id = cpu_to_le16(segment_id);
+       rc = hwrm_req_init(bp, req, HWRM_DBG_COREDUMP_RETRIEVE);
+       if (rc)
+               return rc;
+
+       req->component_id = cpu_to_le16(component_id);
+       req->segment_id = cpu_to_le16(segment_id);
 
        info.dma_len = COREDUMP_RETRIEVE_BUF_LEN;
        info.seq_off = offsetof(struct hwrm_dbg_coredump_retrieve_input,
@@ -3682,7 +3752,7 @@ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
                info.seg_start = offset;
        }
 
-       rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
+       rc = bnxt_hwrm_dbg_dma_data(bp, req, &info);
        if (!rc)
                *seg_len = info.dest_buf_size;
 
@@ -3961,8 +4031,8 @@ static int bnxt_get_ts_info(struct net_device *dev,
 
 void bnxt_ethtool_init(struct bnxt *bp)
 {
-       struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_selftest_qlist_input req = {0};
+       struct hwrm_selftest_qlist_output *resp;
+       struct hwrm_selftest_qlist_input *req;
        struct bnxt_test_info *test_info;
        struct net_device *dev = bp->dev;
        int i, rc;
@@ -3974,19 +4044,22 @@ void bnxt_ethtool_init(struct bnxt *bp)
        if (bp->hwrm_spec_code < 0x10704 || !BNXT_PF(bp))
                return;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_SELFTEST_QLIST, -1, -1);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (rc)
-               goto ethtool_init_exit;
-
        test_info = bp->test_info;
-       if (!test_info)
+       if (!test_info) {
                test_info = kzalloc(sizeof(*bp->test_info), GFP_KERNEL);
-       if (!test_info)
+               if (!test_info)
+                       return;
+               bp->test_info = test_info;
+       }
+
+       if (hwrm_req_init(bp, req, HWRM_SELFTEST_QLIST))
+               return;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
+       if (rc)
                goto ethtool_init_exit;
 
-       bp->test_info = test_info;
        bp->num_tests = resp->num_tests + BNXT_DRV_TESTS;
        if (bp->num_tests > BNXT_MAX_TEST)
                bp->num_tests = BNXT_MAX_TEST;
@@ -4020,7 +4093,7 @@ void bnxt_ethtool_init(struct bnxt *bp)
        }
 
 ethtool_init_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
 }
 
 static void bnxt_get_eth_phy_stats(struct net_device *dev,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c
new file mode 100644 (file)
index 0000000..acef61a
--- /dev/null
@@ -0,0 +1,763 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2020 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <asm/byteorder.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/errno.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_hwrm.h"
+
+static u64 hwrm_calc_sentinel(struct bnxt_hwrm_ctx *ctx, u16 req_type)
+{
+       return (((uintptr_t)ctx) + req_type) ^ BNXT_HWRM_SENTINEL;
+}
+
+/**
+ * __hwrm_req_init() - Initialize an HWRM request.
+ * @bp: The driver context.
+ * @req: A pointer to the request pointer to initialize.
+ * @req_type: The request type. This will be converted to the little endian
+ *     before being written to the req_type field of the returned request.
+ * @req_len: The length of the request to be allocated.
+ *
+ * Allocate DMA resources and initialize a new HWRM request object of the
+ * given type. The response address field in the request is configured with
+ * the DMA bus address that has been mapped for the response and the passed
+ * request is pointed to kernel virtual memory mapped for the request (such
+ * that short_input indirection can be accomplished without copying). The
+ * request’s target and completion ring are initialized to default values and
+ * can be overridden by writing to the returned request object directly.
+ *
+ * The initialized request can be further customized by writing to its fields
+ * directly, taking care to covert such fields to little endian. The request
+ * object will be consumed (and all its associated resources release) upon
+ * passing it to hwrm_req_send() unless ownership of the request has been
+ * claimed by the caller via a call to hwrm_req_hold(). If the request is not
+ * consumed, either because it is never sent or because ownership has been
+ * claimed, then it must be released by a call to hwrm_req_drop().
+ *
+ * Return: zero on success, negative error code otherwise:
+ *     E2BIG: the type of request pointer is too large to fit.
+ *     ENOMEM: an allocation failure occurred.
+ */
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len)
+{
+       struct bnxt_hwrm_ctx *ctx;
+       dma_addr_t dma_handle;
+       u8 *req_addr;
+
+       if (req_len > BNXT_HWRM_CTX_OFFSET)
+               return -E2BIG;
+
+       req_addr = dma_pool_alloc(bp->hwrm_dma_pool, GFP_KERNEL | __GFP_ZERO,
+                                 &dma_handle);
+       if (!req_addr)
+               return -ENOMEM;
+
+       ctx = (struct bnxt_hwrm_ctx *)(req_addr + BNXT_HWRM_CTX_OFFSET);
+       /* safety first, sentinel used to check for invalid requests */
+       ctx->sentinel = hwrm_calc_sentinel(ctx, req_type);
+       ctx->req_len = req_len;
+       ctx->req = (struct input *)req_addr;
+       ctx->resp = (struct output *)(req_addr + BNXT_HWRM_RESP_OFFSET);
+       ctx->dma_handle = dma_handle;
+       ctx->flags = 0; /* __GFP_ZERO, but be explicit regarding ownership */
+       ctx->timeout = bp->hwrm_cmd_timeout ?: DFLT_HWRM_CMD_TIMEOUT;
+       ctx->allocated = BNXT_HWRM_DMA_SIZE - BNXT_HWRM_CTX_OFFSET;
+       ctx->gfp = GFP_KERNEL;
+       ctx->slice_addr = NULL;
+
+       /* initialize common request fields */
+       ctx->req->req_type = cpu_to_le16(req_type);
+       ctx->req->resp_addr = cpu_to_le64(dma_handle + BNXT_HWRM_RESP_OFFSET);
+       ctx->req->cmpl_ring = cpu_to_le16(BNXT_HWRM_NO_CMPL_RING);
+       ctx->req->target_id = cpu_to_le16(BNXT_HWRM_TARGET);
+       *req = ctx->req;
+
+       return 0;
+}
+
+static struct bnxt_hwrm_ctx *__hwrm_ctx(struct bnxt *bp, u8 *req_addr)
+{
+       void *ctx_addr = req_addr + BNXT_HWRM_CTX_OFFSET;
+       struct input *req = (struct input *)req_addr;
+       struct bnxt_hwrm_ctx *ctx = ctx_addr;
+       u64 sentinel;
+
+       if (!req) {
+               /* can only be due to software bug, be loud */
+               netdev_err(bp->dev, "null HWRM request");
+               dump_stack();
+               return NULL;
+       }
+
+       /* HWRM API has no type safety, verify sentinel to validate address */
+       sentinel = hwrm_calc_sentinel(ctx, le16_to_cpu(req->req_type));
+       if (ctx->sentinel != sentinel) {
+               /* can only be due to software bug, be loud */
+               netdev_err(bp->dev, "HWRM sentinel mismatch, req_type = %u\n",
+                          (u32)le16_to_cpu(req->req_type));
+               dump_stack();
+               return NULL;
+       }
+
+       return ctx;
+}
+
+/**
+ * hwrm_req_timeout() - Set the completion timeout for the request.
+ * @bp: The driver context.
+ * @req: The request to set the timeout.
+ * @timeout: The timeout in milliseconds.
+ *
+ * Set the timeout associated with the request for subsequent calls to
+ * hwrm_req_send(). Some requests are long running and require a different
+ * timeout than the default.
+ */
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+       if (ctx)
+               ctx->timeout = timeout;
+}
+
+/**
+ * hwrm_req_alloc_flags() - Sets GFP allocation flags for slices.
+ * @bp: The driver context.
+ * @req: The request for which calls to hwrm_req_dma_slice() will have altered
+ *     allocation flags.
+ * @flags: A bitmask of GFP flags. These flags are passed to
+ *     dma_alloc_coherent() whenever it is used to allocate backing memory
+ *     for slices. Note that calls to hwrm_req_dma_slice() will not always
+ *     result in new allocations, however, memory suballocated from the
+ *     request buffer is already __GFP_ZERO.
+ *
+ * Sets the GFP allocation flags associated with the request for subsequent
+ * calls to hwrm_req_dma_slice(). This can be useful for specifying __GFP_ZERO
+ * for slice allocations.
+ */
+void hwrm_req_alloc_flags(struct bnxt *bp, void *req, gfp_t gfp)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+       if (ctx)
+               ctx->gfp = gfp;
+}
+
+/**
+ * hwrm_req_replace() - Replace request data.
+ * @bp: The driver context.
+ * @req: The request to modify. A call to hwrm_req_replace() is conceptually
+ *     an assignment of new_req to req. Subsequent calls to HWRM API functions,
+ *     such as hwrm_req_send(), should thus use req and not new_req (in fact,
+ *     calls to HWRM API functions will fail if non-managed request objects
+ *     are passed).
+ * @len: The length of new_req.
+ * @new_req: The pre-built request to copy or reference.
+ *
+ * Replaces the request data in req with that of new_req. This is useful in
+ * scenarios where a request object has already been constructed by a third
+ * party prior to creating a resource managed request using hwrm_req_init().
+ * Depending on the length, hwrm_req_replace() will either copy the new
+ * request data into the DMA memory allocated for req, or it will simply
+ * reference the new request and use it in lieu of req during subsequent
+ * calls to hwrm_req_send(). The resource management is associated with
+ * req and is independent of and does not apply to new_req. The caller must
+ * ensure that the lifetime of new_req is least as long as req. Any slices
+ * that may have been associated with the original request are released.
+ *
+ * Return: zero on success, negative error code otherwise:
+ *     E2BIG: Request is too large.
+ *     EINVAL: Invalid request to modify.
+ */
+int hwrm_req_replace(struct bnxt *bp, void *req, void *new_req, u32 len)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+       struct input *internal_req = req;
+       u16 req_type;
+
+       if (!ctx)
+               return -EINVAL;
+
+       if (len > BNXT_HWRM_CTX_OFFSET)
+               return -E2BIG;
+
+       /* free any existing slices */
+       ctx->allocated = BNXT_HWRM_DMA_SIZE - BNXT_HWRM_CTX_OFFSET;
+       if (ctx->slice_addr) {
+               dma_free_coherent(&bp->pdev->dev, ctx->slice_size,
+                                 ctx->slice_addr, ctx->slice_handle);
+               ctx->slice_addr = NULL;
+       }
+       ctx->gfp = GFP_KERNEL;
+
+       if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) || len > BNXT_HWRM_MAX_REQ_LEN) {
+               memcpy(internal_req, new_req, len);
+       } else {
+               internal_req->req_type = ((struct input *)new_req)->req_type;
+               ctx->req = new_req;
+       }
+
+       ctx->req_len = len;
+       ctx->req->resp_addr = cpu_to_le64(ctx->dma_handle +
+                                         BNXT_HWRM_RESP_OFFSET);
+
+       /* update sentinel for potentially new request type */
+       req_type = le16_to_cpu(internal_req->req_type);
+       ctx->sentinel = hwrm_calc_sentinel(ctx, req_type);
+
+       return 0;
+}
+
+/**
+ * hwrm_req_flags() - Set non internal flags of the ctx
+ * @bp: The driver context.
+ * @req: The request containing the HWRM command
+ * @flags: ctx flags that don't have BNXT_HWRM_INTERNAL_FLAG set
+ *
+ * ctx flags can be used by the callers to instruct how the subsequent
+ * hwrm_req_send() should behave. Example: callers can use hwrm_req_flags
+ * with BNXT_HWRM_CTX_SILENT to omit kernel prints of errors of hwrm_req_send()
+ * or with BNXT_HWRM_FULL_WAIT enforce hwrm_req_send() to wait for full timeout
+ * even if FW is not responding.
+ * This generic function can be used to set any flag that is not an internal flag
+ * of the HWRM module.
+ */
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+       if (ctx)
+               ctx->flags |= (flags & HWRM_API_FLAGS);
+}
+
+/**
+ * hwrm_req_hold() - Claim ownership of the request's resources.
+ * @bp: The driver context.
+ * @req: A pointer to the request to own. The request will no longer be
+ *     consumed by calls to hwrm_req_send().
+ *
+ * Take ownership of the request. Ownership places responsibility on the
+ * caller to free the resources associated with the request via a call to
+ * hwrm_req_drop(). The caller taking ownership implies that a subsequent
+ * call to hwrm_req_send() will not consume the request (ie. sending will
+ * not free the associated resources if the request is owned by the caller).
+ * Taking ownership returns a reference to the response. Retaining and
+ * accessing the response data is the most common reason to take ownership
+ * of the request. Ownership can also be acquired in order to reuse the same
+ * request object across multiple invocations of hwrm_req_send().
+ *
+ * Return: A pointer to the response object.
+ *
+ * The resources associated with the response will remain available to the
+ * caller until ownership of the request is relinquished via a call to
+ * hwrm_req_drop(). It is not possible for hwrm_req_hold() to return NULL if
+ * a valid request is provided. A returned NULL value would imply a driver
+ * bug and the implementation will complain loudly in the logs to aid in
+ * detection. It should not be necessary to check the result for NULL.
+ */
+void *hwrm_req_hold(struct bnxt *bp, void *req)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+       struct input *input = (struct input *)req;
+
+       if (!ctx)
+               return NULL;
+
+       if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED) {
+               /* can only be due to software bug, be loud */
+               netdev_err(bp->dev, "HWRM context already owned, req_type = %u\n",
+                          (u32)le16_to_cpu(input->req_type));
+               dump_stack();
+               return NULL;
+       }
+
+       ctx->flags |= BNXT_HWRM_INTERNAL_CTX_OWNED;
+       return ((u8 *)req) + BNXT_HWRM_RESP_OFFSET;
+}
+
+static void __hwrm_ctx_drop(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+{
+       void *addr = ((u8 *)ctx) - BNXT_HWRM_CTX_OFFSET;
+       dma_addr_t dma_handle = ctx->dma_handle; /* save before invalidate */
+
+       /* unmap any auxiliary DMA slice */
+       if (ctx->slice_addr)
+               dma_free_coherent(&bp->pdev->dev, ctx->slice_size,
+                                 ctx->slice_addr, ctx->slice_handle);
+
+       /* invalidate, ensure ownership, sentinel and dma_handle are cleared */
+       memset(ctx, 0, sizeof(struct bnxt_hwrm_ctx));
+
+       /* return the buffer to the DMA pool */
+       if (dma_handle)
+               dma_pool_free(bp->hwrm_dma_pool, addr, dma_handle);
+}
+
+/**
+ * hwrm_req_drop() - Release all resources associated with the request.
+ * @bp: The driver context.
+ * @req: The request to consume, releasing the associated resources. The
+ *     request object, any slices, and its associated response are no
+ *     longer valid.
+ *
+ * It is legal to call hwrm_req_drop() on an unowned request, provided it
+ * has not already been consumed by hwrm_req_send() (for example, to release
+ * an aborted request). A given request should not be dropped more than once,
+ * nor should it be dropped after having been consumed by hwrm_req_send(). To
+ * do so is an error (the context will not be found and a stack trace will be
+ * rendered in the kernel log).
+ */
+void hwrm_req_drop(struct bnxt *bp, void *req)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+       if (ctx)
+               __hwrm_ctx_drop(bp, ctx);
+}
+
+static int __hwrm_to_stderr(u32 hwrm_err)
+{
+       switch (hwrm_err) {
+       case HWRM_ERR_CODE_SUCCESS:
+               return 0;
+       case HWRM_ERR_CODE_RESOURCE_LOCKED:
+               return -EROFS;
+       case HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED:
+               return -EACCES;
+       case HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR:
+               return -ENOSPC;
+       case HWRM_ERR_CODE_INVALID_PARAMS:
+       case HWRM_ERR_CODE_INVALID_FLAGS:
+       case HWRM_ERR_CODE_INVALID_ENABLES:
+       case HWRM_ERR_CODE_UNSUPPORTED_TLV:
+       case HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR:
+               return -EINVAL;
+       case HWRM_ERR_CODE_NO_BUFFER:
+               return -ENOMEM;
+       case HWRM_ERR_CODE_HOT_RESET_PROGRESS:
+       case HWRM_ERR_CODE_BUSY:
+               return -EAGAIN;
+       case HWRM_ERR_CODE_CMD_NOT_SUPPORTED:
+               return -EOPNOTSUPP;
+       default:
+               return -EIO;
+       }
+}
+
+static struct bnxt_hwrm_wait_token *
+__hwrm_acquire_token(struct bnxt *bp, enum bnxt_hwrm_chnl dst)
+{
+       struct bnxt_hwrm_wait_token *token;
+
+       token = kzalloc(sizeof(*token), GFP_KERNEL);
+       if (!token)
+               return NULL;
+
+       mutex_lock(&bp->hwrm_cmd_lock);
+
+       token->dst = dst;
+       token->state = BNXT_HWRM_PENDING;
+       if (dst == BNXT_HWRM_CHNL_CHIMP) {
+               token->seq_id = bp->hwrm_cmd_seq++;
+               hlist_add_head_rcu(&token->node, &bp->hwrm_pending_list);
+       } else {
+               token->seq_id = bp->hwrm_cmd_kong_seq++;
+       }
+
+       return token;
+}
+
+static void
+__hwrm_release_token(struct bnxt *bp, struct bnxt_hwrm_wait_token *token)
+{
+       if (token->dst == BNXT_HWRM_CHNL_CHIMP) {
+               hlist_del_rcu(&token->node);
+               kfree_rcu(token, rcu);
+       } else {
+               kfree(token);
+       }
+       mutex_unlock(&bp->hwrm_cmd_lock);
+}
+
+void
+hwrm_update_token(struct bnxt *bp, u16 seq_id, enum bnxt_hwrm_wait_state state)
+{
+       struct bnxt_hwrm_wait_token *token;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(token, &bp->hwrm_pending_list, node) {
+               if (token->seq_id == seq_id) {
+                       WRITE_ONCE(token->state, state);
+                       rcu_read_unlock();
+                       return;
+               }
+       }
+       rcu_read_unlock();
+       netdev_err(bp->dev, "Invalid hwrm seq id %d\n", seq_id);
+}
+
+static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx)
+{
+       u32 doorbell_offset = BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER;
+       enum bnxt_hwrm_chnl dst = BNXT_HWRM_CHNL_CHIMP;
+       u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
+       struct bnxt_hwrm_wait_token *token = NULL;
+       struct hwrm_short_input short_input = {0};
+       u16 max_req_len = BNXT_HWRM_MAX_REQ_LEN;
+       unsigned int i, timeout, tmo_count;
+       u32 *data = (u32 *)ctx->req;
+       u32 msg_len = ctx->req_len;
+       int rc = -EBUSY;
+       u32 req_type;
+       u16 len = 0;
+       u8 *valid;
+
+       if (ctx->flags & BNXT_HWRM_INTERNAL_RESP_DIRTY)
+               memset(ctx->resp, 0, PAGE_SIZE);
+
+       req_type = le16_to_cpu(ctx->req->req_type);
+       if (BNXT_NO_FW_ACCESS(bp) && req_type != HWRM_FUNC_RESET)
+               goto exit;
+
+       if (msg_len > BNXT_HWRM_MAX_REQ_LEN &&
+           msg_len > bp->hwrm_max_ext_req_len) {
+               rc = -E2BIG;
+               goto exit;
+       }
+
+       if (bnxt_kong_hwrm_message(bp, ctx->req)) {
+               dst = BNXT_HWRM_CHNL_KONG;
+               bar_offset = BNXT_GRCPF_REG_KONG_COMM;
+               doorbell_offset = BNXT_GRCPF_REG_KONG_COMM_TRIGGER;
+               if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) {
+                       netdev_err(bp->dev, "Ring completions not supported for KONG commands, req_type = %d\n",
+                                  req_type);
+                       rc = -EINVAL;
+                       goto exit;
+               }
+       }
+
+       token = __hwrm_acquire_token(bp, dst);
+       if (!token) {
+               rc = -ENOMEM;
+               goto exit;
+       }
+       ctx->req->seq_id = cpu_to_le16(token->seq_id);
+
+       if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
+           msg_len > BNXT_HWRM_MAX_REQ_LEN) {
+               short_input.req_type = ctx->req->req_type;
+               short_input.signature =
+                               cpu_to_le16(SHORT_REQ_SIGNATURE_SHORT_CMD);
+               short_input.size = cpu_to_le16(msg_len);
+               short_input.req_addr = cpu_to_le64(ctx->dma_handle);
+
+               data = (u32 *)&short_input;
+               msg_len = sizeof(short_input);
+
+               max_req_len = BNXT_HWRM_SHORT_REQ_LEN;
+       }
+
+       /* Ensure any associated DMA buffers are written before doorbell */
+       wmb();
+
+       /* Write request msg to hwrm channel */
+       __iowrite32_copy(bp->bar0 + bar_offset, data, msg_len / 4);
+
+       for (i = msg_len; i < max_req_len; i += 4)
+               writel(0, bp->bar0 + bar_offset + i);
+
+       /* Ring channel doorbell */
+       writel(1, bp->bar0 + doorbell_offset);
+
+       if (!pci_is_enabled(bp->pdev)) {
+               rc = -ENODEV;
+               goto exit;
+       }
+
+       /* Limit timeout to an upper limit */
+       timeout = min_t(uint, ctx->timeout, HWRM_CMD_MAX_TIMEOUT);
+       /* convert timeout to usec */
+       timeout *= 1000;
+
+       i = 0;
+       /* Short timeout for the first few iterations:
+        * number of loops = number of loops for short timeout +
+        * number of loops for standard timeout.
+        */
+       tmo_count = HWRM_SHORT_TIMEOUT_COUNTER;
+       timeout = timeout - HWRM_SHORT_MIN_TIMEOUT * HWRM_SHORT_TIMEOUT_COUNTER;
+       tmo_count += DIV_ROUND_UP(timeout, HWRM_MIN_TIMEOUT);
+
+       if (le16_to_cpu(ctx->req->cmpl_ring) != INVALID_HW_RING_ID) {
+               /* Wait until hwrm response cmpl interrupt is processed */
+               while (READ_ONCE(token->state) < BNXT_HWRM_COMPLETE &&
+                      i++ < tmo_count) {
+                       /* Abort the wait for completion if the FW health
+                        * check has failed.
+                        */
+                       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+                               goto exit;
+                       /* on first few passes, just barely sleep */
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
+                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+                                            HWRM_SHORT_MAX_TIMEOUT);
+                       } else {
+                               if (HWRM_WAIT_MUST_ABORT(bp, ctx))
+                                       break;
+                               usleep_range(HWRM_MIN_TIMEOUT,
+                                            HWRM_MAX_TIMEOUT);
+                       }
+               }
+
+               if (READ_ONCE(token->state) != BNXT_HWRM_COMPLETE) {
+                       if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+                               netdev_err(bp->dev, "Resp cmpl intr err msg: 0x%x\n",
+                                          le16_to_cpu(ctx->req->req_type));
+                       goto exit;
+               }
+               len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len));
+               valid = ((u8 *)ctx->resp) + len - 1;
+       } else {
+               __le16 seen_out_of_seq = ctx->req->seq_id; /* will never see */
+               int j;
+
+               /* Check if response len is updated */
+               for (i = 0; i < tmo_count; i++) {
+                       /* Abort the wait for completion if the FW health
+                        * check has failed.
+                        */
+                       if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
+                               goto exit;
+
+                       if (token &&
+                           READ_ONCE(token->state) == BNXT_HWRM_DEFERRED) {
+                               __hwrm_release_token(bp, token);
+                               token = NULL;
+                       }
+
+                       len = le16_to_cpu(READ_ONCE(ctx->resp->resp_len));
+                       if (len) {
+                               __le16 resp_seq = READ_ONCE(ctx->resp->seq_id);
+
+                               if (resp_seq == ctx->req->seq_id)
+                                       break;
+                               if (resp_seq != seen_out_of_seq) {
+                                       netdev_warn(bp->dev, "Discarding out of seq response: 0x%x for msg {0x%x 0x%x}\n",
+                                                   le16_to_cpu(resp_seq),
+                                                   le16_to_cpu(ctx->req->req_type),
+                                                   le16_to_cpu(ctx->req->seq_id));
+                                       seen_out_of_seq = resp_seq;
+                               }
+                       }
+
+                       /* on first few passes, just barely sleep */
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
+                               usleep_range(HWRM_SHORT_MIN_TIMEOUT,
+                                            HWRM_SHORT_MAX_TIMEOUT);
+                       } else {
+                               if (HWRM_WAIT_MUST_ABORT(bp, ctx))
+                                       goto timeout_abort;
+                               usleep_range(HWRM_MIN_TIMEOUT,
+                                            HWRM_MAX_TIMEOUT);
+                       }
+               }
+
+               if (i >= tmo_count) {
+timeout_abort:
+                       if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+                               netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d\n",
+                                          hwrm_total_timeout(i),
+                                          le16_to_cpu(ctx->req->req_type),
+                                          le16_to_cpu(ctx->req->seq_id), len);
+                       goto exit;
+               }
+
+               /* Last byte of resp contains valid bit */
+               valid = ((u8 *)ctx->resp) + len - 1;
+               for (j = 0; j < HWRM_VALID_BIT_DELAY_USEC; j++) {
+                       /* make sure we read from updated DMA memory */
+                       dma_rmb();
+                       if (*valid)
+                               break;
+                       usleep_range(1, 5);
+               }
+
+               if (j >= HWRM_VALID_BIT_DELAY_USEC) {
+                       if (!(ctx->flags & BNXT_HWRM_CTX_SILENT))
+                               netdev_err(bp->dev, "Error (timeout: %u) msg {0x%x 0x%x} len:%d v:%d\n",
+                                          hwrm_total_timeout(i),
+                                          le16_to_cpu(ctx->req->req_type),
+                                          le16_to_cpu(ctx->req->seq_id), len,
+                                          *valid);
+                       goto exit;
+               }
+       }
+
+       /* Zero valid bit for compatibility.  Valid bit in an older spec
+        * may become a new field in a newer spec.  We must make sure that
+        * a new field not implemented by old spec will read zero.
+        */
+       *valid = 0;
+       rc = le16_to_cpu(ctx->resp->error_code);
+       if (rc && !(ctx->flags & BNXT_HWRM_CTX_SILENT)) {
+               netdev_err(bp->dev, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n",
+                          le16_to_cpu(ctx->resp->req_type),
+                          le16_to_cpu(ctx->resp->seq_id), rc);
+       }
+       rc = __hwrm_to_stderr(rc);
+exit:
+       if (token)
+               __hwrm_release_token(bp, token);
+       if (ctx->flags & BNXT_HWRM_INTERNAL_CTX_OWNED)
+               ctx->flags |= BNXT_HWRM_INTERNAL_RESP_DIRTY;
+       else
+               __hwrm_ctx_drop(bp, ctx);
+       return rc;
+}
+
+/**
+ * hwrm_req_send() - Execute an HWRM command.
+ * @bp: The driver context.
+ * @req: A pointer to the request to send. The DMA resources associated with
+ *     the request will be released (ie. the request will be consumed) unless
+ *     ownership of the request has been assumed by the caller via a call to
+ *     hwrm_req_hold().
+ *
+ * Send an HWRM request to the device and wait for a response. The request is
+ * consumed if it is not owned by the caller. This function will block until
+ * the request has either completed or times out due to an error.
+ *
+ * Return: A result code.
+ *
+ * The result is zero on success, otherwise the negative error code indicates
+ * one of the following errors:
+ *     E2BIG: The request was too large.
+ *     EBUSY: The firmware is in a fatal state or the request timed out
+ *     EACCESS: HWRM access denied.
+ *     ENOSPC: HWRM resource allocation error.
+ *     EINVAL: Request parameters are invalid.
+ *     ENOMEM: HWRM has no buffers.
+ *     EAGAIN: HWRM busy or reset in progress.
+ *     EOPNOTSUPP: Invalid request type.
+ *     EIO: Any other error.
+ * Error handling is orthogonal to request ownership. An unowned request will
+ * still be consumed on error. If the caller owns the request, then the caller
+ * is responsible for releasing the resources. Otherwise, hwrm_req_send() will
+ * always consume the request.
+ */
+int hwrm_req_send(struct bnxt *bp, void *req)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+
+       if (!ctx)
+               return -EINVAL;
+
+       return __hwrm_send(bp, ctx);
+}
+
+/**
+ * hwrm_req_send_silent() - A silent version of hwrm_req_send().
+ * @bp: The driver context.
+ * @req: The request to send without logging.
+ *
+ * The same as hwrm_req_send(), except that the request is silenced using
+ * hwrm_req_silence() prior the call. This version of the function is
+ * provided solely to preserve the legacy API’s flavor for this functionality.
+ *
+ * Return: A result code, see hwrm_req_send().
+ */
+int hwrm_req_send_silent(struct bnxt *bp, void *req)
+{
+       hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT);
+       return hwrm_req_send(bp, req);
+}
+
+/**
+ * hwrm_req_dma_slice() - Allocate a slice of DMA mapped memory.
+ * @bp: The driver context.
+ * @req: The request for which indirect data will be associated.
+ * @size: The size of the allocation.
+ * @dma: The bus address associated with the allocation. The HWRM API has no
+ *     knowledge about the type of the request and so cannot infer how the
+ *     caller intends to use the indirect data. Thus, the caller is
+ *     responsible for configuring the request object appropriately to
+ *     point to the associated indirect memory. Note, DMA handle has the
+ *     same definition as it does in dma_alloc_coherent(), the caller is
+ *     responsible for endian conversions via cpu_to_le64() before assigning
+ *     this address.
+ *
+ * Allocates DMA mapped memory for indirect data related to a request. The
+ * lifetime of the DMA resources will be bound to that of the request (ie.
+ * they will be automatically released when the request is either consumed by
+ * hwrm_req_send() or dropped by hwrm_req_drop()). Small allocations are
+ * efficiently suballocated out of the request buffer space, hence the name
+ * slice, while larger requests are satisfied via an underlying call to
+ * dma_alloc_coherent(). Multiple suballocations are supported, however, only
+ * one externally mapped region is.
+ *
+ * Return: The kernel virtual address of the DMA mapping.
+ */
+void *
+hwrm_req_dma_slice(struct bnxt *bp, void *req, u32 size, dma_addr_t *dma_handle)
+{
+       struct bnxt_hwrm_ctx *ctx = __hwrm_ctx(bp, req);
+       u8 *end = ((u8 *)req) + BNXT_HWRM_DMA_SIZE;
+       struct input *input = req;
+       u8 *addr, *req_addr = req;
+       u32 max_offset, offset;
+
+       if (!ctx)
+               return NULL;
+
+       max_offset = BNXT_HWRM_DMA_SIZE - ctx->allocated;
+       offset = max_offset - size;
+       offset = ALIGN_DOWN(offset, BNXT_HWRM_DMA_ALIGN);
+       addr = req_addr + offset;
+
+       if (addr < req_addr + max_offset && req_addr + ctx->req_len <= addr) {
+               ctx->allocated = end - addr;
+               *dma_handle = ctx->dma_handle + offset;
+               return addr;
+       }
+
+       /* could not suballocate from ctx buffer, try create a new mapping */
+       if (ctx->slice_addr) {
+               /* if one exists, can only be due to software bug, be loud */
+               netdev_err(bp->dev, "HWRM refusing to reallocate DMA slice, req_type = %u\n",
+                          (u32)le16_to_cpu(input->req_type));
+               dump_stack();
+               return NULL;
+       }
+
+       addr = dma_alloc_coherent(&bp->pdev->dev, size, dma_handle, ctx->gfp);
+
+       if (!addr)
+               return NULL;
+
+       ctx->slice_addr = addr;
+       ctx->slice_size = size;
+       ctx->slice_handle = *dma_handle;
+
+       return addr;
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.h
new file mode 100644 (file)
index 0000000..4d17f0d
--- /dev/null
@@ -0,0 +1,145 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2020 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_HWRM_H
+#define BNXT_HWRM_H
+
+#include "bnxt_hsi.h"
+
+enum bnxt_hwrm_ctx_flags {
+       /* Update the HWRM_API_FLAGS right below for any new non-internal bit added here */
+       BNXT_HWRM_INTERNAL_CTX_OWNED    = BIT(0), /* caller owns the context */
+       BNXT_HWRM_INTERNAL_RESP_DIRTY   = BIT(1), /* response contains data */
+       BNXT_HWRM_CTX_SILENT            = BIT(2), /* squelch firmware errors */
+       BNXT_HWRM_FULL_WAIT             = BIT(3), /* wait for full timeout of HWRM command */
+};
+
+#define HWRM_API_FLAGS (BNXT_HWRM_CTX_SILENT | BNXT_HWRM_FULL_WAIT)
+
+struct bnxt_hwrm_ctx {
+       u64 sentinel;
+       dma_addr_t dma_handle;
+       struct output *resp;
+       struct input *req;
+       dma_addr_t slice_handle;
+       void *slice_addr;
+       u32 slice_size;
+       u32 req_len;
+       enum bnxt_hwrm_ctx_flags flags;
+       unsigned int timeout;
+       u32 allocated;
+       gfp_t gfp;
+};
+
+enum bnxt_hwrm_wait_state {
+       BNXT_HWRM_PENDING,
+       BNXT_HWRM_DEFERRED,
+       BNXT_HWRM_COMPLETE,
+       BNXT_HWRM_CANCELLED,
+};
+
+enum bnxt_hwrm_chnl { BNXT_HWRM_CHNL_CHIMP, BNXT_HWRM_CHNL_KONG };
+
+struct bnxt_hwrm_wait_token {
+       struct rcu_head rcu;
+       struct hlist_node node;
+       enum bnxt_hwrm_wait_state state;
+       enum bnxt_hwrm_chnl dst;
+       u16 seq_id;
+};
+
+void hwrm_update_token(struct bnxt *bp, u16 seq, enum bnxt_hwrm_wait_state s);
+
+#define BNXT_HWRM_MAX_REQ_LEN          (bp->hwrm_max_req_len)
+#define BNXT_HWRM_SHORT_REQ_LEN                sizeof(struct hwrm_short_input)
+#define HWRM_CMD_MAX_TIMEOUT           40000
+#define SHORT_HWRM_CMD_TIMEOUT         20
+#define HWRM_CMD_TIMEOUT               (bp->hwrm_cmd_timeout)
+#define HWRM_RESET_TIMEOUT             ((HWRM_CMD_TIMEOUT) * 4)
+#define HWRM_COREDUMP_TIMEOUT          ((HWRM_CMD_TIMEOUT) * 12)
+#define BNXT_HWRM_TARGET               0xffff
+#define BNXT_HWRM_NO_CMPL_RING         -1
+#define BNXT_HWRM_REQ_MAX_SIZE         128
+#define BNXT_HWRM_DMA_SIZE             (2 * PAGE_SIZE) /* space for req+resp */
+#define BNXT_HWRM_RESP_RESERVED                PAGE_SIZE
+#define BNXT_HWRM_RESP_OFFSET          (BNXT_HWRM_DMA_SIZE -           \
+                                        BNXT_HWRM_RESP_RESERVED)
+#define BNXT_HWRM_CTX_OFFSET           (BNXT_HWRM_RESP_OFFSET -        \
+                                        sizeof(struct bnxt_hwrm_ctx))
+#define BNXT_HWRM_DMA_ALIGN            16
+#define BNXT_HWRM_SENTINEL             0xb6e1f68a12e9a7eb /* arbitrary value */
+#define BNXT_HWRM_REQS_PER_PAGE                (BNXT_PAGE_SIZE /       \
+                                        BNXT_HWRM_REQ_MAX_SIZE)
+#define HWRM_SHORT_MIN_TIMEOUT         3
+#define HWRM_SHORT_MAX_TIMEOUT         10
+#define HWRM_SHORT_TIMEOUT_COUNTER     5
+
+#define HWRM_MIN_TIMEOUT               25
+#define HWRM_MAX_TIMEOUT               40
+
+#define HWRM_WAIT_MUST_ABORT(bp, ctx)                                  \
+       (le16_to_cpu((ctx)->req->req_type) != HWRM_VER_GET &&           \
+        !bnxt_is_fw_healthy(bp))
+
+static inline unsigned int hwrm_total_timeout(unsigned int n)
+{
+       return n <= HWRM_SHORT_TIMEOUT_COUNTER ? n * HWRM_SHORT_MIN_TIMEOUT :
+               HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +
+               (n - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT;
+}
+
+
+#define HWRM_VALID_BIT_DELAY_USEC      150
+
+static inline bool bnxt_cfa_hwrm_message(u16 req_type)
+{
+       switch (req_type) {
+       case HWRM_CFA_ENCAP_RECORD_ALLOC:
+       case HWRM_CFA_ENCAP_RECORD_FREE:
+       case HWRM_CFA_DECAP_FILTER_ALLOC:
+       case HWRM_CFA_DECAP_FILTER_FREE:
+       case HWRM_CFA_EM_FLOW_ALLOC:
+       case HWRM_CFA_EM_FLOW_FREE:
+       case HWRM_CFA_EM_FLOW_CFG:
+       case HWRM_CFA_FLOW_ALLOC:
+       case HWRM_CFA_FLOW_FREE:
+       case HWRM_CFA_FLOW_INFO:
+       case HWRM_CFA_FLOW_FLUSH:
+       case HWRM_CFA_FLOW_STATS:
+       case HWRM_CFA_METER_PROFILE_ALLOC:
+       case HWRM_CFA_METER_PROFILE_FREE:
+       case HWRM_CFA_METER_PROFILE_CFG:
+       case HWRM_CFA_METER_INSTANCE_ALLOC:
+       case HWRM_CFA_METER_INSTANCE_FREE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static inline bool bnxt_kong_hwrm_message(struct bnxt *bp, struct input *req)
+{
+       return (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL &&
+               (bnxt_cfa_hwrm_message(le16_to_cpu(req->req_type)) ||
+                le16_to_cpu(req->target_id) == HWRM_TARGET_ID_KONG));
+}
+
+int __hwrm_req_init(struct bnxt *bp, void **req, u16 req_type, u32 req_len);
+#define hwrm_req_init(bp, req, req_type) \
+       __hwrm_req_init((bp), (void **)&(req), (req_type), sizeof(*(req)))
+void *hwrm_req_hold(struct bnxt *bp, void *req);
+void hwrm_req_drop(struct bnxt *bp, void *req);
+void hwrm_req_flags(struct bnxt *bp, void *req, enum bnxt_hwrm_ctx_flags flags);
+void hwrm_req_timeout(struct bnxt *bp, void *req, unsigned int timeout);
+int hwrm_req_send(struct bnxt *bp, void *req);
+int hwrm_req_send_silent(struct bnxt *bp, void *req);
+int hwrm_req_replace(struct bnxt *bp, void *req, void *new_req, u32 len);
+void hwrm_req_alloc_flags(struct bnxt *bp, void *req, gfp_t flags);
+void *hwrm_req_dma_slice(struct bnxt *bp, void *req, u32 size, dma_addr_t *dma);
+#endif
index 81f40ab..f0aa480 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/ptp_classify.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_ptp.h"
 
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off)
@@ -56,16 +57,19 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
 }
 
 /* Caller holds ptp_lock */
-static u64 bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts)
+static int bnxt_refclk_read(struct bnxt *bp, struct ptp_system_timestamp *sts,
+                           u64 *ns)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
-       u64 ns;
+
+       if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+               return -EIO;
 
        ptp_read_system_prets(sts);
-       ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+       *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
        ptp_read_system_postts(sts);
-       ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
-       return ns;
+       *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
+       return 0;
 }
 
 static void bnxt_ptp_get_current_time(struct bnxt *bp)
@@ -76,30 +80,34 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp)
                return;
        spin_lock_bh(&ptp->ptp_lock);
        WRITE_ONCE(ptp->old_time, ptp->current_time);
-       ptp->current_time = bnxt_refclk_read(bp, NULL);
+       bnxt_refclk_read(bp, NULL, &ptp->current_time);
        spin_unlock_bh(&ptp->ptp_lock);
 }
 
 static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts)
 {
-       struct hwrm_port_ts_query_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_port_ts_query_input req = {0};
+       struct hwrm_port_ts_query_output *resp;
+       struct hwrm_port_ts_query_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_TS_QUERY, -1, -1);
-       req.flags = cpu_to_le32(flags);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_TS_QUERY);
+       if (rc)
+               return rc;
+
+       req->flags = cpu_to_le32(flags);
        if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) ==
            PORT_TS_QUERY_REQ_FLAGS_PATH_TX) {
-               req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
-               req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
-               req.ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off);
-               req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
+               req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES);
+               req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid);
+               req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off);
+               req->ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT);
        }
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+
+       rc = hwrm_req_send(bp, req);
        if (!rc)
                *ts = le64_to_cpu(resp->ptp_msg_ts);
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -110,9 +118,14 @@ static int bnxt_ptp_gettimex(struct ptp_clock_info *ptp_info,
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
        u64 ns, cycles;
+       int rc;
 
        spin_lock_bh(&ptp->ptp_lock);
-       cycles = bnxt_refclk_read(ptp->bp, sts);
+       rc = bnxt_refclk_read(ptp->bp, sts, &cycles);
+       if (rc) {
+               spin_unlock_bh(&ptp->ptp_lock);
+               return rc;
+       }
        ns = timecounter_cyc2time(&ptp->tc, cycles);
        spin_unlock_bh(&ptp->ptp_lock);
        *ts = ns_to_timespec64(ns);
@@ -135,33 +148,246 @@ static int bnxt_ptp_adjfreq(struct ptp_clock_info *ptp_info, s32 ppb)
 {
        struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
                                                ptp_info);
-       struct hwrm_port_mac_cfg_input req = {0};
+       struct hwrm_port_mac_cfg_input *req;
        struct bnxt *bp = ptp->bp;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
-       req.ptp_freq_adj_ppb = cpu_to_le32(ppb);
-       req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
+
+       req->ptp_freq_adj_ppb = cpu_to_le32(ppb);
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
+       rc = hwrm_req_send(ptp->bp, req);
        if (rc)
                netdev_err(ptp->bp->dev,
                           "ptp adjfreq failed. rc = %d\n", rc);
        return rc;
 }
 
-static int bnxt_ptp_enable(struct ptp_clock_info *ptp,
+void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       struct ptp_clock_event event;
+       u64 ns, pps_ts;
+
+       pps_ts = EVENT_PPS_TS(data2, data1);
+       spin_lock_bh(&ptp->ptp_lock);
+       ns = timecounter_cyc2time(&ptp->tc, pps_ts);
+       spin_unlock_bh(&ptp->ptp_lock);
+
+       switch (EVENT_DATA2_PPS_EVENT_TYPE(data2)) {
+       case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_INTERNAL:
+               event.pps_times.ts_real = ns_to_timespec64(ns);
+               event.type = PTP_CLOCK_PPSUSR;
+               event.index = EVENT_DATA2_PPS_PIN_NUM(data2);
+               break;
+       case ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE_EXTERNAL:
+               event.timestamp = ns;
+               event.type = PTP_CLOCK_EXTTS;
+               event.index = EVENT_DATA2_PPS_PIN_NUM(data2);
+               break;
+       }
+
+       ptp_clock_event(bp->ptp_cfg->ptp_clock, &event);
+}
+
+static int bnxt_ptp_cfg_pin(struct bnxt *bp, u8 pin, u8 usage)
+{
+       struct hwrm_func_ptp_pin_cfg_input *req;
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       u8 state = usage != BNXT_PPS_PIN_NONE;
+       u8 *pin_state, *pin_usg;
+       u32 enables;
+       int rc;
+
+       if (!TSIO_PIN_VALID(pin)) {
+               netdev_err(ptp->bp->dev, "1PPS: Invalid pin. Check pin-function configuration\n");
+               return -EOPNOTSUPP;
+       }
+
+       rc = hwrm_req_init(ptp->bp, req, HWRM_FUNC_PTP_PIN_CFG);
+       if (rc)
+               return rc;
+
+       enables = (FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_STATE |
+                  FUNC_PTP_PIN_CFG_REQ_ENABLES_PIN0_USAGE) << (pin * 2);
+       req->enables = cpu_to_le32(enables);
+
+       pin_state = &req->pin0_state;
+       pin_usg = &req->pin0_usage;
+
+       *(pin_state + (pin * 2)) = state;
+       *(pin_usg + (pin * 2)) = usage;
+
+       rc = hwrm_req_send(ptp->bp, req);
+       if (rc)
+               return rc;
+
+       ptp->pps_info.pins[pin].usage = usage;
+       ptp->pps_info.pins[pin].state = state;
+
+       return 0;
+}
+
+static int bnxt_ptp_cfg_event(struct bnxt *bp, u8 event)
+{
+       struct hwrm_func_ptp_cfg_input *req;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+       if (rc)
+               return rc;
+
+       req->enables = cpu_to_le16(FUNC_PTP_CFG_REQ_ENABLES_PTP_PPS_EVENT);
+       req->ptp_pps_event = event;
+       return hwrm_req_send(bp, req);
+}
+
+void bnxt_ptp_reapply_pps(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       struct bnxt_pps *pps;
+       u32 pin = 0;
+       int rc;
+
+       if (!ptp || !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) ||
+           !(ptp->ptp_info.pin_config))
+               return;
+       pps = &ptp->pps_info;
+       for (pin = 0; pin < BNXT_MAX_TSIO_PINS; pin++) {
+               if (pps->pins[pin].state) {
+                       rc = bnxt_ptp_cfg_pin(bp, pin, pps->pins[pin].usage);
+                       if (!rc && pps->pins[pin].event)
+                               rc = bnxt_ptp_cfg_event(bp,
+                                                       pps->pins[pin].event);
+                       if (rc)
+                               netdev_err(bp->dev, "1PPS: Failed to configure pin%d\n",
+                                          pin);
+               }
+       }
+}
+
+static int bnxt_get_target_cycles(struct bnxt_ptp_cfg *ptp, u64 target_ns,
+                                 u64 *cycles_delta)
+{
+       u64 cycles_now;
+       u64 nsec_now, nsec_delta;
+       int rc;
+
+       spin_lock_bh(&ptp->ptp_lock);
+       rc = bnxt_refclk_read(ptp->bp, NULL, &cycles_now);
+       if (rc) {
+               spin_unlock_bh(&ptp->ptp_lock);
+               return rc;
+       }
+       nsec_now = timecounter_cyc2time(&ptp->tc, cycles_now);
+       spin_unlock_bh(&ptp->ptp_lock);
+
+       nsec_delta = target_ns - nsec_now;
+       *cycles_delta = div64_u64(nsec_delta << ptp->cc.shift, ptp->cc.mult);
+       return 0;
+}
+
+static int bnxt_ptp_perout_cfg(struct bnxt_ptp_cfg *ptp,
+                              struct ptp_clock_request *rq)
+{
+       struct hwrm_func_ptp_cfg_input *req;
+       struct bnxt *bp = ptp->bp;
+       struct timespec64 ts;
+       u64 target_ns, delta;
+       u16 enables;
+       int rc;
+
+       ts.tv_sec = rq->perout.start.sec;
+       ts.tv_nsec = rq->perout.start.nsec;
+       target_ns = timespec64_to_ns(&ts);
+
+       rc = bnxt_get_target_cycles(ptp, target_ns, &delta);
+       if (rc)
+               return rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_CFG);
+       if (rc)
+               return rc;
+
+       enables = FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PERIOD |
+                 FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_UP |
+                 FUNC_PTP_CFG_REQ_ENABLES_PTP_FREQ_ADJ_EXT_PHASE;
+       req->enables = cpu_to_le16(enables);
+       req->ptp_pps_event = 0;
+       req->ptp_freq_adj_dll_source = 0;
+       req->ptp_freq_adj_dll_phase = 0;
+       req->ptp_freq_adj_ext_period = cpu_to_le32(NSEC_PER_SEC);
+       req->ptp_freq_adj_ext_up = 0;
+       req->ptp_freq_adj_ext_phase_lower = cpu_to_le32(delta);
+
+       return hwrm_req_send(bp, req);
+}
+
+static int bnxt_ptp_enable(struct ptp_clock_info *ptp_info,
                           struct ptp_clock_request *rq, int on)
 {
-       return -EOPNOTSUPP;
+       struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+                                               ptp_info);
+       struct bnxt *bp = ptp->bp;
+       u8 pin_id;
+       int rc;
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               /* Configure an External PPS IN */
+               pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS,
+                                     rq->extts.index);
+               if (!on)
+                       break;
+               rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_IN);
+               if (rc)
+                       return rc;
+               rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_EXTERNAL);
+               if (!rc)
+                       ptp->pps_info.pins[pin_id].event = BNXT_PPS_EVENT_EXTERNAL;
+               return rc;
+       case PTP_CLK_REQ_PEROUT:
+               /* Configure a Periodic PPS OUT */
+               pin_id = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT,
+                                     rq->perout.index);
+               if (!on)
+                       break;
+
+               rc = bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_PPS_OUT);
+               if (!rc)
+                       rc = bnxt_ptp_perout_cfg(ptp, rq);
+
+               return rc;
+       case PTP_CLK_REQ_PPS:
+               /* Configure PHC PPS IN */
+               rc = bnxt_ptp_cfg_pin(bp, 0, BNXT_PPS_PIN_PPS_IN);
+               if (rc)
+                       return rc;
+               rc = bnxt_ptp_cfg_event(bp, BNXT_PPS_EVENT_INTERNAL);
+               if (!rc)
+                       ptp->pps_info.pins[0].event = BNXT_PPS_EVENT_INTERNAL;
+               return rc;
+       default:
+               netdev_err(ptp->bp->dev, "Unrecognized PIN function\n");
+               return -EOPNOTSUPP;
+       }
+
+       return bnxt_ptp_cfg_pin(bp, pin_id, BNXT_PPS_PIN_NONE);
 }
 
 static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
 {
-       struct hwrm_port_mac_cfg_input req = {0};
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       struct hwrm_port_mac_cfg_input *req;
        u32 flags = 0;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+       if (rc)
+               return rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_MAC_CFG, -1, -1);
        if (ptp->rx_filter)
                flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE;
        else
@@ -170,11 +396,11 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
                flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE;
        else
                flags |= PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE;
-       req.flags = cpu_to_le32(flags);
-       req.enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
-       req.rx_ts_capture_ptp_msg_type = cpu_to_le16(ptp->rxctl);
+       req->flags = cpu_to_le32(flags);
+       req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
+       req->rx_ts_capture_ptp_msg_type = cpu_to_le16(ptp->rxctl);
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
@@ -311,8 +537,10 @@ static void bnxt_unmap_ptp_regs(struct bnxt *bp)
 static u64 bnxt_cc_read(const struct cyclecounter *cc)
 {
        struct bnxt_ptp_cfg *ptp = container_of(cc, struct bnxt_ptp_cfg, cc);
+       u64 ns = 0;
 
-       return bnxt_refclk_read(ptp->bp, NULL);
+       bnxt_refclk_read(ptp->bp, NULL, &ns);
+       return ns;
 }
 
 static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb)
@@ -410,6 +638,87 @@ static const struct ptp_clock_info bnxt_ptp_caps = {
        .enable         = bnxt_ptp_enable,
 };
 
+static int bnxt_ptp_verify(struct ptp_clock_info *ptp_info, unsigned int pin,
+                          enum ptp_pin_function func, unsigned int chan)
+{
+       struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
+                                               ptp_info);
+       /* Allow only PPS pin function configuration */
+       if (ptp->pps_info.pins[pin].usage <= BNXT_PPS_PIN_PPS_OUT &&
+           func != PTP_PF_PHYSYNC)
+               return 0;
+       else
+               return -EOPNOTSUPP;
+}
+
+static int bnxt_ptp_pps_init(struct bnxt *bp)
+{
+       struct hwrm_func_ptp_pin_qcfg_output *resp;
+       struct hwrm_func_ptp_pin_qcfg_input *req;
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+       struct ptp_clock_info *ptp_info;
+       struct bnxt_pps *pps_info;
+       u8 *pin_usg;
+       u32 i, rc;
+
+       /* Query current/default PIN CFG */
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_PTP_PIN_QCFG);
+       if (rc)
+               return rc;
+
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (rc || !resp->num_pins) {
+               hwrm_req_drop(bp, req);
+               return -EOPNOTSUPP;
+       }
+
+       ptp_info = &ptp->ptp_info;
+       pps_info = &ptp->pps_info;
+       pps_info->num_pins = resp->num_pins;
+       ptp_info->n_pins = pps_info->num_pins;
+       ptp_info->pin_config = kcalloc(ptp_info->n_pins,
+                                      sizeof(*ptp_info->pin_config),
+                                      GFP_KERNEL);
+       if (!ptp_info->pin_config) {
+               hwrm_req_drop(bp, req);
+               return -ENOMEM;
+       }
+
+       /* Report the TSIO capability to kernel */
+       pin_usg = &resp->pin0_usage;
+       for (i = 0; i < pps_info->num_pins; i++, pin_usg++) {
+               snprintf(ptp_info->pin_config[i].name,
+                        sizeof(ptp_info->pin_config[i].name), "bnxt_pps%d", i);
+               ptp_info->pin_config[i].index = i;
+               ptp_info->pin_config[i].chan = i;
+               if (*pin_usg == BNXT_PPS_PIN_PPS_IN)
+                       ptp_info->pin_config[i].func = PTP_PF_EXTTS;
+               else if (*pin_usg == BNXT_PPS_PIN_PPS_OUT)
+                       ptp_info->pin_config[i].func = PTP_PF_PEROUT;
+               else
+                       ptp_info->pin_config[i].func = PTP_PF_NONE;
+
+               pps_info->pins[i].usage = *pin_usg;
+       }
+       hwrm_req_drop(bp, req);
+
+       /* Only 1 each of ext_ts and per_out pins is available in HW */
+       ptp_info->n_ext_ts = 1;
+       ptp_info->n_per_out = 1;
+       ptp_info->pps = 1;
+       ptp_info->verify = bnxt_ptp_verify;
+
+       return 0;
+}
+
+static bool bnxt_pps_config_ok(struct bnxt *bp)
+{
+       struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+
+       return !(bp->fw_cap & BNXT_FW_CAP_PTP_PPS) == !ptp->ptp_info.pin_config;
+}
+
 int bnxt_ptp_init(struct bnxt *bp)
 {
        struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
@@ -422,6 +731,15 @@ int bnxt_ptp_init(struct bnxt *bp)
        if (rc)
                return rc;
 
+       if (ptp->ptp_clock && bnxt_pps_config_ok(bp))
+               return 0;
+
+       if (ptp->ptp_clock) {
+               ptp_clock_unregister(ptp->ptp_clock);
+               ptp->ptp_clock = NULL;
+               kfree(ptp->ptp_info.pin_config);
+               ptp->ptp_info.pin_config = NULL;
+       }
        atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
        spin_lock_init(&ptp->ptp_lock);
 
@@ -435,6 +753,10 @@ int bnxt_ptp_init(struct bnxt *bp)
        timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
 
        ptp->ptp_info = bnxt_ptp_caps;
+       if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) {
+               if (bnxt_ptp_pps_init(bp))
+                       netdev_err(bp->dev, "1pps not initialized, continuing without 1pps support\n");
+       }
        ptp->ptp_clock = ptp_clock_register(&ptp->ptp_info, &bp->pdev->dev);
        if (IS_ERR(ptp->ptp_clock)) {
                int err = PTR_ERR(ptp->ptp_clock);
@@ -445,7 +767,7 @@ int bnxt_ptp_init(struct bnxt *bp)
        }
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                spin_lock_bh(&ptp->ptp_lock);
-               ptp->current_time = bnxt_refclk_read(bp, NULL);
+               bnxt_refclk_read(bp, NULL, &ptp->current_time);
                WRITE_ONCE(ptp->old_time, ptp->current_time);
                spin_unlock_bh(&ptp->ptp_lock);
                ptp_schedule_worker(ptp->ptp_clock, 0);
@@ -464,6 +786,9 @@ void bnxt_ptp_clear(struct bnxt *bp)
                ptp_clock_unregister(ptp->ptp_clock);
 
        ptp->ptp_clock = NULL;
+       kfree(ptp->ptp_info.pin_config);
+       ptp->ptp_info.pin_config = NULL;
+
        if (ptp->tx_skb) {
                dev_kfree_skb_any(ptp->tx_skb);
                ptp->tx_skb = NULL;
index 524f1c2..fa5f057 100644 (file)
                                 PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \
                                 PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET)
 
+struct pps_pin {
+       u8 event;
+       u8 usage;
+       u8 state;
+};
+
+#define TSIO_PIN_VALID(pin) ((pin) < (BNXT_MAX_TSIO_PINS))
+
+#define EVENT_DATA2_PPS_EVENT_TYPE(data2)                              \
+       ((data2) & ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_EVENT_TYPE)
+
+#define EVENT_DATA2_PPS_PIN_NUM(data2)                                 \
+       (((data2) &                                                     \
+         ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_MASK) >>\
+        ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PIN_NUMBER_SFT)
+
+#define BNXT_DATA2_UPPER_MSK                                           \
+       ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_MASK
+
+#define BNXT_DATA2_UPPER_SFT                                           \
+       (32 -                                                           \
+        ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA2_PPS_TIMESTAMP_UPPER_SFT)
+
+#define BNXT_DATA1_LOWER_MSK                                           \
+       ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_MASK
+
+#define BNXT_DATA1_LOWER_SFT                                           \
+         ASYNC_EVENT_CMPL_PPS_TIMESTAMP_EVENT_DATA1_PPS_TIMESTAMP_LOWER_SFT
+
+#define EVENT_PPS_TS(data2, data1)                                     \
+       (((u64)((data2) & BNXT_DATA2_UPPER_MSK) << BNXT_DATA2_UPPER_SFT) |\
+        (((data1) & BNXT_DATA1_LOWER_MSK) >> BNXT_DATA1_LOWER_SFT))
+
+#define BNXT_PPS_PIN_DISABLE   0
+#define BNXT_PPS_PIN_ENABLE    1
+#define BNXT_PPS_PIN_NONE      0
+#define BNXT_PPS_PIN_PPS_IN    1
+#define BNXT_PPS_PIN_PPS_OUT   2
+#define BNXT_PPS_PIN_SYNC_IN   3
+#define BNXT_PPS_PIN_SYNC_OUT  4
+
+#define BNXT_PPS_EVENT_INTERNAL        1
+#define BNXT_PPS_EVENT_EXTERNAL        2
+
+struct bnxt_pps {
+       u8 num_pins;
+#define BNXT_MAX_TSIO_PINS     4
+       struct pps_pin pins[BNXT_MAX_TSIO_PINS];
+};
+
 struct bnxt_ptp_cfg {
        struct ptp_clock_info   ptp_info;
        struct ptp_clock        *ptp_clock;
        struct cyclecounter     cc;
        struct timecounter      tc;
+       struct bnxt_pps         pps_info;
        /* serialize timecounter access */
        spinlock_t              ptp_lock;
        struct sk_buff          *tx_skb;
@@ -77,6 +128,8 @@ do {                                         \
 #endif
 
 int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off);
+void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2);
+void bnxt_ptp_reapply_pps(struct bnxt *bp);
 int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr);
 int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr);
 int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb);
index 7fa881e..70d8ca3 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/etherdevice.h>
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
 #include "bnxt_vfr.h"
 static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
                                          struct bnxt_vf_info *vf, u16 event_id)
 {
-       struct hwrm_fwd_async_event_cmpl_input req = {0};
+       struct hwrm_fwd_async_event_cmpl_input *req;
        struct hwrm_async_event_cmpl *async_cmpl;
        int rc = 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FWD_ASYNC_EVENT_CMPL);
+       if (rc)
+               goto exit;
+
        if (vf)
-               req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
+               req->encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
        else
                /* broadcast this async event to all VFs */
-               req.encap_async_event_target_id = cpu_to_le16(0xffff);
-       async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl;
+               req->encap_async_event_target_id = cpu_to_le16(0xffff);
+       async_cmpl =
+               (struct hwrm_async_event_cmpl *)req->encap_async_event_cmpl;
        async_cmpl->type = cpu_to_le16(ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
        async_cmpl->event_id = cpu_to_le16(event_id);
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_send(bp, req);
+exit:
        if (rc)
                netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n",
                           rc);
@@ -62,10 +68,10 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
 
 int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
 {
-       struct hwrm_func_cfg_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
-       struct bnxt_vf_info *vf;
+       struct hwrm_func_cfg_input *req;
        bool old_setting = false;
+       struct bnxt_vf_info *vf;
        u32 func_flags;
        int rc;
 
@@ -89,36 +95,38 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
        /*TODO: if the driver supports VLAN filter on guest VLAN,
         * the spoof check should also include vlan anti-spoofing
         */
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
-       req.flags = cpu_to_le32(func_flags);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
        if (!rc) {
-               if (setting)
-                       vf->flags |= BNXT_VF_SPOOFCHK;
-               else
-                       vf->flags &= ~BNXT_VF_SPOOFCHK;
+               req->fid = cpu_to_le16(vf->fw_fid);
+               req->flags = cpu_to_le32(func_flags);
+               rc = hwrm_req_send(bp, req);
+               if (!rc) {
+                       if (setting)
+                               vf->flags |= BNXT_VF_SPOOFCHK;
+                       else
+                               vf->flags &= ~BNXT_VF_SPOOFCHK;
+               }
        }
        return rc;
 }
 
 static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
 {
-       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_qcfg_input req = {0};
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-       req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (rc) {
-               mutex_unlock(&bp->hwrm_cmd_lock);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+       if (rc)
                return rc;
-       }
-       vf->func_qcfg_flags = le16_to_cpu(resp->flags);
-       mutex_unlock(&bp->hwrm_cmd_lock);
-       return 0;
+
+       req->fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       if (!rc)
+               vf->func_qcfg_flags = le16_to_cpu(resp->flags);
+       hwrm_req_drop(bp, req);
+       return rc;
 }
 
 bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
@@ -132,18 +140,22 @@ bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
 
 static int bnxt_hwrm_set_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
+       int rc;
 
        if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
+
+       req->fid = cpu_to_le16(vf->fw_fid);
        if (vf->flags & BNXT_VF_TRUST)
-               req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+               req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
        else
-               req.flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               req->flags = cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_DISABLE);
+       return hwrm_req_send(bp, req);
 }
 
 int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trusted)
@@ -203,8 +215,8 @@ int bnxt_get_vf_config(struct net_device *dev, int vf_id,
 
 int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
 {
-       struct hwrm_func_cfg_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
+       struct hwrm_func_cfg_input *req;
        struct bnxt_vf_info *vf;
        int rc;
 
@@ -220,19 +232,23 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
        }
        vf = &bp->pf.vf[vf_id];
 
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
+
        memcpy(vf->mac_addr, mac, ETH_ALEN);
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
-       memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+       req->fid = cpu_to_le16(vf->fw_fid);
+       req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+       memcpy(req->dflt_mac_addr, mac, ETH_ALEN);
+       return hwrm_req_send(bp, req);
 }
 
 int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
                     __be16 vlan_proto)
 {
-       struct hwrm_func_cfg_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
+       struct hwrm_func_cfg_input *req;
        struct bnxt_vf_info *vf;
        u16 vlan_tag;
        int rc;
@@ -258,21 +274,23 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
        if (vlan_tag == vf->vlan)
                return 0;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
-       req.dflt_vlan = cpu_to_le16(vlan_tag);
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (!rc)
-               vf->vlan = vlan_tag;
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (!rc) {
+               req->fid = cpu_to_le16(vf->fw_fid);
+               req->dflt_vlan = cpu_to_le16(vlan_tag);
+               req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+               rc = hwrm_req_send(bp, req);
+               if (!rc)
+                       vf->vlan = vlan_tag;
+       }
        return rc;
 }
 
 int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate,
                   int max_tx_rate)
 {
-       struct hwrm_func_cfg_input req = {0};
        struct bnxt *bp = netdev_priv(dev);
+       struct hwrm_func_cfg_input *req;
        struct bnxt_vf_info *vf;
        u32 pf_link_speed;
        int rc;
@@ -296,16 +314,18 @@ int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate,
        }
        if (min_tx_rate == vf->min_tx_rate && max_tx_rate == vf->max_tx_rate)
                return 0;
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
-       req.max_bw = cpu_to_le32(max_tx_rate);
-       req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
-       req.min_bw = cpu_to_le32(min_tx_rate);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
        if (!rc) {
-               vf->min_tx_rate = min_tx_rate;
-               vf->max_tx_rate = max_tx_rate;
+               req->fid = cpu_to_le16(vf->fw_fid);
+               req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW |
+                                          FUNC_CFG_REQ_ENABLES_MIN_BW);
+               req->max_bw = cpu_to_le32(max_tx_rate);
+               req->min_bw = cpu_to_le32(min_tx_rate);
+               rc = hwrm_req_send(bp, req);
+               if (!rc) {
+                       vf->min_tx_rate = min_tx_rate;
+                       vf->max_tx_rate = max_tx_rate;
+               }
        }
        return rc;
 }
@@ -358,21 +378,22 @@ static int bnxt_set_vf_attr(struct bnxt *bp, int num_vfs)
 
 static int bnxt_hwrm_func_vf_resource_free(struct bnxt *bp, int num_vfs)
 {
-       int i, rc = 0;
+       struct hwrm_func_vf_resc_free_input *req;
        struct bnxt_pf_info *pf = &bp->pf;
-       struct hwrm_func_vf_resc_free_input req = {0};
+       int i, rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESC_FREE, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_RESC_FREE);
+       if (rc)
+               return rc;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       hwrm_req_hold(bp, req);
        for (i = pf->first_vf_id; i < pf->first_vf_id + num_vfs; i++) {
-               req.vf_id = cpu_to_le16(i);
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               req->vf_id = cpu_to_le16(i);
+               rc = hwrm_req_send(bp, req);
                if (rc)
                        break;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -446,51 +467,55 @@ static int bnxt_alloc_vf_resources(struct bnxt *bp, int num_vfs)
 
 static int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
 {
-       struct hwrm_func_buf_rgtr_input req = {0};
+       struct hwrm_func_buf_rgtr_input *req;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BUF_RGTR, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_BUF_RGTR);
+       if (rc)
+               return rc;
 
-       req.req_buf_num_pages = cpu_to_le16(bp->pf.hwrm_cmd_req_pages);
-       req.req_buf_page_size = cpu_to_le16(BNXT_PAGE_SHIFT);
-       req.req_buf_len = cpu_to_le16(BNXT_HWRM_REQ_MAX_SIZE);
-       req.req_buf_page_addr0 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[0]);
-       req.req_buf_page_addr1 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[1]);
-       req.req_buf_page_addr2 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[2]);
-       req.req_buf_page_addr3 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[3]);
+       req->req_buf_num_pages = cpu_to_le16(bp->pf.hwrm_cmd_req_pages);
+       req->req_buf_page_size = cpu_to_le16(BNXT_PAGE_SHIFT);
+       req->req_buf_len = cpu_to_le16(BNXT_HWRM_REQ_MAX_SIZE);
+       req->req_buf_page_addr0 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[0]);
+       req->req_buf_page_addr1 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[1]);
+       req->req_buf_page_addr2 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[2]);
+       req->req_buf_page_addr3 = cpu_to_le64(bp->pf.hwrm_cmd_req_dma_addr[3]);
 
-       return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
-/* Caller holds bp->hwrm_cmd_lock mutex lock */
-static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
+static int __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
 {
-       struct hwrm_func_cfg_input req = {0};
+       struct hwrm_func_cfg_input *req;
        struct bnxt_vf_info *vf;
+       int rc;
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
 
        vf = &bp->pf.vf[vf_id];
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
-       req.fid = cpu_to_le16(vf->fw_fid);
+       req->fid = cpu_to_le16(vf->fw_fid);
 
        if (is_valid_ether_addr(vf->mac_addr)) {
-               req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
-               memcpy(req.dflt_mac_addr, vf->mac_addr, ETH_ALEN);
+               req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+               memcpy(req->dflt_mac_addr, vf->mac_addr, ETH_ALEN);
        }
        if (vf->vlan) {
-               req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
-               req.dflt_vlan = cpu_to_le16(vf->vlan);
+               req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
+               req->dflt_vlan = cpu_to_le16(vf->vlan);
        }
        if (vf->max_tx_rate) {
-               req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
-               req.max_bw = cpu_to_le32(vf->max_tx_rate);
-#ifdef HAVE_IFLA_TX_RATE
-               req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
-               req.min_bw = cpu_to_le32(vf->min_tx_rate);
-#endif
+               req->enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW |
+                                           FUNC_CFG_REQ_ENABLES_MIN_BW);
+               req->max_bw = cpu_to_le32(vf->max_tx_rate);
+               req->min_bw = cpu_to_le32(vf->min_tx_rate);
        }
        if (vf->flags & BNXT_VF_TRUST)
-               req.flags |= cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
+               req->flags |= cpu_to_le32(FUNC_CFG_REQ_FLAGS_TRUSTED_VF_ENABLE);
 
-       _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       return hwrm_req_send(bp, req);
 }
 
 /* Only called by PF to reserve resources for VFs, returns actual number of
@@ -498,7 +523,7 @@ static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
  */
 static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
 {
-       struct hwrm_func_vf_resource_cfg_input req = {0};
+       struct hwrm_func_vf_resource_cfg_input *req;
        struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
        u16 vf_tx_rings, vf_rx_rings, vf_cp_rings;
        u16 vf_stat_ctx, vf_vnics, vf_ring_grps;
@@ -507,7 +532,9 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
        u16 vf_msix = 0;
        u16 vf_rss;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_RESOURCE_CFG);
+       if (rc)
+               return rc;
 
        if (bp->flags & BNXT_FLAG_CHIP_P5) {
                vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp);
@@ -526,21 +553,21 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
        vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
        vf_rss = hw_resc->max_rsscos_ctxs - bp->rsscos_nr_ctxs;
 
-       req.min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
+       req->min_rsscos_ctx = cpu_to_le16(BNXT_VF_MIN_RSS_CTX);
        if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL_STATIC) {
                min = 0;
-               req.min_rsscos_ctx = cpu_to_le16(min);
+               req->min_rsscos_ctx = cpu_to_le16(min);
        }
        if (pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL ||
            pf->vf_resv_strategy == BNXT_VF_RESV_STRATEGY_MINIMAL_STATIC) {
-               req.min_cmpl_rings = cpu_to_le16(min);
-               req.min_tx_rings = cpu_to_le16(min);
-               req.min_rx_rings = cpu_to_le16(min);
-               req.min_l2_ctxs = cpu_to_le16(min);
-               req.min_vnics = cpu_to_le16(min);
-               req.min_stat_ctx = cpu_to_le16(min);
+               req->min_cmpl_rings = cpu_to_le16(min);
+               req->min_tx_rings = cpu_to_le16(min);
+               req->min_rx_rings = cpu_to_le16(min);
+               req->min_l2_ctxs = cpu_to_le16(min);
+               req->min_vnics = cpu_to_le16(min);
+               req->min_stat_ctx = cpu_to_le16(min);
                if (!(bp->flags & BNXT_FLAG_CHIP_P5))
-                       req.min_hw_ring_grps = cpu_to_le16(min);
+                       req->min_hw_ring_grps = cpu_to_le16(min);
        } else {
                vf_cp_rings /= num_vfs;
                vf_tx_rings /= num_vfs;
@@ -550,56 +577,57 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
                vf_ring_grps /= num_vfs;
                vf_rss /= num_vfs;
 
-               req.min_cmpl_rings = cpu_to_le16(vf_cp_rings);
-               req.min_tx_rings = cpu_to_le16(vf_tx_rings);
-               req.min_rx_rings = cpu_to_le16(vf_rx_rings);
-               req.min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
-               req.min_vnics = cpu_to_le16(vf_vnics);
-               req.min_stat_ctx = cpu_to_le16(vf_stat_ctx);
-               req.min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
-               req.min_rsscos_ctx = cpu_to_le16(vf_rss);
+               req->min_cmpl_rings = cpu_to_le16(vf_cp_rings);
+               req->min_tx_rings = cpu_to_le16(vf_tx_rings);
+               req->min_rx_rings = cpu_to_le16(vf_rx_rings);
+               req->min_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
+               req->min_vnics = cpu_to_le16(vf_vnics);
+               req->min_stat_ctx = cpu_to_le16(vf_stat_ctx);
+               req->min_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+               req->min_rsscos_ctx = cpu_to_le16(vf_rss);
        }
-       req.max_cmpl_rings = cpu_to_le16(vf_cp_rings);
-       req.max_tx_rings = cpu_to_le16(vf_tx_rings);
-       req.max_rx_rings = cpu_to_le16(vf_rx_rings);
-       req.max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
-       req.max_vnics = cpu_to_le16(vf_vnics);
-       req.max_stat_ctx = cpu_to_le16(vf_stat_ctx);
-       req.max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
-       req.max_rsscos_ctx = cpu_to_le16(vf_rss);
+       req->max_cmpl_rings = cpu_to_le16(vf_cp_rings);
+       req->max_tx_rings = cpu_to_le16(vf_tx_rings);
+       req->max_rx_rings = cpu_to_le16(vf_rx_rings);
+       req->max_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX);
+       req->max_vnics = cpu_to_le16(vf_vnics);
+       req->max_stat_ctx = cpu_to_le16(vf_stat_ctx);
+       req->max_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+       req->max_rsscos_ctx = cpu_to_le16(vf_rss);
        if (bp->flags & BNXT_FLAG_CHIP_P5)
-               req.max_msix = cpu_to_le16(vf_msix / num_vfs);
+               req->max_msix = cpu_to_le16(vf_msix / num_vfs);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       hwrm_req_hold(bp, req);
        for (i = 0; i < num_vfs; i++) {
                if (reset)
                        __bnxt_set_vf_params(bp, i);
 
-               req.vf_id = cpu_to_le16(pf->first_vf_id + i);
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               req->vf_id = cpu_to_le16(pf->first_vf_id + i);
+               rc = hwrm_req_send(bp, req);
                if (rc)
                        break;
                pf->active_vfs = i + 1;
                pf->vf[i].fw_fid = pf->first_vf_id + i;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+
        if (pf->active_vfs) {
                u16 n = pf->active_vfs;
 
-               hw_resc->max_tx_rings -= le16_to_cpu(req.min_tx_rings) * n;
-               hw_resc->max_rx_rings -= le16_to_cpu(req.min_rx_rings) * n;
-               hw_resc->max_hw_ring_grps -= le16_to_cpu(req.min_hw_ring_grps) *
-                                            n;
-               hw_resc->max_cp_rings -= le16_to_cpu(req.min_cmpl_rings) * n;
-               hw_resc->max_rsscos_ctxs -= le16_to_cpu(req.min_rsscos_ctx) * n;
-               hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
-               hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
+               hw_resc->max_tx_rings -= le16_to_cpu(req->min_tx_rings) * n;
+               hw_resc->max_rx_rings -= le16_to_cpu(req->min_rx_rings) * n;
+               hw_resc->max_hw_ring_grps -=
+                       le16_to_cpu(req->min_hw_ring_grps) * n;
+               hw_resc->max_cp_rings -= le16_to_cpu(req->min_cmpl_rings) * n;
+               hw_resc->max_rsscos_ctxs -=
+                       le16_to_cpu(req->min_rsscos_ctx) * n;
+               hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
+               hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
                if (bp->flags & BNXT_FLAG_CHIP_P5)
                        hw_resc->max_irqs -= vf_msix * n;
 
                rc = pf->active_vfs;
        }
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -608,15 +636,18 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
  */
 static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 {
-       u32 rc = 0, mtu, i;
        u16 vf_tx_rings, vf_rx_rings, vf_cp_rings, vf_stat_ctx, vf_vnics;
        struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-       struct hwrm_func_cfg_input req = {0};
        struct bnxt_pf_info *pf = &bp->pf;
+       struct hwrm_func_cfg_input *req;
        int total_vf_tx_rings = 0;
        u16 vf_ring_grps;
+       u32 mtu, i;
+       int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_CFG);
+       if (rc)
+               return rc;
 
        /* Remaining rings are distributed equally amongs VF's for now */
        vf_cp_rings = bnxt_get_avail_cp_rings_for_en(bp) / num_vfs;
@@ -632,50 +663,49 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
        vf_vnics = (hw_resc->max_vnics - bp->nr_vnics) / num_vfs;
        vf_vnics = min_t(u16, vf_vnics, vf_rx_rings);
 
-       req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ADMIN_MTU |
-                                 FUNC_CFG_REQ_ENABLES_MRU |
-                                 FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_L2_CTXS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_VNICS |
-                                 FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS);
+       req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ADMIN_MTU |
+                                  FUNC_CFG_REQ_ENABLES_MRU |
+                                  FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_TX_RINGS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_L2_CTXS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_VNICS |
+                                  FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS);
 
        mtu = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
-       req.mru = cpu_to_le16(mtu);
-       req.admin_mtu = cpu_to_le16(mtu);
+       req->mru = cpu_to_le16(mtu);
+       req->admin_mtu = cpu_to_le16(mtu);
 
-       req.num_rsscos_ctxs = cpu_to_le16(1);
-       req.num_cmpl_rings = cpu_to_le16(vf_cp_rings);
-       req.num_tx_rings = cpu_to_le16(vf_tx_rings);
-       req.num_rx_rings = cpu_to_le16(vf_rx_rings);
-       req.num_hw_ring_grps = cpu_to_le16(vf_ring_grps);
-       req.num_l2_ctxs = cpu_to_le16(4);
+       req->num_rsscos_ctxs = cpu_to_le16(1);
+       req->num_cmpl_rings = cpu_to_le16(vf_cp_rings);
+       req->num_tx_rings = cpu_to_le16(vf_tx_rings);
+       req->num_rx_rings = cpu_to_le16(vf_rx_rings);
+       req->num_hw_ring_grps = cpu_to_le16(vf_ring_grps);
+       req->num_l2_ctxs = cpu_to_le16(4);
 
-       req.num_vnics = cpu_to_le16(vf_vnics);
+       req->num_vnics = cpu_to_le16(vf_vnics);
        /* FIXME spec currently uses 1 bit for stats ctx */
-       req.num_stat_ctxs = cpu_to_le16(vf_stat_ctx);
+       req->num_stat_ctxs = cpu_to_le16(vf_stat_ctx);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
+       hwrm_req_hold(bp, req);
        for (i = 0; i < num_vfs; i++) {
                int vf_tx_rsvd = vf_tx_rings;
 
-               req.fid = cpu_to_le16(pf->first_vf_id + i);
-               rc = _hwrm_send_message(bp, &req, sizeof(req),
-                                       HWRM_CMD_TIMEOUT);
+               req->fid = cpu_to_le16(pf->first_vf_id + i);
+               rc = hwrm_req_send(bp, req);
                if (rc)
                        break;
                pf->active_vfs = i + 1;
-               pf->vf[i].fw_fid = le16_to_cpu(req.fid);
+               pf->vf[i].fw_fid = le16_to_cpu(req->fid);
                rc = __bnxt_hwrm_get_tx_rings(bp, pf->vf[i].fw_fid,
                                              &vf_tx_rsvd);
                if (rc)
                        break;
                total_vf_tx_rings += vf_tx_rsvd;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        if (pf->active_vfs) {
                hw_resc->max_tx_rings -= total_vf_tx_rings;
                hw_resc->max_rx_rings -= vf_rx_rings * num_vfs;
@@ -893,23 +923,24 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
                              void *encap_resp, __le64 encap_resp_addr,
                              __le16 encap_resp_cpr, u32 msg_size)
 {
-       int rc = 0;
-       struct hwrm_fwd_resp_input req = {0};
+       struct hwrm_fwd_resp_input *req;
+       int rc;
 
        if (BNXT_FWD_RESP_SIZE_ERR(msg_size))
                return -EINVAL;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_RESP, -1, -1);
-
-       /* Set the new target id */
-       req.target_id = cpu_to_le16(vf->fw_fid);
-       req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
-       req.encap_resp_len = cpu_to_le16(msg_size);
-       req.encap_resp_addr = encap_resp_addr;
-       req.encap_resp_cmpl_ring = encap_resp_cpr;
-       memcpy(req.encap_resp, encap_resp, msg_size);
-
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_FWD_RESP);
+       if (!rc) {
+               /* Set the new target id */
+               req->target_id = cpu_to_le16(vf->fw_fid);
+               req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+               req->encap_resp_len = cpu_to_le16(msg_size);
+               req->encap_resp_addr = encap_resp_addr;
+               req->encap_resp_cmpl_ring = encap_resp_cpr;
+               memcpy(req->encap_resp, encap_resp, msg_size);
+
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_err(bp->dev, "hwrm_fwd_resp failed. rc:%d\n", rc);
        return rc;
@@ -918,19 +949,21 @@ static int bnxt_hwrm_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
 static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
                                  u32 msg_size)
 {
-       int rc = 0;
-       struct hwrm_reject_fwd_resp_input req = {0};
+       struct hwrm_reject_fwd_resp_input *req;
+       int rc;
 
        if (BNXT_REJ_FWD_RESP_SIZE_ERR(msg_size))
                return -EINVAL;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_REJECT_FWD_RESP, -1, -1);
-       /* Set the new target id */
-       req.target_id = cpu_to_le16(vf->fw_fid);
-       req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
-       memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
+       rc = hwrm_req_init(bp, req, HWRM_REJECT_FWD_RESP);
+       if (!rc) {
+               /* Set the new target id */
+               req->target_id = cpu_to_le16(vf->fw_fid);
+               req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+               memcpy(req->encap_request, vf->hwrm_cmd_req_addr, msg_size);
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_err(bp->dev, "hwrm_fwd_err_resp failed. rc:%d\n", rc);
        return rc;
@@ -939,19 +972,21 @@ static int bnxt_hwrm_fwd_err_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
 static int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, struct bnxt_vf_info *vf,
                                   u32 msg_size)
 {
-       int rc = 0;
-       struct hwrm_exec_fwd_resp_input req = {0};
+       struct hwrm_exec_fwd_resp_input *req;
+       int rc;
 
        if (BNXT_EXEC_FWD_RESP_SIZE_ERR(msg_size))
                return -EINVAL;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_EXEC_FWD_RESP, -1, -1);
-       /* Set the new target id */
-       req.target_id = cpu_to_le16(vf->fw_fid);
-       req.encap_resp_target_id = cpu_to_le16(vf->fw_fid);
-       memcpy(req.encap_request, vf->hwrm_cmd_req_addr, msg_size);
+       rc = hwrm_req_init(bp, req, HWRM_EXEC_FWD_RESP);
+       if (!rc) {
+               /* Set the new target id */
+               req->target_id = cpu_to_le16(vf->fw_fid);
+               req->encap_resp_target_id = cpu_to_le16(vf->fw_fid);
+               memcpy(req->encap_request, vf->hwrm_cmd_req_addr, msg_size);
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_err(bp->dev, "hwrm_exec_fw_resp failed. rc:%d\n", rc);
        return rc;
@@ -1031,10 +1066,10 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
 
                phy_qcfg_req =
                (struct hwrm_port_phy_qcfg_input *)vf->hwrm_cmd_req_addr;
-               mutex_lock(&bp->hwrm_cmd_lock);
+               mutex_lock(&bp->link_lock);
                memcpy(&phy_qcfg_resp, &bp->link_info.phy_qcfg_resp,
                       sizeof(phy_qcfg_resp));
-               mutex_unlock(&bp->hwrm_cmd_lock);
+               mutex_unlock(&bp->link_lock);
                phy_qcfg_resp.resp_len = cpu_to_le16(sizeof(phy_qcfg_resp));
                phy_qcfg_resp.seq_id = phy_qcfg_req->seq_id;
                phy_qcfg_resp.valid = 1;
@@ -1118,7 +1153,7 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
 
 int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
 {
-       struct hwrm_func_vf_cfg_input req = {0};
+       struct hwrm_func_vf_cfg_input *req;
        int rc = 0;
 
        if (!BNXT_VF(bp))
@@ -1129,10 +1164,16 @@ int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
                        rc = -EADDRNOTAVAIL;
                goto mac_done;
        }
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
-       req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
-       memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_VF_CFG);
+       if (rc)
+               goto mac_done;
+
+       req->enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+       memcpy(req->dflt_mac_addr, mac, ETH_ALEN);
+       if (!strict)
+               hwrm_req_flags(bp, req, BNXT_HWRM_CTX_SILENT);
+       rc = hwrm_req_send(bp, req);
 mac_done:
        if (rc && strict) {
                rc = -EADDRNOTAVAIL;
@@ -1145,15 +1186,17 @@ mac_done:
 
 void bnxt_update_vf_mac(struct bnxt *bp)
 {
-       struct hwrm_func_qcaps_input req = {0};
-       struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_func_qcaps_output *resp;
+       struct hwrm_func_qcaps_input *req;
        bool inform_pf = false;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
-       req.fid = cpu_to_le16(0xffff);
+       if (hwrm_req_init(bp, req, HWRM_FUNC_QCAPS))
+               return;
+
+       req->fid = cpu_to_le16(0xffff);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       if (_hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT))
+       resp = hwrm_req_hold(bp, req);
+       if (hwrm_req_send(bp, req))
                goto update_vf_mac_exit;
 
        /* Store MAC address from the firmware.  There are 2 cases:
@@ -1176,7 +1219,7 @@ void bnxt_update_vf_mac(struct bnxt *bp)
        if (is_valid_ether_addr(bp->vf.mac_addr))
                memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
 update_vf_mac_exit:
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        if (inform_pf)
                bnxt_approve_mac(bp, bp->dev->dev_addr, false);
 }
index 5e4429b..46fae1a 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_sriov.h"
 #include "bnxt_tc.h"
 #include "bnxt_vfr.h"
@@ -502,16 +503,18 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
                                   struct bnxt_tc_flow_node *flow_node)
 {
-       struct hwrm_cfa_flow_free_input req = { 0 };
+       struct hwrm_cfa_flow_free_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
-       if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
-               req.ext_flow_handle = flow_node->ext_flow_handle;
-       else
-               req.flow_handle = flow_node->flow_handle;
+       rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_FREE);
+       if (!rc) {
+               if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
+                       req->ext_flow_handle = flow_node->ext_flow_handle;
+               else
+                       req->flow_handle = flow_node->flow_handle;
 
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 
@@ -587,20 +590,22 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
        struct bnxt_tc_actions *actions = &flow->actions;
        struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
        struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
-       struct hwrm_cfa_flow_alloc_input req = { 0 };
        struct hwrm_cfa_flow_alloc_output *resp;
+       struct hwrm_cfa_flow_alloc_input *req;
        u16 flow_flags = 0, action_flags = 0;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_ALLOC);
+       if (rc)
+               return rc;
 
-       req.src_fid = cpu_to_le16(flow->src_fid);
-       req.ref_flow_handle = ref_flow_handle;
+       req->src_fid = cpu_to_le16(flow->src_fid);
+       req->ref_flow_handle = ref_flow_handle;
 
        if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
-               memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
+               memcpy(req->l2_rewrite_dmac, actions->l2_rewrite_dmac,
                       ETH_ALEN);
-               memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
+               memcpy(req->l2_rewrite_smac, actions->l2_rewrite_smac,
                       ETH_ALEN);
                action_flags |=
                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
@@ -615,71 +620,71 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
                                action_flags |=
                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
                                /* L3 source rewrite */
-                               req.nat_ip_address[0] =
+                               req->nat_ip_address[0] =
                                        actions->nat.l3.ipv4.saddr.s_addr;
                                /* L4 source port */
                                if (actions->nat.l4.ports.sport)
-                                       req.nat_port =
+                                       req->nat_port =
                                                actions->nat.l4.ports.sport;
                        } else {
                                action_flags |=
                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
                                /* L3 destination rewrite */
-                               req.nat_ip_address[0] =
+                               req->nat_ip_address[0] =
                                        actions->nat.l3.ipv4.daddr.s_addr;
                                /* L4 destination port */
                                if (actions->nat.l4.ports.dport)
-                                       req.nat_port =
+                                       req->nat_port =
                                                actions->nat.l4.ports.dport;
                        }
                        netdev_dbg(bp->dev,
-                                  "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
-                                  req.nat_ip_address, actions->nat.src_xlate,
-                                  req.nat_port);
+                                  "req->nat_ip_address: %pI4 src_xlate: %d req->nat_port: %x\n",
+                                  req->nat_ip_address, actions->nat.src_xlate,
+                                  req->nat_port);
                } else {
                        if (actions->nat.src_xlate) {
                                action_flags |=
                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
                                /* L3 source rewrite */
-                               memcpy(req.nat_ip_address,
+                               memcpy(req->nat_ip_address,
                                       actions->nat.l3.ipv6.saddr.s6_addr32,
-                                      sizeof(req.nat_ip_address));
+                                      sizeof(req->nat_ip_address));
                                /* L4 source port */
                                if (actions->nat.l4.ports.sport)
-                                       req.nat_port =
+                                       req->nat_port =
                                                actions->nat.l4.ports.sport;
                        } else {
                                action_flags |=
                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
                                /* L3 destination rewrite */
-                               memcpy(req.nat_ip_address,
+                               memcpy(req->nat_ip_address,
                                       actions->nat.l3.ipv6.daddr.s6_addr32,
-                                      sizeof(req.nat_ip_address));
+                                      sizeof(req->nat_ip_address));
                                /* L4 destination port */
                                if (actions->nat.l4.ports.dport)
-                                       req.nat_port =
+                                       req->nat_port =
                                                actions->nat.l4.ports.dport;
                        }
                        netdev_dbg(bp->dev,
-                                  "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
-                                  req.nat_ip_address, actions->nat.src_xlate,
-                                  req.nat_port);
+                                  "req->nat_ip_address: %pI6 src_xlate: %d req->nat_port: %x\n",
+                                  req->nat_ip_address, actions->nat.src_xlate,
+                                  req->nat_port);
                }
        }
 
        if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
            actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
-               req.tunnel_handle = tunnel_handle;
+               req->tunnel_handle = tunnel_handle;
                flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
                action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
        }
 
-       req.ethertype = flow->l2_key.ether_type;
-       req.ip_proto = flow->l4_key.ip_proto;
+       req->ethertype = flow->l2_key.ether_type;
+       req->ip_proto = flow->l4_key.ip_proto;
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
-               memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
-               memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
+               memcpy(req->dmac, flow->l2_key.dmac, ETH_ALEN);
+               memcpy(req->smac, flow->l2_key.smac, ETH_ALEN);
        }
 
        if (flow->l2_key.num_vlans > 0) {
@@ -688,7 +693,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
                 * in outer_vlan_tci when num_vlans is 1 (which is
                 * always the case in TC.)
                 */
-               req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
+               req->outer_vlan_tci = flow->l2_key.inner_vlan_tci;
        }
 
        /* If all IP and L4 fields are wildcarded then this is an L2 flow */
@@ -701,68 +706,67 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
                                CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
 
                if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
-                       req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
-                       req.ip_dst_mask_len =
+                       req->ip_dst[0] = l3_key->ipv4.daddr.s_addr;
+                       req->ip_dst_mask_len =
                                inet_mask_len(l3_mask->ipv4.daddr.s_addr);
-                       req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
-                       req.ip_src_mask_len =
+                       req->ip_src[0] = l3_key->ipv4.saddr.s_addr;
+                       req->ip_src_mask_len =
                                inet_mask_len(l3_mask->ipv4.saddr.s_addr);
                } else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
-                       memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
-                              sizeof(req.ip_dst));
-                       req.ip_dst_mask_len =
+                       memcpy(req->ip_dst, l3_key->ipv6.daddr.s6_addr32,
+                              sizeof(req->ip_dst));
+                       req->ip_dst_mask_len =
                                        ipv6_mask_len(&l3_mask->ipv6.daddr);
-                       memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
-                              sizeof(req.ip_src));
-                       req.ip_src_mask_len =
+                       memcpy(req->ip_src, l3_key->ipv6.saddr.s6_addr32,
+                              sizeof(req->ip_src));
+                       req->ip_src_mask_len =
                                        ipv6_mask_len(&l3_mask->ipv6.saddr);
                }
        }
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
-               req.l4_src_port = flow->l4_key.ports.sport;
-               req.l4_src_port_mask = flow->l4_mask.ports.sport;
-               req.l4_dst_port = flow->l4_key.ports.dport;
-               req.l4_dst_port_mask = flow->l4_mask.ports.dport;
+               req->l4_src_port = flow->l4_key.ports.sport;
+               req->l4_src_port_mask = flow->l4_mask.ports.sport;
+               req->l4_dst_port = flow->l4_key.ports.dport;
+               req->l4_dst_port_mask = flow->l4_mask.ports.dport;
        } else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
                /* l4 ports serve as type/code when ip_proto is ICMP */
-               req.l4_src_port = htons(flow->l4_key.icmp.type);
-               req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
-               req.l4_dst_port = htons(flow->l4_key.icmp.code);
-               req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
+               req->l4_src_port = htons(flow->l4_key.icmp.type);
+               req->l4_src_port_mask = htons(flow->l4_mask.icmp.type);
+               req->l4_dst_port = htons(flow->l4_key.icmp.code);
+               req->l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
        }
-       req.flags = cpu_to_le16(flow_flags);
+       req->flags = cpu_to_le16(flow_flags);
 
        if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
                action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
        } else {
                if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
                        action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
-                       req.dst_fid = cpu_to_le16(actions->dst_fid);
+                       req->dst_fid = cpu_to_le16(actions->dst_fid);
                }
                if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
                        action_flags |=
                            CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
-                       req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
-                       req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
-                       memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
-                       memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+                       req->l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
+                       req->l2_rewrite_vlan_tci = actions->push_vlan_tci;
+                       memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
+                       memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
                }
                if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
                        action_flags |=
                            CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
                        /* Rewrite config with tpid = 0 implies vlan pop */
-                       req.l2_rewrite_vlan_tpid = 0;
-                       memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
-                       memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
+                       req->l2_rewrite_vlan_tpid = 0;
+                       memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
+                       memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
                }
        }
-       req.action_flags = cpu_to_le16(action_flags);
+       req->action_flags = cpu_to_le16(action_flags);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
        if (!rc) {
-               resp = bnxt_get_hwrm_resp_addr(bp, &req);
                /* CFA_FLOW_ALLOC response interpretation:
                 *                  fw with          fw with
                 *                  16-bit           64-bit
@@ -778,7 +782,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
                        flow_node->flow_id = resp->flow_id;
                }
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
@@ -788,67 +792,69 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
                                       __le32 ref_decap_handle,
                                       __le32 *decap_filter_handle)
 {
-       struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
        struct hwrm_cfa_decap_filter_alloc_output *resp;
        struct ip_tunnel_key *tun_key = &flow->tun_key;
+       struct hwrm_cfa_decap_filter_alloc_input *req;
        u32 enables = 0;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_ALLOC);
+       if (rc)
+               goto exit;
 
-       req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
+       req->flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
        enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
                   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
-       req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
-       req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
+       req->tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
+       req->ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
                /* tunnel_id is wrongly defined in hsi defn. as __le32 */
-               req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
+               req->tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
        }
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
-               ether_addr_copy(req.dst_macaddr, l2_info->dmac);
+               ether_addr_copy(req->dst_macaddr, l2_info->dmac);
        }
        if (l2_info->num_vlans) {
                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
-               req.t_ivlan_vid = l2_info->inner_vlan_tci;
+               req->t_ivlan_vid = l2_info->inner_vlan_tci;
        }
 
        enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
-       req.ethertype = htons(ETH_P_IP);
+       req->ethertype = htons(ETH_P_IP);
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
                           CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
                           CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
-               req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
-               req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
-               req.src_ipaddr[0] = tun_key->u.ipv4.src;
+               req->ip_addr_type =
+                       CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
+               req->dst_ipaddr[0] = tun_key->u.ipv4.dst;
+               req->src_ipaddr[0] = tun_key->u.ipv4.src;
        }
 
        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
-               req.dst_port = tun_key->tp_dst;
+               req->dst_port = tun_key->tp_dst;
        }
 
        /* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
         * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
         */
-       req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
-       req.enables = cpu_to_le32(enables);
+       req->l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
+       req->enables = cpu_to_le32(enables);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (!rc) {
-               resp = bnxt_get_hwrm_resp_addr(bp, &req);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
+       if (!rc)
                *decap_filter_handle = resp->decap_filter_id;
-       } else {
+       hwrm_req_drop(bp, req);
+exit:
+       if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
-       }
-       mutex_unlock(&bp->hwrm_cmd_lock);
 
        return rc;
 }
@@ -856,13 +862,14 @@ static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
                                      __le32 decap_filter_handle)
 {
-       struct hwrm_cfa_decap_filter_free_input req = { 0 };
+       struct hwrm_cfa_decap_filter_free_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
-       req.decap_filter_id = decap_filter_handle;
-
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_FREE);
+       if (!rc) {
+               req->decap_filter_id = decap_filter_handle;
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 
@@ -874,18 +881,18 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
                                       struct bnxt_tc_l2_key *l2_info,
                                       __le32 *encap_record_handle)
 {
-       struct hwrm_cfa_encap_record_alloc_input req = { 0 };
        struct hwrm_cfa_encap_record_alloc_output *resp;
-       struct hwrm_cfa_encap_data_vxlan *encap =
-                       (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
-       struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
-                               (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
+       struct hwrm_cfa_encap_record_alloc_input *req;
+       struct hwrm_cfa_encap_data_vxlan *encap;
+       struct hwrm_vxlan_ipv4_hdr *encap_ipv4;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
-
-       req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
+       rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_ALLOC);
+       if (rc)
+               goto exit;
 
+       encap = (struct hwrm_cfa_encap_data_vxlan *)&req->encap_data;
+       req->encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
        ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
        ether_addr_copy(encap->src_mac_addr, l2_info->smac);
        if (l2_info->num_vlans) {
@@ -894,6 +901,7 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
                encap->ovlan_tpid = l2_info->inner_vlan_tpid;
        }
 
+       encap_ipv4 = (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
        encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
        encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
        encap_ipv4->ttl = encap_key->ttl;
@@ -905,15 +913,14 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
        encap->dst_port = encap_key->tp_dst;
        encap->vni = tunnel_id_to_key32(encap_key->tun_id);
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-       if (!rc) {
-               resp = bnxt_get_hwrm_resp_addr(bp, &req);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send_silent(bp, req);
+       if (!rc)
                *encap_record_handle = resp->encap_record_id;
-       } else {
+       hwrm_req_drop(bp, req);
+exit:
+       if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
-       }
-       mutex_unlock(&bp->hwrm_cmd_lock);
 
        return rc;
 }
@@ -921,13 +928,14 @@ static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
                                      __le32 encap_record_handle)
 {
-       struct hwrm_cfa_encap_record_free_input req = { 0 };
+       struct hwrm_cfa_encap_record_free_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
-       req.encap_record_id = encap_record_handle;
-
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_FREE);
+       if (!rc) {
+               req->encap_record_id = encap_record_handle;
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 
@@ -1673,14 +1681,20 @@ static int
 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
                             struct bnxt_tc_stats_batch stats_batch[])
 {
-       struct hwrm_cfa_flow_stats_input req = { 0 };
        struct hwrm_cfa_flow_stats_output *resp;
-       __le16 *req_flow_handles = &req.flow_handle_0;
-       __le32 *req_flow_ids = &req.flow_id_0;
+       struct hwrm_cfa_flow_stats_input *req;
+       __le16 *req_flow_handles;
+       __le32 *req_flow_ids;
        int rc, i;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
-       req.num_flows = cpu_to_le16(num_flows);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_STATS);
+       if (rc)
+               goto exit;
+
+       req_flow_handles = &req->flow_handle_0;
+       req_flow_ids = &req->flow_id_0;
+
+       req->num_flows = cpu_to_le16(num_flows);
        for (i = 0; i < num_flows; i++) {
                struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
 
@@ -1688,13 +1702,12 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
                                        &req_flow_handles[i], &req_flow_ids[i]);
        }
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                __le64 *resp_packets;
                __le64 *resp_bytes;
 
-               resp = bnxt_get_hwrm_resp_addr(bp, &req);
                resp_packets = &resp->packet_0;
                resp_bytes = &resp->byte_0;
 
@@ -1704,10 +1717,11 @@ bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
                        stats_batch[i].hw_stats.bytes =
                                                le64_to_cpu(resp_bytes[i]);
                }
-       } else {
-               netdev_info(bp->dev, "error rc=%d\n", rc);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
+exit:
+       if (rc)
+               netdev_info(bp->dev, "error rc=%d\n", rc);
 
        return rc;
 }
index 187ff64..fde0c3e 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_ulp.h"
 
 static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id,
@@ -237,27 +238,33 @@ static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
 {
        struct net_device *dev = edev->net;
        struct bnxt *bp = netdev_priv(dev);
+       struct output *resp;
        struct input *req;
+       u32 resp_len;
        int rc;
 
        if (ulp_id != BNXT_ROCE_ULP && bp->fw_reset_state)
                return -EBUSY;
 
-       mutex_lock(&bp->hwrm_cmd_lock);
-       req = fw_msg->msg;
-       req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
-       rc = _hwrm_send_message(bp, fw_msg->msg, fw_msg->msg_len,
-                               fw_msg->timeout);
-       if (!rc) {
-               struct output *resp = bp->hwrm_cmd_resp_addr;
-               u32 len = le16_to_cpu(resp->resp_len);
+       rc = hwrm_req_init(bp, req, 0 /* don't care */);
+       if (rc)
+               return rc;
 
-               if (fw_msg->resp_max_len < len)
-                       len = fw_msg->resp_max_len;
+       rc = hwrm_req_replace(bp, req, fw_msg->msg, fw_msg->msg_len);
+       if (rc)
+               return rc;
 
-               memcpy(fw_msg->resp, resp, len);
+       hwrm_req_timeout(bp, req, fw_msg->timeout);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
+       resp_len = le16_to_cpu(resp->resp_len);
+       if (resp_len) {
+               if (fw_msg->resp_max_len < resp_len)
+                       resp_len = fw_msg->resp_max_len;
+
+               memcpy(fw_msg->resp, resp, resp_len);
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
index dd66302..9401936 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_hwrm.h"
 #include "bnxt_vfr.h"
 #include "bnxt_devlink.h"
 #include "bnxt_tc.h"
 static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
                              u16 *tx_cfa_action, u16 *rx_cfa_code)
 {
-       struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_cfa_vfr_alloc_input req = { 0 };
+       struct hwrm_cfa_vfr_alloc_output *resp;
+       struct hwrm_cfa_vfr_alloc_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1);
-       req.vf_id = cpu_to_le16(vf_idx);
-       sprintf(req.vfr_name, "vfr%d", vf_idx);
-
-       mutex_lock(&bp->hwrm_cmd_lock);
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_VFR_ALLOC);
        if (!rc) {
-               *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
-               *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
-               netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
-                          *tx_cfa_action, *rx_cfa_code);
-       } else {
-               netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
+               req->vf_id = cpu_to_le16(vf_idx);
+               sprintf(req->vfr_name, "vfr%d", vf_idx);
+
+               resp = hwrm_req_hold(bp, req);
+               rc = hwrm_req_send(bp, req);
+               if (!rc) {
+                       *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
+                       *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
+                       netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
+                                  *tx_cfa_action, *rx_cfa_code);
+               }
+               hwrm_req_drop(bp, req);
        }
-
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       if (rc)
+               netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
        return rc;
 }
 
 static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
 {
-       struct hwrm_cfa_vfr_free_input req = { 0 };
+       struct hwrm_cfa_vfr_free_input *req;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1);
-       sprintf(req.vfr_name, "vfr%d", vf_idx);
-
-       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       rc = hwrm_req_init(bp, req, HWRM_CFA_VFR_FREE);
+       if (!rc) {
+               sprintf(req->vfr_name, "vfr%d", vf_idx);
+               rc = hwrm_req_send(bp, req);
+       }
        if (rc)
                netdev_info(bp->dev, "%s error rc=%d\n", __func__, rc);
        return rc;
@@ -67,17 +70,18 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
 static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
                              u16 *max_mtu)
 {
-       struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
-       struct hwrm_func_qcfg_input req = {0};
+       struct hwrm_func_qcfg_output *resp;
+       struct hwrm_func_qcfg_input *req;
        u16 mtu;
        int rc;
 
-       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-       req.fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid);
-
-       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = hwrm_req_init(bp, req, HWRM_FUNC_QCFG);
+       if (rc)
+               return rc;
 
-       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       req->fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid);
+       resp = hwrm_req_hold(bp, req);
+       rc = hwrm_req_send(bp, req);
        if (!rc) {
                mtu = le16_to_cpu(resp->max_mtu_configured);
                if (!mtu)
@@ -85,7 +89,7 @@ static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
                else
                        *max_mtu = mtu;
        }
-       mutex_unlock(&bp->hwrm_cmd_lock);
+       hwrm_req_drop(bp, req);
        return rc;
 }
 
index bee6e09..c8083df 100644 (file)
@@ -87,7 +87,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                        dma_unmap_single(&pdev->dev,
                                         dma_unmap_addr(tx_buf, mapping),
                                         dma_unmap_len(tx_buf, len),
-                                        PCI_DMA_TODEVICE);
+                                        DMA_TO_DEVICE);
                        xdp_return_frame(tx_buf->xdpf);
                        tx_buf->action = 0;
                        tx_buf->xdpf = NULL;
index db74241..23c7595 100644 (file)
@@ -828,7 +828,9 @@ static void bcmgenet_set_msglevel(struct net_device *dev, u32 level)
 }
 
 static int bcmgenet_get_coalesce(struct net_device *dev,
-                                struct ethtool_coalesce *ec)
+                                struct ethtool_coalesce *ec,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
        struct bcmgenet_rx_ring *ring;
@@ -890,7 +892,9 @@ static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
 }
 
 static int bcmgenet_set_coalesce(struct net_device *dev,
-                                struct ethtool_coalesce *ec)
+                                struct ethtool_coalesce *ec,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct bcmgenet_priv *priv = netdev_priv(dev);
        unsigned int i;
@@ -3659,7 +3663,7 @@ static const struct net_device_ops bcmgenet_netdev_ops = {
        .ndo_tx_timeout         = bcmgenet_timeout,
        .ndo_set_rx_mode        = bcmgenet_set_rx_mode,
        .ndo_set_mac_address    = bcmgenet_set_mac_addr,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_features       = bcmgenet_set_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = bcmgenet_poll_controller,
@@ -3972,8 +3976,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
         */
        dev->needed_headroom += 64;
 
-       netdev_boot_setup_check(dev);
-
        priv->dev = dev;
        priv->pdev = pdev;
 
index 5b4568c..f38f40e 100644 (file)
@@ -2136,7 +2136,7 @@ static const struct net_device_ops sbmac_netdev_ops = {
        .ndo_start_xmit         = sbmac_start_tx,
        .ndo_set_rx_mode        = sbmac_set_rx_mode,
        .ndo_tx_timeout         = sbmac_tx_timeout,
-       .ndo_do_ioctl           = sbmac_mii_ioctl,
+       .ndo_eth_ioctl          = sbmac_mii_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index b0e4964..8a238e3 100644 (file)
@@ -6564,10 +6564,8 @@ static void tg3_tx(struct tg3_napi *tnapi)
                        skb_tstamp_tx(skb, &timestamp);
                }
 
-               pci_unmap_single(tp->pdev,
-                                dma_unmap_addr(ri, mapping),
-                                skb_headlen(skb),
-                                PCI_DMA_TODEVICE);
+               dma_unmap_single(&tp->pdev->dev, dma_unmap_addr(ri, mapping),
+                                skb_headlen(skb), DMA_TO_DEVICE);
 
                ri->skb = NULL;
 
@@ -6584,10 +6582,10 @@ static void tg3_tx(struct tg3_napi *tnapi)
                        if (unlikely(ri->skb != NULL || sw_idx == hw_idx))
                                tx_bug = 1;
 
-                       pci_unmap_page(tp->pdev,
+                       dma_unmap_page(&tp->pdev->dev,
                                       dma_unmap_addr(ri, mapping),
                                       skb_frag_size(&skb_shinfo(skb)->frags[i]),
-                                      PCI_DMA_TODEVICE);
+                                      DMA_TO_DEVICE);
 
                        while (ri->fragmented) {
                                ri->fragmented = false;
@@ -6646,8 +6644,8 @@ static void tg3_rx_data_free(struct tg3 *tp, struct ring_info *ri, u32 map_sz)
        if (!ri->data)
                return;
 
-       pci_unmap_single(tp->pdev, dma_unmap_addr(ri, mapping),
-                        map_sz, PCI_DMA_FROMDEVICE);
+       dma_unmap_single(&tp->pdev->dev, dma_unmap_addr(ri, mapping), map_sz,
+                        DMA_FROM_DEVICE);
        tg3_frag_free(skb_size <= PAGE_SIZE, ri->data);
        ri->data = NULL;
 }
@@ -6711,11 +6709,9 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr,
        if (!data)
                return -ENOMEM;
 
-       mapping = pci_map_single(tp->pdev,
-                                data + TG3_RX_OFFSET(tp),
-                                data_size,
-                                PCI_DMA_FROMDEVICE);
-       if (unlikely(pci_dma_mapping_error(tp->pdev, mapping))) {
+       mapping = dma_map_single(&tp->pdev->dev, data + TG3_RX_OFFSET(tp),
+                                data_size, DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(&tp->pdev->dev, mapping))) {
                tg3_frag_free(skb_size <= PAGE_SIZE, data);
                return -EIO;
        }
@@ -6882,8 +6878,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
                        if (skb_size < 0)
                                goto drop_it;
 
-                       pci_unmap_single(tp->pdev, dma_addr, skb_size,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&tp->pdev->dev, dma_addr, skb_size,
+                                        DMA_FROM_DEVICE);
 
                        /* Ensure that the update to the data happens
                         * after the usage of the old DMA mapping.
@@ -6908,11 +6904,13 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
                                goto drop_it_no_recycle;
 
                        skb_reserve(skb, TG3_RAW_IP_ALIGN);
-                       pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_cpu(&tp->pdev->dev, dma_addr, len,
+                                               DMA_FROM_DEVICE);
                        memcpy(skb->data,
                               data + TG3_RX_OFFSET(tp),
                               len);
-                       pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_device(&tp->pdev->dev, dma_addr,
+                                                  len, DMA_FROM_DEVICE);
                }
 
                skb_put(skb, len);
@@ -7762,10 +7760,8 @@ static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
        skb = txb->skb;
        txb->skb = NULL;
 
-       pci_unmap_single(tnapi->tp->pdev,
-                        dma_unmap_addr(txb, mapping),
-                        skb_headlen(skb),
-                        PCI_DMA_TODEVICE);
+       dma_unmap_single(&tnapi->tp->pdev->dev, dma_unmap_addr(txb, mapping),
+                        skb_headlen(skb), DMA_TO_DEVICE);
 
        while (txb->fragmented) {
                txb->fragmented = false;
@@ -7779,9 +7775,9 @@ static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last)
                entry = NEXT_TX(entry);
                txb = &tnapi->tx_buffers[entry];
 
-               pci_unmap_page(tnapi->tp->pdev,
+               dma_unmap_page(&tnapi->tp->pdev->dev,
                               dma_unmap_addr(txb, mapping),
-                              skb_frag_size(frag), PCI_DMA_TODEVICE);
+                              skb_frag_size(frag), DMA_TO_DEVICE);
 
                while (txb->fragmented) {
                        txb->fragmented = false;
@@ -7816,10 +7812,10 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi,
                ret = -1;
        } else {
                /* New SKB is guaranteed to be linear. */
-               new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
-                                         PCI_DMA_TODEVICE);
+               new_addr = dma_map_single(&tp->pdev->dev, new_skb->data,
+                                         new_skb->len, DMA_TO_DEVICE);
                /* Make sure the mapping succeeded */
-               if (pci_dma_mapping_error(tp->pdev, new_addr)) {
+               if (dma_mapping_error(&tp->pdev->dev, new_addr)) {
                        dev_kfree_skb_any(new_skb);
                        ret = -1;
                } else {
@@ -8043,8 +8039,9 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        len = skb_headlen(skb);
 
-       mapping = pci_map_single(tp->pdev, skb->data, len, PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(tp->pdev, mapping))
+       mapping = dma_map_single(&tp->pdev->dev, skb->data, len,
+                                DMA_TO_DEVICE);
+       if (dma_mapping_error(&tp->pdev->dev, mapping))
                goto drop;
 
 
@@ -13499,8 +13496,8 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, bool tso_loopback)
        for (i = data_off; i < tx_len; i++)
                tx_data[i] = (u8) (i & 0xff);
 
-       map = pci_map_single(tp->pdev, skb->data, tx_len, PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(tp->pdev, map)) {
+       map = dma_map_single(&tp->pdev->dev, skb->data, tx_len, DMA_TO_DEVICE);
+       if (dma_mapping_error(&tp->pdev->dev, map)) {
                dev_kfree_skb(skb);
                return -EIO;
        }
@@ -13598,8 +13595,8 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, bool tso_loopback)
                } else
                        goto out;
 
-               pci_dma_sync_single_for_cpu(tp->pdev, map, rx_len,
-                                           PCI_DMA_FROMDEVICE);
+               dma_sync_single_for_cpu(&tp->pdev->dev, map, rx_len,
+                                       DMA_FROM_DEVICE);
 
                rx_data += TG3_RX_OFFSET(tp);
                for (i = data_off; i < rx_len; i++, val++) {
@@ -14040,7 +14037,10 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        return -EOPNOTSUPP;
 }
 
-static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int tg3_get_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct tg3 *tp = netdev_priv(dev);
 
@@ -14048,7 +14048,10 @@ static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        return 0;
 }
 
-static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int tg3_set_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct tg3 *tp = netdev_priv(dev);
        u32 max_rxcoal_tick_int = 0, max_txcoal_tick_int = 0;
@@ -14290,7 +14293,7 @@ static const struct net_device_ops tg3_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = tg3_set_rx_mode,
        .ndo_set_mac_address    = tg3_set_mac_addr,
-       .ndo_do_ioctl           = tg3_ioctl,
+       .ndo_eth_ioctl          = tg3_ioctl,
        .ndo_tx_timeout         = tg3_tx_timeout,
        .ndo_change_mtu         = tg3_change_mtu,
        .ndo_fix_features       = tg3_fix_features,
@@ -17755,11 +17758,11 @@ static int tg3_init_one(struct pci_dev *pdev,
 
        /* Configure DMA attributes. */
        if (dma_mask > DMA_BIT_MASK(32)) {
-               err = pci_set_dma_mask(pdev, dma_mask);
+               err = dma_set_mask(&pdev->dev, dma_mask);
                if (!err) {
                        features |= NETIF_F_HIGHDMA;
-                       err = pci_set_consistent_dma_mask(pdev,
-                                                         persist_dma_mask);
+                       err = dma_set_coherent_mask(&pdev->dev,
+                                                   persist_dma_mask);
                        if (err < 0) {
                                dev_err(&pdev->dev, "Unable to obtain 64 bit "
                                        "DMA for consistent allocations\n");
@@ -17768,7 +17771,7 @@ static int tg3_init_one(struct pci_dev *pdev,
                }
        }
        if (err || dma_mask == DMA_BIT_MASK(32)) {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev,
                                "No usable DMA configuration, aborting\n");
index 265c2fa..391b85f 100644 (file)
@@ -307,8 +307,10 @@ bnad_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wolinfo)
        wolinfo->wolopts = 0;
 }
 
-static int
-bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
+static int bnad_get_coalesce(struct net_device *netdev,
+                            struct ethtool_coalesce *coalesce,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnad *bnad = netdev_priv(netdev);
        unsigned long flags;
@@ -328,8 +330,10 @@ bnad_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
        return 0;
 }
 
-static int
-bnad_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coalesce)
+static int bnad_set_coalesce(struct net_device *netdev,
+                            struct ethtool_coalesce *coalesce,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct bnad *bnad = netdev_priv(netdev);
        unsigned long flags;
index e432a68..5b2a461 100644 (file)
@@ -22,6 +22,7 @@ if NET_VENDOR_CADENCE
 config MACB
        tristate "Cadence MACB/GEM support"
        depends on HAS_DMA && COMMON_CLK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select PHYLINK
        select CRC32
        help
index 7d2fe13..d13fb1d 100644 (file)
@@ -3664,7 +3664,7 @@ static const struct net_device_ops macb_netdev_ops = {
        .ndo_start_xmit         = macb_start_xmit,
        .ndo_set_rx_mode        = macb_set_rx_mode,
        .ndo_get_stats          = macb_get_stats,
-       .ndo_do_ioctl           = macb_ioctl,
+       .ndo_eth_ioctl          = macb_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = macb_change_mtu,
        .ndo_set_mac_address    = eth_mac_addr,
@@ -4323,7 +4323,7 @@ static const struct net_device_ops at91ether_netdev_ops = {
        .ndo_get_stats          = macb_get_stats,
        .ndo_set_rx_mode        = macb_set_rx_mode,
        .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_do_ioctl           = macb_ioctl,
+       .ndo_eth_ioctl          = macb_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = at91ether_poll_controller,
@@ -4533,6 +4533,14 @@ static const struct macb_config sama5d2_config = {
        .usrio = &macb_default_usrio,
 };
 
+static const struct macb_config sama5d29_config = {
+       .caps = MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_GEM_HAS_PTP,
+       .dma_burst_length = 16,
+       .clk_init = macb_clk_init,
+       .init = macb_init,
+       .usrio = &macb_default_usrio,
+};
+
 static const struct macb_config sama5d3_config = {
        .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE
              | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII | MACB_CAPS_JUMBO,
@@ -4610,6 +4618,7 @@ static const struct of_device_id macb_dt_ids[] = {
        { .compatible = "cdns,gem", .data = &pc302gem_config },
        { .compatible = "cdns,sam9x60-macb", .data = &at91sam9260_config },
        { .compatible = "atmel,sama5d2-gem", .data = &sama5d2_config },
+       { .compatible = "atmel,sama5d29-gem", .data = &sama5d29_config },
        { .compatible = "atmel,sama5d3-gem", .data = &sama5d3_config },
        { .compatible = "atmel,sama5d3-macb", .data = &sama5d3macb_config },
        { .compatible = "atmel,sama5d4-gem", .data = &sama5d4_config },
index 4875cda..1c76c95 100644 (file)
@@ -66,7 +66,7 @@ config LIQUIDIO
        tristate "Cavium LiquidIO support"
        depends on 64BIT && PCI
        depends on PCI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select FW_LOADER
        select LIBCRC32C
        select NET_DEVLINK
@@ -91,7 +91,7 @@ config OCTEON_MGMT_ETHERNET
 config LIQUIDIO_VF
        tristate "Cavium LiquidIO VF support"
        depends on 64BIT && PCI_MSI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This driver supports Cavium LiquidIO Intelligent Server Adapter
          based on CN23XX chips.
index 66f2c55..2b97478 100644 (file)
@@ -2108,7 +2108,9 @@ static int octnet_set_intrmod_cfg(struct lio *lio,
 }
 
 static int lio_get_intr_coalesce(struct net_device *netdev,
-                                struct ethtool_coalesce *intr_coal)
+                                struct ethtool_coalesce *intr_coal,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct lio *lio = GET_LIO(netdev);
        struct octeon_device *oct = lio->oct_dev;
@@ -2412,7 +2414,9 @@ oct_cfg_tx_intrcnt(struct lio *lio,
 }
 
 static int lio_set_intr_coalesce(struct net_device *netdev,
-                                struct ethtool_coalesce *intr_coal)
+                                struct ethtool_coalesce *intr_coal,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct lio *lio = GET_LIO(netdev);
        int ret;
index 591229b..2907e13 100644 (file)
@@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -3223,7 +3223,7 @@ static const struct net_device_ops lionetdevops = {
        .ndo_vlan_rx_add_vid    = liquidio_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = liquidio_vlan_rx_kill_vid,
        .ndo_change_mtu         = liquidio_change_mtu,
-       .ndo_do_ioctl           = liquidio_ioctl,
+       .ndo_eth_ioctl          = liquidio_ioctl,
        .ndo_fix_features       = liquidio_fix_features,
        .ndo_set_features       = liquidio_set_features,
        .ndo_set_vf_mac         = liquidio_set_vf_mac,
@@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        }
 
        devlink = devlink_alloc(&liquidio_devlink_ops,
-                               sizeof(struct lio_devlink_priv));
+                               sizeof(struct lio_devlink_priv),
+                               &octeon_dev->pci_dev->dev);
        if (!devlink) {
                dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
                goto setup_nic_dev_free;
@@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        lio_devlink = devlink_priv(devlink);
        lio_devlink->oct = octeon_dev;
 
-       if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+       if (devlink_register(devlink)) {
                devlink_free(devlink);
                dev_err(&octeon_dev->pci_dev->dev,
                        "devlink registration failed\n");
index ffddb31..c6fe0f2 100644 (file)
@@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -1889,7 +1889,7 @@ static const struct net_device_ops lionetdevops = {
        .ndo_vlan_rx_add_vid    = liquidio_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = liquidio_vlan_rx_kill_vid,
        .ndo_change_mtu         = liquidio_change_mtu,
-       .ndo_do_ioctl           = liquidio_ioctl,
+       .ndo_eth_ioctl          = liquidio_ioctl,
        .ndo_fix_features       = liquidio_fix_features,
        .ndo_set_features       = liquidio_set_features,
 };
index 48ff6fb..30463a6 100644 (file)
@@ -1373,7 +1373,7 @@ static const struct net_device_ops octeon_mgmt_ops = {
        .ndo_start_xmit =               octeon_mgmt_xmit,
        .ndo_set_rx_mode =              octeon_mgmt_set_rx_filtering,
        .ndo_set_mac_address =          octeon_mgmt_set_mac_address,
-       .ndo_do_ioctl =                 octeon_mgmt_ioctl,
+       .ndo_eth_ioctl =                        octeon_mgmt_ioctl,
        .ndo_change_mtu =               octeon_mgmt_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller =          octeon_mgmt_poll_controller,
index 9361f96..691e147 100644 (file)
@@ -1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        /* MAP PF's configuration registers */
        nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
        if (!nic->reg_base) {
index 2f218fb..7f28821 100644 (file)
@@ -456,7 +456,9 @@ static void nicvf_get_regs(struct net_device *dev,
 }
 
 static int nicvf_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *cmd)
+                             struct ethtool_coalesce *cmd,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct nicvf *nic = netdev_priv(netdev);
 
index e2b2901..d1667b7 100644 (file)
@@ -2096,7 +2096,7 @@ static const struct net_device_ops nicvf_netdev_ops = {
        .ndo_fix_features       = nicvf_fix_features,
        .ndo_set_features       = nicvf_set_features,
        .ndo_bpf                = nicvf_xdp,
-       .ndo_do_ioctl           = nicvf_ioctl,
+       .ndo_eth_ioctl           = nicvf_ioctl,
        .ndo_set_rx_mode        = nicvf_set_rx_mode,
 };
 
@@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        qcount = netif_get_num_default_rss_queues();
 
        /* Restrict multiqset support only for host bound VFs */
index 8ba0e08..c931ec8 100644 (file)
@@ -69,6 +69,7 @@ config CHELSIO_T3
 config CHELSIO_T4
        tristate "Chelsio Communications T4/T5/T6 Ethernet support"
        depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n)
+       depends on PTP_1588_CLOCK_OPTIONAL
        select FW_LOADER
        select MDIO
        select ZLIB_DEFLATE
index 512da98..73c0161 100644 (file)
@@ -748,7 +748,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
        return 0;
 }
 
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        struct adapter *adapter = dev->ml_priv;
 
@@ -759,7 +761,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
        return 0;
 }
 
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        struct adapter *adapter = dev->ml_priv;
 
@@ -924,7 +928,7 @@ static const struct net_device_ops cxgb_netdev_ops = {
        .ndo_get_stats          = t1_get_stats,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = t1_set_rxmode,
-       .ndo_do_ioctl           = t1_ioctl,
+       .ndo_eth_ioctl          = t1_ioctl,
        .ndo_change_mtu         = t1_change_mtu,
        .ndo_set_mac_address    = t1_set_mac_addr,
        .ndo_fix_features       = t1_fix_features,
index 57f210c..38e4770 100644 (file)
@@ -1996,7 +1996,9 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
        return 0;
 }
 
-static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
@@ -2017,7 +2019,9 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
        return 0;
 }
 
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
@@ -2135,13 +2139,18 @@ static int in_range(int val, int lo, int hi)
        return val < 0 || (val <= hi && val >= lo);
 }
 
-static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
+static int cxgb_siocdevprivate(struct net_device *dev,
+                              struct ifreq *ifreq,
+                              void __user *useraddr,
+                              int cmd)
 {
        struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
-       u32 cmd;
        int ret;
 
+       if (cmd != SIOCCHIOCTL)
+               return -EOPNOTSUPP;
+
        if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
                return -EFAULT;
 
@@ -2546,8 +2555,6 @@ static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
                fallthrough;
        case SIOCGMIIPHY:
                return mdio_mii_ioctl(&pi->phy.mdio, data, cmd);
-       case SIOCCHIOCTL:
-               return cxgb_extension_ioctl(dev, req->ifr_data);
        default:
                return -EOPNOTSUPP;
        }
@@ -3181,7 +3188,8 @@ static const struct net_device_ops cxgb_netdev_ops = {
        .ndo_get_stats          = cxgb_get_stats,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = cxgb_set_rxmode,
-       .ndo_do_ioctl           = cxgb_ioctl,
+       .ndo_eth_ioctl          = cxgb_ioctl,
+       .ndo_siocdevprivate     = cxgb_siocdevprivate,
        .ndo_change_mtu         = cxgb_change_mtu,
        .ndo_set_mac_address    = cxgb_set_mac_addr,
        .ndo_fix_features       = cxgb_fix_features,
@@ -3231,15 +3239,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_disable_device;
        }
 
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
                pci_using_dac = 1;
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (err) {
-                       dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
-                              "coherent allocations\n");
-                       goto out_release_regions;
-               }
-       } else if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) != 0) {
+       } else if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) != 0) {
                dev_err(&pdev->dev, "no usable DMA configuration\n");
                goto out_release_regions;
        }
index cb5c79c..e21a2e6 100644 (file)
@@ -244,8 +244,8 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
        frag_idx = d->fragidx;
 
        if (frag_idx == 0 && skb_headlen(skb)) {
-               pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
-                                skb_headlen(skb), PCI_DMA_TODEVICE);
+               dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]),
+                                skb_headlen(skb), DMA_TO_DEVICE);
                j = 1;
        }
 
@@ -253,9 +253,9 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
        nfrags = skb_shinfo(skb)->nr_frags;
 
        while (frag_idx < nfrags && curflit < WR_FLITS) {
-               pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
+               dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]),
                               skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]),
-                              PCI_DMA_TODEVICE);
+                              DMA_TO_DEVICE);
                j ^= 1;
                if (j == 0) {
                        sgp++;
@@ -355,15 +355,14 @@ static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
        if (q->use_pages && d->pg_chunk.page) {
                (*d->pg_chunk.p_cnt)--;
                if (!*d->pg_chunk.p_cnt)
-                       pci_unmap_page(pdev,
-                                      d->pg_chunk.mapping,
-                                      q->alloc_size, PCI_DMA_FROMDEVICE);
+                       dma_unmap_page(&pdev->dev, d->pg_chunk.mapping,
+                                      q->alloc_size, DMA_FROM_DEVICE);
 
                put_page(d->pg_chunk.page);
                d->pg_chunk.page = NULL;
        } else {
-               pci_unmap_single(pdev, dma_unmap_addr(d, dma_addr),
-                                q->buf_size, PCI_DMA_FROMDEVICE);
+               dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr),
+                                q->buf_size, DMA_FROM_DEVICE);
                kfree_skb(d->skb);
                d->skb = NULL;
        }
@@ -414,8 +413,8 @@ static inline int add_one_rx_buf(void *va, unsigned int len,
 {
        dma_addr_t mapping;
 
-       mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
-       if (unlikely(pci_dma_mapping_error(pdev, mapping)))
+       mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
                return -ENOMEM;
 
        dma_unmap_addr_set(sd, dma_addr, mapping);
@@ -453,9 +452,9 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
                q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
                                    SGE_PG_RSVD;
                q->pg_chunk.offset = 0;
-               mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
-                                      0, q->alloc_size, PCI_DMA_FROMDEVICE);
-               if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) {
+               mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page,
+                                      0, q->alloc_size, DMA_FROM_DEVICE);
+               if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) {
                        __free_pages(q->pg_chunk.page, order);
                        q->pg_chunk.page = NULL;
                        return -EIO;
@@ -522,9 +521,9 @@ nomem:                              q->alloc_failed++;
                        dma_unmap_addr_set(sd, dma_addr, mapping);
 
                        add_one_rx_chunk(mapping, d, q->gen);
-                       pci_dma_sync_single_for_device(adap->pdev, mapping,
-                                               q->buf_size - SGE_PG_RSVD,
-                                               PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_device(&adap->pdev->dev, mapping,
+                                                  q->buf_size - SGE_PG_RSVD,
+                                                  DMA_FROM_DEVICE);
                } else {
                        void *buf_start;
 
@@ -793,13 +792,13 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
                skb = alloc_skb(len, GFP_ATOMIC);
                if (likely(skb != NULL)) {
                        __skb_put(skb, len);
-                       pci_dma_sync_single_for_cpu(adap->pdev,
-                                           dma_unmap_addr(sd, dma_addr), len,
-                                           PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_cpu(&adap->pdev->dev,
+                                               dma_unmap_addr(sd, dma_addr),
+                                               len, DMA_FROM_DEVICE);
                        memcpy(skb->data, sd->skb->data, len);
-                       pci_dma_sync_single_for_device(adap->pdev,
-                                           dma_unmap_addr(sd, dma_addr), len,
-                                           PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_device(&adap->pdev->dev,
+                                                  dma_unmap_addr(sd, dma_addr),
+                                                  len, DMA_FROM_DEVICE);
                } else if (!drop_thres)
                        goto use_orig_buf;
 recycle:
@@ -813,8 +812,8 @@ recycle:
                goto recycle;
 
 use_orig_buf:
-       pci_unmap_single(adap->pdev, dma_unmap_addr(sd, dma_addr),
-                        fl->buf_size, PCI_DMA_FROMDEVICE);
+       dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr),
+                        fl->buf_size, DMA_FROM_DEVICE);
        skb = sd->skb;
        skb_put(skb, len);
        __refill_fl(adap, fl);
@@ -854,12 +853,11 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
                newskb = alloc_skb(len, GFP_ATOMIC);
                if (likely(newskb != NULL)) {
                        __skb_put(newskb, len);
-                       pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
-                                           PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr,
+                                               len, DMA_FROM_DEVICE);
                        memcpy(newskb->data, sd->pg_chunk.va, len);
-                       pci_dma_sync_single_for_device(adap->pdev, dma_addr,
-                                                      len,
-                                                      PCI_DMA_FROMDEVICE);
+                       dma_sync_single_for_device(&adap->pdev->dev, dma_addr,
+                                                  len, DMA_FROM_DEVICE);
                } else if (!drop_thres)
                        return NULL;
 recycle:
@@ -883,14 +881,12 @@ recycle:
                goto recycle;
        }
 
-       pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
-                                   PCI_DMA_FROMDEVICE);
+       dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len,
+                               DMA_FROM_DEVICE);
        (*sd->pg_chunk.p_cnt)--;
        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
-               pci_unmap_page(adap->pdev,
-                              sd->pg_chunk.mapping,
-                              fl->alloc_size,
-                              PCI_DMA_FROMDEVICE);
+               dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
+                              fl->alloc_size, DMA_FROM_DEVICE);
        if (!skb) {
                __skb_put(newskb, SGE_RX_PULL_LEN);
                memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
@@ -968,9 +964,9 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
        const struct skb_shared_info *si;
 
        if (skb_headlen(skb)) {
-               *addr = pci_map_single(pdev, skb->data, skb_headlen(skb),
-                                      PCI_DMA_TODEVICE);
-               if (pci_dma_mapping_error(pdev, *addr))
+               *addr = dma_map_single(&pdev->dev, skb->data,
+                                      skb_headlen(skb), DMA_TO_DEVICE);
+               if (dma_mapping_error(&pdev->dev, *addr))
                        goto out_err;
                addr++;
        }
@@ -981,7 +977,7 @@ static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
        for (fp = si->frags; fp < end; fp++) {
                *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
                                         DMA_TO_DEVICE);
-               if (pci_dma_mapping_error(pdev, *addr))
+               if (dma_mapping_error(&pdev->dev, *addr))
                        goto unwind;
                addr++;
        }
@@ -992,7 +988,8 @@ unwind:
                dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
                               DMA_TO_DEVICE);
 
-       pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE);
+       dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb),
+                        DMA_TO_DEVICE);
 out_err:
        return -ENOMEM;
 }
@@ -1592,13 +1589,14 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
        p = dui->addr;
 
        if (skb_tail_pointer(skb) - skb_transport_header(skb))
-               pci_unmap_single(dui->pdev, *p++, skb_tail_pointer(skb) -
-                                skb_transport_header(skb), PCI_DMA_TODEVICE);
+               dma_unmap_single(&dui->pdev->dev, *p++,
+                                skb_tail_pointer(skb) - skb_transport_header(skb),
+                                DMA_TO_DEVICE);
 
        si = skb_shinfo(skb);
        for (i = 0; i < si->nr_frags; i++)
-               pci_unmap_page(dui->pdev, *p++, skb_frag_size(&si->frags[i]),
-                              PCI_DMA_TODEVICE);
+               dma_unmap_page(&dui->pdev->dev, *p++,
+                              skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
 }
 
 static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
@@ -2153,17 +2151,14 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
 
        fl->credits--;
 
-       pci_dma_sync_single_for_cpu(adap->pdev,
-                                   dma_unmap_addr(sd, dma_addr),
-                                   fl->buf_size - SGE_PG_RSVD,
-                                   PCI_DMA_FROMDEVICE);
+       dma_sync_single_for_cpu(&adap->pdev->dev,
+                               dma_unmap_addr(sd, dma_addr),
+                               fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE);
 
        (*sd->pg_chunk.p_cnt)--;
        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
-               pci_unmap_page(adap->pdev,
-                              sd->pg_chunk.mapping,
-                              fl->alloc_size,
-                              PCI_DMA_FROMDEVICE);
+               dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
+                              fl->alloc_size, DMA_FROM_DEVICE);
 
        if (!skb) {
                put_page(sd->pg_chunk.page);
index 83ed10a..5903bdb 100644 (file)
@@ -1147,7 +1147,9 @@ static int set_dbqtimer_tickval(struct net_device *dev,
 }
 
 static int set_coalesce(struct net_device *dev,
-                       struct ethtool_coalesce *coalesce)
+                       struct ethtool_coalesce *coalesce,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        int ret;
 
@@ -1163,7 +1165,9 @@ static int set_coalesce(struct net_device *dev,
                                    coalesce->tx_coalesce_usecs);
 }
 
-static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
+static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c,
+                       struct kernel_ethtool_coalesce *kernel_coal,
+                       struct netlink_ext_ack *extack)
 {
        const struct port_info *pi = netdev_priv(dev);
        const struct adapter *adap = pi->adapter;
index 6260b3b..786ceae 100644 (file)
@@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
@@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
index 710cb00..0d9cda4 100644 (file)
@@ -3872,7 +3872,7 @@ static const struct net_device_ops cxgb4_netdev_ops = {
        .ndo_set_mac_address  = cxgb_set_mac_addr,
        .ndo_set_features     = cxgb_set_features,
        .ndo_validate_addr    = eth_validate_addr,
-       .ndo_do_ioctl         = cxgb_ioctl,
+       .ndo_eth_ioctl         = cxgb_ioctl,
        .ndo_change_mtu       = cxgb_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller  = cxgb_netpoll,
@@ -4008,7 +4008,7 @@ static void adap_free_hma_mem(struct adapter *adapter)
 
        if (adapter->hma.flags & HMA_DMA_MAPPED_FLAG) {
                dma_unmap_sg(adapter->pdev_dev, adapter->hma.sgt->sgl,
-                            adapter->hma.sgt->nents, PCI_DMA_BIDIRECTIONAL);
+                            adapter->hma.sgt->nents, DMA_BIDIRECTIONAL);
                adapter->hma.flags &= ~HMA_DMA_MAPPED_FLAG;
        }
 
@@ -6163,8 +6163,7 @@ static void print_port_info(const struct net_device *dev)
                --bufp;
        sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
 
-       netdev_info(dev, "%s: Chelsio %s (%s) %s\n",
-                   dev->name, adap->params.vpd.id, adap->name, buf);
+       netdev_info(dev, "Chelsio %s %s\n", adap->params.vpd.id, buf);
 }
 
 /*
@@ -6688,16 +6687,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                return 0;
        }
 
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
                highdma = true;
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (err) {
-                       dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
-                               "coherent allocations\n");
-                       goto out_free_adapter;
-               }
        } else {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "no usable DMA configuration\n");
                        goto out_free_adapter;
index 6a099cb..fa5b596 100644 (file)
@@ -443,7 +443,7 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
                if (is_buf_mapped(d))
                        dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
                                       get_buf_size(adap, d),
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                put_page(d->page);
                d->page = NULL;
                if (++q->cidx == q->size)
@@ -469,7 +469,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
 
        if (is_buf_mapped(d))
                dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
-                              get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
+                              get_buf_size(adap, d), DMA_FROM_DEVICE);
        d->page = NULL;
        if (++q->cidx == q->size)
                q->cidx = 0;
@@ -566,7 +566,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 
                mapping = dma_map_page(adap->pdev_dev, pg, 0,
                                       PAGE_SIZE << s->fl_pg_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
                        __free_pages(pg, s->fl_pg_order);
                        q->mapping_err++;
@@ -596,7 +596,7 @@ alloc_small_pages:
                }
 
                mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
                        put_page(pg);
                        q->mapping_err++;
index 2820a0b..49b76fd 100644 (file)
@@ -1647,7 +1647,9 @@ static int cxgb4vf_set_ringparam(struct net_device *dev,
  * interrupt holdoff timer to be read on all of the device's Queue Sets.
  */
 static int cxgb4vf_get_coalesce(struct net_device *dev,
-                               struct ethtool_coalesce *coalesce)
+                               struct ethtool_coalesce *coalesce,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        const struct port_info *pi = netdev_priv(dev);
        const struct adapter *adapter = pi->adapter;
@@ -1667,7 +1669,9 @@ static int cxgb4vf_get_coalesce(struct net_device *dev,
  * the interrupt holdoff timer on any of the device's Queue Sets.
  */
 static int cxgb4vf_set_coalesce(struct net_device *dev,
-                               struct ethtool_coalesce *coalesce)
+                               struct ethtool_coalesce *coalesce,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        const struct port_info *pi = netdev_priv(dev);
        struct adapter *adapter = pi->adapter;
@@ -2837,7 +2841,7 @@ static const struct net_device_ops cxgb4vf_netdev_ops     = {
        .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
        .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = cxgb4vf_do_ioctl,
+       .ndo_eth_ioctl          = cxgb4vf_do_ioctl,
        .ndo_change_mtu         = cxgb4vf_change_mtu,
        .ndo_fix_features       = cxgb4vf_fix_features,
        .ndo_set_features       = cxgb4vf_set_features,
@@ -2917,17 +2921,11 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
         * Set up our DMA mask: try for 64-bit address masking first and
         * fall back to 32-bit if we can't get 64 bits ...
         */
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err == 0) {
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-               if (err) {
-                       dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
-                               " coherent allocations\n");
-                       goto err_release_regions;
-               }
                pci_using_dac = 1;
        } else {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
                if (err != 0) {
                        dev_err(&pdev->dev, "no usable DMA configuration\n");
                        goto err_release_regions;
index 7bc80ee..0295b24 100644 (file)
@@ -478,7 +478,7 @@ static void free_rx_bufs(struct adapter *adapter, struct sge_fl *fl, int n)
                if (is_buf_mapped(sdesc))
                        dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
                                       get_buf_size(adapter, sdesc),
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                put_page(sdesc->page);
                sdesc->page = NULL;
                if (++fl->cidx == fl->size)
@@ -507,7 +507,7 @@ static void unmap_rx_buf(struct adapter *adapter, struct sge_fl *fl)
        if (is_buf_mapped(sdesc))
                dma_unmap_page(adapter->pdev_dev, get_buf_addr(sdesc),
                               get_buf_size(adapter, sdesc),
-                              PCI_DMA_FROMDEVICE);
+                              DMA_FROM_DEVICE);
        sdesc->page = NULL;
        if (++fl->cidx == fl->size)
                fl->cidx = 0;
@@ -644,7 +644,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
 
                dma_addr = dma_map_page(adapter->pdev_dev, page, 0,
                                        PAGE_SIZE << s->fl_pg_order,
-                                       PCI_DMA_FROMDEVICE);
+                                       DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
                        /*
                         * We've run out of DMA mapping space.  Free up the
@@ -682,7 +682,7 @@ alloc_small_pages:
                poison_buf(page, PAGE_SIZE);
 
                dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
                        put_page(page);
                        break;
index d8af9e6..dac1764 100644 (file)
@@ -6,7 +6,7 @@
 config NET_VENDOR_CIRRUS
        bool "Cirrus devices"
        default y
-       depends on ISA || EISA || ARM || MAC
+       depends on ISA || EISA || ARM || MAC || COMPILE_TEST
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS
 if NET_VENDOR_CIRRUS
 
 config CS89x0
-       tristate "CS89x0 support"
-       depends on ISA || EISA || ARM
+       tristate
+
+config CS89x0_ISA
+       tristate "CS89x0 ISA driver support"
+       depends on HAS_IOPORT_MAP
+       depends on ISA
        depends on !PPC32
+       depends on CS89x0_PLATFORM=n
+       select NETDEV_LEGACY_INIT
+       select CS89x0
        help
          Support for CS89x0 chipset based Ethernet cards. If you have a
          network (Ethernet) card of this type, say Y and read the file
@@ -30,15 +37,15 @@ config CS89x0
          will be called cs89x0.
 
 config CS89x0_PLATFORM
-       bool "CS89x0 platform driver support" if HAS_IOPORT_MAP
-       default !HAS_IOPORT_MAP
-       depends on CS89x0
+       tristate "CS89x0 platform driver support"
+       depends on ARM || COMPILE_TEST
+       select CS89x0
        help
-         Say Y to compile the cs89x0 driver as a platform driver. This
-         makes this driver suitable for use on certain evaluation boards
-         such as the iMX21ADS.
+         Say Y to compile the cs89x0 platform driver. This makes this driver
+         suitable for use on certain evaluation boards such as the iMX21ADS.
 
-         If you are unsure, say N.
+         To compile this driver as a module, choose M here. The module
+         will be called cs89x0.
 
 config EP93XX_ETH
        tristate "EP93xx Ethernet support"
index 33ace33..d0c4c8b 100644 (file)
@@ -104,7 +104,7 @@ static char version[] __initdata =
  * them to system IRQ numbers. This mapping is card specific and is set to
  * the configuration of the Cirrus Eval board for this chip.
  */
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 static unsigned int netcard_portlist[] __used __initdata = {
        0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240,
        0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0
@@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq)
        int i;
 
        if (chip_type == CS8900) {
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                /* Search the mapping table for the corresponding IRQ pin. */
                for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++)
                        if (cs8900_irq_map[i] == irq)
@@ -859,7 +859,7 @@ net_open(struct net_device *dev)
                        goto bad_out;
                }
        } else {
-#if !defined(CONFIG_CS89x0_PLATFORM)
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (((1 << dev->irq) & lp->irq_map) == 0) {
                        pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
                               dev->name, dev->irq, lp->irq_map);
@@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular)
                        dev->irq = i;
        } else {
                i = lp->isa_config & INT_NO_MASK;
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (lp->chip_type == CS8900) {
                        /* Translate the IRQ using the IRQ mapping table. */
                        if (i >= ARRAY_SIZE(cs8900_irq_map))
@@ -1576,7 +1576,7 @@ out1:
        return retval;
 }
 
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 /*
  * This function converts the I/O port address used by the cs89x0_probe() and
  * init_module() functions to the I/O memory address used by the
@@ -1682,11 +1682,7 @@ out:
        pr_warn("no cs8900 or cs8920 detected.  Be sure to disable PnP with SETUP\n");
        return ERR_PTR(err);
 }
-#endif
-#endif
-
-#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM)
-
+#else
 static struct net_device *dev_cs89x0;
 
 /* Support the 'debug' module parm even if we're compiled for non-debug to
@@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL");
  * (hw or software util)
  */
 
-int __init init_module(void)
+static int __init cs89x0_isa_init_module(void)
 {
-       struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
+       struct net_device *dev;
        struct net_local *lp;
        int ret = 0;
 
@@ -1768,6 +1764,7 @@ int __init init_module(void)
 #else
        debug = 0;
 #endif
+       dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
                return -ENOMEM;
 
@@ -1826,9 +1823,9 @@ out:
        free_netdev(dev);
        return ret;
 }
+module_init(cs89x0_isa_init_module);
 
-void __exit
-cleanup_module(void)
+static void __exit cs89x0_isa_cleanup_module(void)
 {
        struct net_local *lp = netdev_priv(dev_cs89x0);
 
@@ -1838,9 +1835,11 @@ cleanup_module(void)
        release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
        free_netdev(dev_cs89x0);
 }
-#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */
+module_exit(cs89x0_isa_cleanup_module);
+#endif /* MODULE */
+#endif /* CONFIG_CS89x0_ISA */
 
-#ifdef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_PLATFORM)
 static int __init cs89x0_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
index 9f5e5ec..072fac5 100644 (file)
@@ -733,7 +733,7 @@ static const struct net_device_ops ep93xx_netdev_ops = {
        .ndo_open               = ep93xx_open,
        .ndo_stop               = ep93xx_close,
        .ndo_start_xmit         = ep93xx_xmit,
-       .ndo_do_ioctl           = ep93xx_ioctl,
+       .ndo_eth_ioctl          = ep93xx_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 };
index 1a9803f..12ffc14 100644 (file)
@@ -298,7 +298,9 @@ static void enic_set_msglevel(struct net_device *netdev, u32 value)
 }
 
 static int enic_get_coalesce(struct net_device *netdev,
-       struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct enic *enic = netdev_priv(netdev);
        struct enic_rx_coal *rxcoal = &enic->rx_coalesce_setting;
@@ -343,7 +345,9 @@ static int enic_coalesce_valid(struct enic *enic,
 }
 
 static int enic_set_coalesce(struct net_device *netdev,
-       struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct enic *enic = netdev_priv(netdev);
        u32 tx_coalesce_usecs;
index c2ebb33..6e745ca 100644 (file)
@@ -2144,7 +2144,9 @@ static int gmac_set_ringparam(struct net_device *netdev,
 }
 
 static int gmac_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct gemini_ethernet_port *port = netdev_priv(netdev);
 
@@ -2156,7 +2158,9 @@ static int gmac_get_coalesce(struct net_device *netdev,
 }
 
 static int gmac_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct gemini_ethernet_port *port = netdev_priv(netdev);
 
index 2a8bf53..e842de6 100644 (file)
@@ -1372,7 +1372,7 @@ static const struct net_device_ops dm9000_netdev_ops = {
        .ndo_start_xmit         = dm9000_start_xmit,
        .ndo_tx_timeout         = dm9000_timeout,
        .ndo_set_rx_mode        = dm9000_hash_table,
-       .ndo_do_ioctl           = dm9000_ioctl,
+       .ndo_eth_ioctl          = dm9000_ioctl,
        .ndo_set_features       = dm9000_set_features,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
index b125d7f..36ab4cb 100644 (file)
     =========================================================================
 */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -902,7 +903,8 @@ static int     de4x5_close(struct net_device *dev);
 static struct  net_device_stats *de4x5_get_stats(struct net_device *dev);
 static void    de4x5_local_stats(struct net_device *dev, char *buf, int pkt_len);
 static void    set_multicast_list(struct net_device *dev);
-static int     de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int     de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                                   void __user *data, int cmd);
 
 /*
 ** Private functions
@@ -1084,7 +1086,7 @@ static const struct net_device_ops de4x5_netdev_ops = {
     .ndo_start_xmit    = de4x5_queue_pkt,
     .ndo_get_stats     = de4x5_get_stats,
     .ndo_set_rx_mode   = set_multicast_list,
-    .ndo_do_ioctl      = de4x5_ioctl,
+    .ndo_siocdevprivate        = de4x5_siocdevprivate,
     .ndo_set_mac_address= eth_mac_addr,
     .ndo_validate_addr = eth_validate_addr,
 };
@@ -5357,7 +5359,7 @@ de4x5_dbg_rx(struct sk_buff *skb, int len)
 ** this function is only used for my testing.
 */
 static int
-de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+de4x5_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
     struct de4x5_private *lp = netdev_priv(dev);
     struct de4x5_ioctl *ioc = (struct de4x5_ioctl *) &rq->ifr_ifru;
@@ -5371,6 +5373,9 @@ de4x5_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
     } tmp;
     u_long flags = 0;
 
+    if (cmd != SIOCDEVPRIVATE || in_compat_syscall())
+       return -EOPNOTSUPP;
+
     switch(ioc->cmd) {
     case DE4X5_GET_HWADDR:           /* Get the hardware address */
        ioc->len = ETH_ALEN;
index 0116047..55d6fc9 100644 (file)
@@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup)
                        iowrite32(0x33, ioaddr + CSR12);
                        new_csr6 = 0x01860000;
                        /* Trigger autonegotiation. */
-                       iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+                       iowrite32(0x0001F868, ioaddr + 0xB8);
                } else {
                        iowrite32(0x32, ioaddr + CSR12);
                        new_csr6 = 0x00420000;
index c1dcd6c..fcedd73 100644 (file)
@@ -1271,7 +1271,7 @@ static const struct net_device_ops tulip_netdev_ops = {
        .ndo_tx_timeout         = tulip_tx_timeout,
        .ndo_stop               = tulip_close,
        .ndo_get_stats          = tulip_get_stats,
-       .ndo_do_ioctl           = private_ioctl,
+       .ndo_eth_ioctl          = private_ioctl,
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 1876f15..85b9909 100644 (file)
@@ -341,7 +341,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_start_xmit         = start_tx,
        .ndo_get_stats          = get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_tx_timeout         = tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 734acb8..202ecb1 100644 (file)
@@ -95,7 +95,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_set_rx_mode        = set_multicast,
-       .ndo_do_ioctl           = rio_ioctl,
+       .ndo_eth_ioctl          = rio_ioctl,
        .ndo_tx_timeout         = rio_tx_timeout,
 };
 
index ee0ca71..c36d186 100644 (file)
@@ -479,7 +479,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_start_xmit         = start_tx,
        .ndo_get_stats          = get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_tx_timeout         = tx_timeout,
        .ndo_change_mtu         = change_mtu,
        .ndo_set_mac_address    = sundance_set_mac_addr,
index 48c6eb1..6c51cf9 100644 (file)
@@ -742,7 +742,7 @@ static const struct net_device_ops dnet_netdev_ops = {
        .ndo_stop               = dnet_close,
        .ndo_get_stats          = dnet_get_stats,
        .ndo_start_xmit         = dnet_start_xmit,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
index 7c99217..b2d4fb3 100644 (file)
@@ -488,15 +488,7 @@ static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
        pci_set_master(dev);
 
-       err = pci_set_dma_mask(dev, DMA_BIT_MASK(32));
-       if (err) {
-               dev_err(&dev->dev,
-                       "Required dma mask not supported, failed to initialize device\n");
-               err = -EIO;
-               goto err_disable_dev;
-       }
-
-       err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(32));
+       err = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32));
        if (err) {
                dev_err(&dev->dev,
                        "Required dma mask not supported, failed to initialize device\n");
index 99cc1c4..f995530 100644 (file)
@@ -315,7 +315,9 @@ static int be_read_dump_data(struct be_adapter *adapter, u32 dump_len,
 }
 
 static int be_get_coalesce(struct net_device *netdev,
-                          struct ethtool_coalesce *et)
+                          struct ethtool_coalesce *et,
+                          struct kernel_ethtool_coalesce *kernel_coal,
+                          struct netlink_ext_ack *extack)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
        struct be_aic_obj *aic = &adapter->aic_obj[0];
@@ -338,7 +340,9 @@ static int be_get_coalesce(struct net_device *netdev,
  * eqd cmd is issued in the worker thread.
  */
 static int be_set_coalesce(struct net_device *netdev,
-                          struct ethtool_coalesce *et)
+                          struct ethtool_coalesce *et,
+                          struct kernel_ethtool_coalesce *kernel_coal,
+                          struct netlink_ext_ack *extack)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
        struct be_aic_obj *aic = &adapter->aic_obj[0];
index e1b43b0..ed1ed48 100644 (file)
@@ -1009,7 +1009,7 @@ static const struct ethtool_ops ethoc_ethtool_ops = {
 static const struct net_device_ops ethoc_netdev_ops = {
        .ndo_open = ethoc_open,
        .ndo_stop = ethoc_stop,
-       .ndo_do_ioctl = ethoc_ioctl,
+       .ndo_eth_ioctl = ethoc_ioctl,
        .ndo_set_mac_address = ethoc_set_mac_address,
        .ndo_set_rx_mode = ethoc_set_multicast_list,
        .ndo_change_mtu = ethoc_change_mtu,
index 11dbbfd..ff76e40 100644 (file)
@@ -1616,7 +1616,7 @@ static const struct net_device_ops ftgmac100_netdev_ops = {
        .ndo_start_xmit         = ftgmac100_hard_start_xmit,
        .ndo_set_mac_address    = ftgmac100_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
        .ndo_tx_timeout         = ftgmac100_tx_timeout,
        .ndo_set_rx_mode        = ftgmac100_set_rx_mode,
        .ndo_set_features       = ftgmac100_set_features,
index 5a1a8f2..8a341e2 100644 (file)
@@ -1043,7 +1043,7 @@ static const struct net_device_ops ftmac100_netdev_ops = {
        .ndo_start_xmit         = ftmac100_hard_start_xmit,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = ftmac100_do_ioctl,
+       .ndo_eth_ioctl          = ftmac100_do_ioctl,
 };
 
 /******************************************************************************
index 0f141c1..25c91b3 100644 (file)
@@ -463,7 +463,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_start_xmit         = start_tx,
        .ndo_get_stats          = get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = mii_ioctl,
+       .ndo_eth_ioctl          = mii_ioctl,
        .ndo_tx_timeout         = fealnx_tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 2d1abdd..e04e1c5 100644 (file)
@@ -25,10 +25,10 @@ config FEC
        depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \
                   ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
        default ARCH_MXC || SOC_IMX28 if ARM
+       depends on PTP_1588_CLOCK_OPTIONAL
        select CRC32
        select PHYLIB
        imply NET_SELFTESTS
-       imply PTP_1588_CLOCK
        help
          Say Y here if you want to use the built-in 10/100 Fast ethernet
          controller on some Motorola ColdFire and Freescale i.MX processors.
index 626ec58..0e1439f 100644 (file)
@@ -4,7 +4,6 @@ menuconfig FSL_DPAA_ETH
        depends on FSL_DPAA && FSL_FMAN
        select PHYLIB
        select FIXED_PHY
-       select FSL_FMAN_MAC
        help
          Data Path Acceleration Architecture Ethernet driver,
          supporting the Freescale QorIQ chips.
index e682656..685d2d8 100644 (file)
@@ -3157,7 +3157,7 @@ static const struct net_device_ops dpaa_ops = {
        .ndo_set_mac_address = dpaa_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
        .ndo_set_rx_mode = dpaa_set_rx_mode,
-       .ndo_do_ioctl = dpaa_ioctl,
+       .ndo_eth_ioctl = dpaa_ioctl,
        .ndo_setup_tc = dpaa_setup_tc,
        .ndo_change_mtu = dpaa_change_mtu,
        .ndo_bpf = dpaa_xdp,
index 1268996..763d2c7 100644 (file)
@@ -513,7 +513,9 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
 }
 
 static int dpaa_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *c)
+                            struct ethtool_coalesce *c,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct qman_portal *portal;
        u32 period;
@@ -530,7 +532,9 @@ static int dpaa_get_coalesce(struct net_device *dev,
 }
 
 static int dpaa_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *c)
+                            struct ethtool_coalesce *c,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        const cpumask_t *cpus = qman_affine_cpus();
        bool needs_revert[NR_CPUS] = {false};
index c2ef740..3d9842a 100644 (file)
@@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs    := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs     := dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
+fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
index 8336962..605a39f 100644 (file)
@@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id)
 struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv,
                                                  struct dpaa2_fapr *fapr)
 {
-       struct dpaa2_faf_error_bit {
+       static const struct dpaa2_faf_error_bit {
                int position;
                enum devlink_trap_generic_id trap_id;
        } faf_bits[] = {
@@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        struct dpaa2_eth_devlink_priv *dl_priv;
        int err;
 
-       priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv));
+       priv->devlink =
+               devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev);
        if (!priv->devlink) {
                dev_err(dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        dl_priv = devlink_priv(priv->devlink);
        dl_priv->dpaa2_priv = priv;
 
-       err = devlink_register(priv->devlink, dev);
+       err = devlink_register(priv->devlink);
        if (err) {
                dev_err(dev, "devlink_register() = %d\n", err);
                goto devlink_free;
index 9733523..7065c71 100644 (file)
@@ -2594,7 +2594,7 @@ static const struct net_device_ops dpaa2_eth_ops = {
        .ndo_get_stats64 = dpaa2_eth_get_stats,
        .ndo_set_rx_mode = dpaa2_eth_set_rx_mode,
        .ndo_set_features = dpaa2_eth_set_features,
-       .ndo_do_ioctl = dpaa2_eth_ioctl,
+       .ndo_eth_ioctl = dpaa2_eth_ioctl,
        .ndo_change_mtu = dpaa2_eth_change_mtu,
        .ndo_bpf = dpaa2_eth_xdp,
        .ndo_xdp_xmit = dpaa2_eth_xdp_xmit,
@@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
        int err;
 
        dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
-       dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+       dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0);
 
        if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
                return PTR_ERR(dpmac_dev);
index ad5e374..2da5f88 100644 (file)
@@ -72,12 +72,12 @@ static void dpaa2_eth_get_drvinfo(struct net_device *net_dev,
 {
        struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 
-       strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+       strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
 
        snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
                 "%u.%u", priv->dpni_ver_major, priv->dpni_ver_minor);
 
-       strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
+       strscpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
                sizeof(drvinfo->bus_info));
 }
 
@@ -191,11 +191,11 @@ static void dpaa2_eth_get_strings(struct net_device *netdev, u32 stringset,
        switch (stringset) {
        case ETH_SS_STATS:
                for (i = 0; i < DPAA2_ETH_NUM_STATS; i++) {
-                       strlcpy(p, dpaa2_ethtool_stats[i], ETH_GSTRING_LEN);
+                       strscpy(p, dpaa2_ethtool_stats[i], ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
                for (i = 0; i < DPAA2_ETH_NUM_EXTRA_STATS; i++) {
-                       strlcpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
+                       strscpy(p, dpaa2_ethtool_extras[i], ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
                if (dpaa2_eth_has_mac(priv))
index 70e0432..720c923 100644 (file)
@@ -15,18 +15,18 @@ static struct {
        enum dpsw_counter id;
        char name[ETH_GSTRING_LEN];
 } dpaa2_switch_ethtool_counters[] =  {
-       {DPSW_CNT_ING_FRAME,            "rx frames"},
-       {DPSW_CNT_ING_BYTE,             "rx bytes"},
-       {DPSW_CNT_ING_FLTR_FRAME,       "rx filtered frames"},
-       {DPSW_CNT_ING_FRAME_DISCARD,    "rx discarded frames"},
-       {DPSW_CNT_ING_BCAST_FRAME,      "rx b-cast frames"},
-       {DPSW_CNT_ING_BCAST_BYTES,      "rx b-cast bytes"},
-       {DPSW_CNT_ING_MCAST_FRAME,      "rx m-cast frames"},
-       {DPSW_CNT_ING_MCAST_BYTE,       "rx m-cast bytes"},
-       {DPSW_CNT_EGR_FRAME,            "tx frames"},
-       {DPSW_CNT_EGR_BYTE,             "tx bytes"},
-       {DPSW_CNT_EGR_FRAME_DISCARD,    "tx discarded frames"},
-       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "rx discarded no buffer frames"},
+       {DPSW_CNT_ING_FRAME,            "[hw] rx frames"},
+       {DPSW_CNT_ING_BYTE,             "[hw] rx bytes"},
+       {DPSW_CNT_ING_FLTR_FRAME,       "[hw] rx filtered frames"},
+       {DPSW_CNT_ING_FRAME_DISCARD,    "[hw] rx discarded frames"},
+       {DPSW_CNT_ING_BCAST_FRAME,      "[hw] rx bcast frames"},
+       {DPSW_CNT_ING_BCAST_BYTES,      "[hw] rx bcast bytes"},
+       {DPSW_CNT_ING_MCAST_FRAME,      "[hw] rx mcast frames"},
+       {DPSW_CNT_ING_MCAST_BYTE,       "[hw] rx mcast bytes"},
+       {DPSW_CNT_EGR_FRAME,            "[hw] tx frames"},
+       {DPSW_CNT_EGR_BYTE,             "[hw] tx bytes"},
+       {DPSW_CNT_EGR_FRAME_DISCARD,    "[hw] tx discarded frames"},
+       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "[hw] rx nobuffer discards"},
 };
 
 #define DPAA2_SWITCH_NUM_COUNTERS      ARRAY_SIZE(dpaa2_switch_ethtool_counters)
@@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev,
        struct dpsw_link_state state = {0};
        int err = 0;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_get(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
                                     port_priv->ethsw_data->dpsw_handle,
                                     port_priv->idx,
@@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        bool if_running;
        int err = 0, ret;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_set(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        /* Interface needs to be down to change link settings */
        if_running = netif_running(netdev);
        if (if_running) {
@@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        return err;
 }
 
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+static int
+dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS;
+
        switch (sset) {
        case ETH_SS_STATS:
-               return DPAA2_SWITCH_NUM_COUNTERS;
+               if (port_priv->mac)
+                       num_ss_stats += dpaa2_mac_get_sset_count();
+               return num_ss_stats;
        default:
                return -EOPNOTSUPP;
        }
@@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
 static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
                                             u32 stringset, u8 *data)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       u8 *p = data;
        int i;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
-                       memcpy(data + i * ETH_GSTRING_LEN,
-                              dpaa2_switch_ethtool_counters[i].name,
+               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+                       memcpy(p, dpaa2_switch_ethtool_counters[i].name,
                               ETH_GSTRING_LEN);
+                       p += ETH_GSTRING_LEN;
+               }
+               if (port_priv->mac)
+                       dpaa2_mac_get_strings(p);
                break;
        }
 }
@@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
                        netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
                                   dpaa2_switch_ethtool_counters[i].name, err);
        }
+
+       if (port_priv->mac)
+               dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i);
 }
 
 const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
index f9451ec..d6eefbb 100644 (file)
@@ -111,11 +111,11 @@ static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls,
        return 0;
 }
 
-int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block,
                               struct dpaa2_switch_acl_entry *entry)
 {
        struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
-       struct ethsw_core *ethsw = acl_tbl->ethsw;
+       struct ethsw_core *ethsw = filter_block->ethsw;
        struct dpsw_acl_key *acl_key = &entry->key;
        struct device *dev = ethsw->dev;
        u8 *cmd_buff;
@@ -136,7 +136,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
        }
 
        err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                acl_tbl->id, acl_entry_cfg);
+                                filter_block->acl_id, acl_entry_cfg);
 
        dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
                         DMA_TO_DEVICE);
@@ -150,12 +150,13 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
        return 0;
 }
 
-static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
-                                        struct dpaa2_switch_acl_entry *entry)
+static int
+dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block,
+                             struct dpaa2_switch_acl_entry *entry)
 {
        struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
        struct dpsw_acl_key *acl_key = &entry->key;
-       struct ethsw_core *ethsw = acl_tbl->ethsw;
+       struct ethsw_core *ethsw = block->ethsw;
        struct device *dev = ethsw->dev;
        u8 *cmd_buff;
        int err;
@@ -175,7 +176,7 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
        }
 
        err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                   acl_tbl->id, acl_entry_cfg);
+                                   block->acl_id, acl_entry_cfg);
 
        dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
                         DMA_TO_DEVICE);
@@ -190,19 +191,19 @@ static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
 }
 
 static int
-dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_filter_block *block,
                                   struct dpaa2_switch_acl_entry *entry)
 {
        struct dpaa2_switch_acl_entry *tmp;
        struct list_head *pos, *n;
        int index = 0;
 
-       if (list_empty(&acl_tbl->entries)) {
-               list_add(&entry->list, &acl_tbl->entries);
+       if (list_empty(&block->acl_entries)) {
+               list_add(&entry->list, &block->acl_entries);
                return index;
        }
 
-       list_for_each_safe(pos, n, &acl_tbl->entries) {
+       list_for_each_safe(pos, n, &block->acl_entries) {
                tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list);
                if (entry->prio < tmp->prio)
                        break;
@@ -213,13 +214,13 @@ dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
 }
 
 static struct dpaa2_switch_acl_entry*
-dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_filter_block *block,
                                    int index)
 {
        struct dpaa2_switch_acl_entry *tmp;
        int i = 0;
 
-       list_for_each_entry(tmp, &acl_tbl->entries, list) {
+       list_for_each_entry(tmp, &block->acl_entries, list) {
                if (i == index)
                        return tmp;
                ++i;
@@ -229,37 +230,38 @@ dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
 }
 
 static int
-dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_filter_block *block,
                                      struct dpaa2_switch_acl_entry *entry,
                                      int precedence)
 {
        int err;
 
-       err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+       err = dpaa2_switch_acl_entry_remove(block, entry);
        if (err)
                return err;
 
        entry->cfg.precedence = precedence;
-       return dpaa2_switch_acl_entry_add(acl_tbl, entry);
+       return dpaa2_switch_acl_entry_add(block, entry);
 }
 
-static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
-                                         struct dpaa2_switch_acl_entry *entry)
+static int
+dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_filter_block *block,
+                              struct dpaa2_switch_acl_entry *entry)
 {
        struct dpaa2_switch_acl_entry *tmp;
        int index, i, precedence, err;
 
        /* Add the new ACL entry to the linked list and get its index */
-       index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry);
+       index = dpaa2_switch_acl_entry_add_to_list(block, entry);
 
        /* Move up in priority the ACL entries to make space
         * for the new filter.
         */
-       precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1;
+       precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - block->num_acl_rules - 1;
        for (i = 0; i < index; i++) {
-               tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
+               tmp = dpaa2_switch_acl_entry_get_by_index(block, i);
 
-               err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+               err = dpaa2_switch_acl_entry_set_precedence(block, tmp,
                                                            precedence);
                if (err)
                        return err;
@@ -269,19 +271,19 @@ static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
 
        /* Add the new entry to hardware */
        entry->cfg.precedence = precedence;
-       err = dpaa2_switch_acl_entry_add(acl_tbl, entry);
-       acl_tbl->num_rules++;
+       err = dpaa2_switch_acl_entry_add(block, entry);
+       block->num_acl_rules++;
 
        return err;
 }
 
 static struct dpaa2_switch_acl_entry *
-dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_filter_block *block,
                                          unsigned long cookie)
 {
        struct dpaa2_switch_acl_entry *tmp, *n;
 
-       list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+       list_for_each_entry_safe(tmp, n, &block->acl_entries, list) {
                if (tmp->cookie == cookie)
                        return tmp;
        }
@@ -289,13 +291,13 @@ dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl,
 }
 
 static int
-dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_filter_block *block,
                                 struct dpaa2_switch_acl_entry *entry)
 {
        struct dpaa2_switch_acl_entry *tmp, *n;
        int index = 0;
 
-       list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+       list_for_each_entry_safe(tmp, n, &block->acl_entries, list) {
                if (tmp->cookie == entry->cookie)
                        return index;
                index++;
@@ -303,21 +305,34 @@ dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl,
        return -ENOENT;
 }
 
+static struct dpaa2_switch_mirror_entry *
+dpaa2_switch_mirror_find_entry_by_cookie(struct dpaa2_switch_filter_block *block,
+                                        unsigned long cookie)
+{
+       struct dpaa2_switch_mirror_entry *tmp, *n;
+
+       list_for_each_entry_safe(tmp, n, &block->mirror_entries, list) {
+               if (tmp->cookie == cookie)
+                       return tmp;
+       }
+       return NULL;
+}
+
 static int
-dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_filter_block *block,
                                  struct dpaa2_switch_acl_entry *entry)
 {
        struct dpaa2_switch_acl_entry *tmp;
        int index, i, precedence, err;
 
-       index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry);
+       index = dpaa2_switch_acl_entry_get_index(block, entry);
 
        /* Remove from hardware the ACL entry */
-       err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+       err = dpaa2_switch_acl_entry_remove(block, entry);
        if (err)
                return err;
 
-       acl_tbl->num_rules--;
+       block->num_acl_rules--;
 
        /* Remove it from the list also */
        list_del(&entry->list);
@@ -325,8 +340,8 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
        /* Move down in priority the entries over the deleted one */
        precedence = entry->cfg.precedence;
        for (i = index - 1; i >= 0; i--) {
-               tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
-               err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+               tmp = dpaa2_switch_acl_entry_get_by_index(block, i);
+               err = dpaa2_switch_acl_entry_set_precedence(block, tmp,
                                                            precedence);
                if (err)
                        return err;
@@ -339,10 +354,10 @@ dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
        return 0;
 }
 
-static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw,
-                                       struct flow_action_entry *cls_act,
-                                       struct dpsw_acl_result *dpsw_act,
-                                       struct netlink_ext_ack *extack)
+static int dpaa2_switch_tc_parse_action_acl(struct ethsw_core *ethsw,
+                                           struct flow_action_entry *cls_act,
+                                           struct dpsw_acl_result *dpsw_act,
+                                           struct netlink_ext_ack *extack)
 {
        int err = 0;
 
@@ -374,22 +389,110 @@ out:
        return err;
 }
 
-int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+static int
+dpaa2_switch_block_add_mirror(struct dpaa2_switch_filter_block *block,
+                             struct dpaa2_switch_mirror_entry *entry,
+                             u16 to, struct netlink_ext_ack *extack)
+{
+       unsigned long block_ports = block->ports;
+       struct ethsw_core *ethsw = block->ethsw;
+       struct ethsw_port_priv *port_priv;
+       unsigned long ports_added = 0;
+       u16 vlan = entry->cfg.vlan_id;
+       bool mirror_port_enabled;
+       int err, port;
+
+       /* Setup the mirroring port */
+       mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+       if (!mirror_port_enabled) {
+               err = dpsw_set_reflection_if(ethsw->mc_io, 0,
+                                            ethsw->dpsw_handle, to);
+               if (err)
+                       return err;
+               ethsw->mirror_port = to;
+       }
+
+       /* Setup the same egress mirroring configuration on all the switch
+        * ports that share the same filter block.
+        */
+       for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs) {
+               port_priv = ethsw->ports[port];
+
+               /* We cannot add a per VLAN mirroring rule if the VLAN in
+                * question is not installed on the switch port.
+                */
+               if (entry->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN &&
+                   !(port_priv->vlans[vlan] & ETHSW_VLAN_MEMBER)) {
+                       NL_SET_ERR_MSG(extack,
+                                      "VLAN must be installed on the switch port");
+                       err = -EINVAL;
+                       goto err_remove_filters;
+               }
+
+               err = dpsw_if_add_reflection(ethsw->mc_io, 0,
+                                            ethsw->dpsw_handle,
+                                            port, &entry->cfg);
+               if (err)
+                       goto err_remove_filters;
+
+               ports_added |= BIT(port);
+       }
+
+       list_add(&entry->list, &block->mirror_entries);
+
+       return 0;
+
+err_remove_filters:
+       for_each_set_bit(port, &ports_added, ethsw->sw_attr.num_ifs) {
+               dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                         port, &entry->cfg);
+       }
+
+       if (!mirror_port_enabled)
+               ethsw->mirror_port = ethsw->sw_attr.num_ifs;
+
+       return err;
+}
+
+static int
+dpaa2_switch_block_remove_mirror(struct dpaa2_switch_filter_block *block,
+                                struct dpaa2_switch_mirror_entry *entry)
+{
+       struct dpsw_reflection_cfg *cfg = &entry->cfg;
+       unsigned long block_ports = block->ports;
+       struct ethsw_core *ethsw = block->ethsw;
+       int port;
+
+       /* Remove this mirroring configuration from all the ports belonging to
+        * the filter block.
+        */
+       for_each_set_bit(port, &block_ports, ethsw->sw_attr.num_ifs)
+               dpsw_if_remove_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                         port, cfg);
+
+       /* Also remove it from the list of mirror filters */
+       list_del(&entry->list);
+       kfree(entry);
+
+       /* If this was the last mirror filter, then unset the mirror port */
+       if (list_empty(&block->mirror_entries))
+               ethsw->mirror_port =  ethsw->sw_attr.num_ifs;
+
+       return 0;
+}
+
+static int
+dpaa2_switch_cls_flower_replace_acl(struct dpaa2_switch_filter_block *block,
                                    struct flow_cls_offload *cls)
 {
        struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
        struct netlink_ext_ack *extack = cls->common.extack;
-       struct ethsw_core *ethsw = acl_tbl->ethsw;
        struct dpaa2_switch_acl_entry *acl_entry;
+       struct ethsw_core *ethsw = block->ethsw;
        struct flow_action_entry *act;
        int err;
 
-       if (!flow_offload_has_one_action(&rule->action)) {
-               NL_SET_ERR_MSG(extack, "Only singular actions are supported");
-               return -EOPNOTSUPP;
-       }
-
-       if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+       if (dpaa2_switch_acl_tbl_is_full(block)) {
                NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
                return -ENOMEM;
        }
@@ -403,15 +506,15 @@ int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
                goto free_acl_entry;
 
        act = &rule->action.entries[0];
-       err = dpaa2_switch_tc_parse_action(ethsw, act,
-                                          &acl_entry->cfg.result, extack);
+       err = dpaa2_switch_tc_parse_action_acl(ethsw, act,
+                                              &acl_entry->cfg.result, extack);
        if (err)
                goto free_acl_entry;
 
        acl_entry->prio = cls->common.prio;
        acl_entry->cookie = cls->cookie;
 
-       err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+       err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry);
        if (err)
                goto free_acl_entry;
 
@@ -423,33 +526,171 @@ free_acl_entry:
        return err;
 }
 
-int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
-                                   struct flow_cls_offload *cls)
+static int dpaa2_switch_flower_parse_mirror_key(struct flow_cls_offload *cls,
+                                               u16 *vlan)
 {
-       struct dpaa2_switch_acl_entry *entry;
+       struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+       struct flow_dissector *dissector = rule->match.dissector;
+       struct netlink_ext_ack *extack = cls->common.extack;
+
+       if (dissector->used_keys &
+           ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+             BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+             BIT(FLOW_DISSECTOR_KEY_VLAN))) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Mirroring is supported only per VLAN");
+               return -EOPNOTSUPP;
+       }
+
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+               struct flow_match_vlan match;
+
+               flow_rule_match_vlan(rule, &match);
 
-       entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
-       if (!entry)
-               return 0;
+               if (match.mask->vlan_priority != 0 ||
+                   match.mask->vlan_dei != 0) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Only matching on VLAN ID supported");
+                       return -EOPNOTSUPP;
+               }
 
-       return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+               if (match.mask->vlan_id != 0xFFF) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Masked matching not supported");
+                       return -EOPNOTSUPP;
+               }
+
+               *vlan = (u16)match.key->vlan_id;
+       }
+
+       return 0;
 }
 
-int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
-                                     struct tc_cls_matchall_offload *cls)
+static int
+dpaa2_switch_cls_flower_replace_mirror(struct dpaa2_switch_filter_block *block,
+                                      struct flow_cls_offload *cls)
 {
        struct netlink_ext_ack *extack = cls->common.extack;
-       struct ethsw_core *ethsw = acl_tbl->ethsw;
-       struct dpaa2_switch_acl_entry *acl_entry;
-       struct flow_action_entry *act;
+       struct dpaa2_switch_mirror_entry *mirror_entry;
+       struct ethsw_core *ethsw = block->ethsw;
+       struct dpaa2_switch_mirror_entry *tmp;
+       struct flow_action_entry *cls_act;
+       struct list_head *pos, *n;
+       bool mirror_port_enabled;
+       u16 if_id, vlan;
        int err;
 
-       if (!flow_offload_has_one_action(&cls->rule->action)) {
+       mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+       cls_act = &cls->rule->action.entries[0];
+
+       /* Offload rules only when the destination is a DPAA2 switch port */
+       if (!dpaa2_switch_port_dev_check(cls_act->dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Destination not a DPAA2 switch port");
+               return -EOPNOTSUPP;
+       }
+       if_id = dpaa2_switch_get_index(ethsw, cls_act->dev);
+
+       /* We have a single mirror port but can configure egress mirroring on
+        * all the other switch ports. We need to allow mirroring rules only
+        * when the destination port is the same.
+        */
+       if (mirror_port_enabled && ethsw->mirror_port != if_id) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Multiple mirror ports not supported");
+               return -EBUSY;
+       }
+
+       /* Parse the key */
+       err = dpaa2_switch_flower_parse_mirror_key(cls, &vlan);
+       if (err)
+               return err;
+
+       /* Make sure that we don't already have a mirror rule with the same
+        * configuration.
+        */
+       list_for_each_safe(pos, n, &block->mirror_entries) {
+               tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list);
+
+               if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_VLAN &&
+                   tmp->cfg.vlan_id == vlan) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "VLAN mirror filter already installed");
+                       return -EBUSY;
+               }
+       }
+
+       mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL);
+       if (!mirror_entry)
+               return -ENOMEM;
+
+       mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_VLAN;
+       mirror_entry->cfg.vlan_id = vlan;
+       mirror_entry->cookie = cls->cookie;
+
+       return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id,
+                                            extack);
+}
+
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block,
+                                   struct flow_cls_offload *cls)
+{
+       struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+       struct netlink_ext_ack *extack = cls->common.extack;
+       struct flow_action_entry *act;
+
+       if (!flow_offload_has_one_action(&rule->action)) {
                NL_SET_ERR_MSG(extack, "Only singular actions are supported");
                return -EOPNOTSUPP;
        }
 
-       if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+       act = &rule->action.entries[0];
+       switch (act->id) {
+       case FLOW_ACTION_REDIRECT:
+       case FLOW_ACTION_TRAP:
+       case FLOW_ACTION_DROP:
+               return dpaa2_switch_cls_flower_replace_acl(block, cls);
+       case FLOW_ACTION_MIRRED:
+               return dpaa2_switch_cls_flower_replace_mirror(block, cls);
+       default:
+               NL_SET_ERR_MSG_MOD(extack, "Action not supported");
+               return -EOPNOTSUPP;
+       }
+}
+
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block,
+                                   struct flow_cls_offload *cls)
+{
+       struct dpaa2_switch_mirror_entry *mirror_entry;
+       struct dpaa2_switch_acl_entry *acl_entry;
+
+       /* If this filter is a an ACL one, remove it */
+       acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block,
+                                                             cls->cookie);
+       if (acl_entry)
+               return dpaa2_switch_acl_tbl_remove_entry(block, acl_entry);
+
+       /* If not, then it has to be a mirror */
+       mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block,
+                                                               cls->cookie);
+       if (mirror_entry)
+               return dpaa2_switch_block_remove_mirror(block,
+                                                       mirror_entry);
+
+       return 0;
+}
+
+static int
+dpaa2_switch_cls_matchall_replace_acl(struct dpaa2_switch_filter_block *block,
+                                     struct tc_cls_matchall_offload *cls)
+{
+       struct netlink_ext_ack *extack = cls->common.extack;
+       struct ethsw_core *ethsw = block->ethsw;
+       struct dpaa2_switch_acl_entry *acl_entry;
+       struct flow_action_entry *act;
+       int err;
+
+       if (dpaa2_switch_acl_tbl_is_full(block)) {
                NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
                return -ENOMEM;
        }
@@ -459,15 +700,15 @@ int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
                return -ENOMEM;
 
        act = &cls->rule->action.entries[0];
-       err = dpaa2_switch_tc_parse_action(ethsw, act,
-                                          &acl_entry->cfg.result, extack);
+       err = dpaa2_switch_tc_parse_action_acl(ethsw, act,
+                                              &acl_entry->cfg.result, extack);
        if (err)
                goto free_acl_entry;
 
        acl_entry->prio = cls->common.prio;
        acl_entry->cookie = cls->cookie;
 
-       err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+       err = dpaa2_switch_acl_tbl_add_entry(block, acl_entry);
        if (err)
                goto free_acl_entry;
 
@@ -479,14 +720,159 @@ free_acl_entry:
        return err;
 }
 
-int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+static int
+dpaa2_switch_cls_matchall_replace_mirror(struct dpaa2_switch_filter_block *block,
+                                        struct tc_cls_matchall_offload *cls)
+{
+       struct netlink_ext_ack *extack = cls->common.extack;
+       struct dpaa2_switch_mirror_entry *mirror_entry;
+       struct ethsw_core *ethsw = block->ethsw;
+       struct dpaa2_switch_mirror_entry *tmp;
+       struct flow_action_entry *cls_act;
+       struct list_head *pos, *n;
+       bool mirror_port_enabled;
+       u16 if_id;
+
+       mirror_port_enabled = (ethsw->mirror_port != ethsw->sw_attr.num_ifs);
+       cls_act = &cls->rule->action.entries[0];
+
+       /* Offload rules only when the destination is a DPAA2 switch port */
+       if (!dpaa2_switch_port_dev_check(cls_act->dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Destination not a DPAA2 switch port");
+               return -EOPNOTSUPP;
+       }
+       if_id = dpaa2_switch_get_index(ethsw, cls_act->dev);
+
+       /* We have a single mirror port but can configure egress mirroring on
+        * all the other switch ports. We need to allow mirroring rules only
+        * when the destination port is the same.
+        */
+       if (mirror_port_enabled && ethsw->mirror_port != if_id) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Multiple mirror ports not supported");
+               return -EBUSY;
+       }
+
+       /* Make sure that we don't already have a mirror rule with the same
+        * configuration. One matchall rule per block is the maximum.
+        */
+       list_for_each_safe(pos, n, &block->mirror_entries) {
+               tmp = list_entry(pos, struct dpaa2_switch_mirror_entry, list);
+
+               if (tmp->cfg.filter == DPSW_REFLECTION_FILTER_INGRESS_ALL) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Matchall mirror filter already installed");
+                       return -EBUSY;
+               }
+       }
+
+       mirror_entry = kzalloc(sizeof(*mirror_entry), GFP_KERNEL);
+       if (!mirror_entry)
+               return -ENOMEM;
+
+       mirror_entry->cfg.filter = DPSW_REFLECTION_FILTER_INGRESS_ALL;
+       mirror_entry->cookie = cls->cookie;
+
+       return dpaa2_switch_block_add_mirror(block, mirror_entry, if_id,
+                                            extack);
+}
+
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block,
+                                     struct tc_cls_matchall_offload *cls)
+{
+       struct netlink_ext_ack *extack = cls->common.extack;
+       struct flow_action_entry *act;
+
+       if (!flow_offload_has_one_action(&cls->rule->action)) {
+               NL_SET_ERR_MSG(extack, "Only singular actions are supported");
+               return -EOPNOTSUPP;
+       }
+
+       act = &cls->rule->action.entries[0];
+       switch (act->id) {
+       case FLOW_ACTION_REDIRECT:
+       case FLOW_ACTION_TRAP:
+       case FLOW_ACTION_DROP:
+               return dpaa2_switch_cls_matchall_replace_acl(block, cls);
+       case FLOW_ACTION_MIRRED:
+               return dpaa2_switch_cls_matchall_replace_mirror(block, cls);
+       default:
+               NL_SET_ERR_MSG_MOD(extack, "Action not supported");
+               return -EOPNOTSUPP;
+       }
+}
+
+int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
+                                     struct ethsw_port_priv *port_priv)
+{
+       struct ethsw_core *ethsw = port_priv->ethsw_data;
+       struct dpaa2_switch_mirror_entry *tmp;
+       int err;
+
+       list_for_each_entry(tmp, &block->mirror_entries, list) {
+               err = dpsw_if_add_reflection(ethsw->mc_io, 0,
+                                            ethsw->dpsw_handle,
+                                            port_priv->idx, &tmp->cfg);
+               if (err)
+                       goto unwind_add;
+       }
+
+       return 0;
+
+unwind_add:
+       list_for_each_entry(tmp, &block->mirror_entries, list)
+               dpsw_if_remove_reflection(ethsw->mc_io, 0,
+                                         ethsw->dpsw_handle,
+                                         port_priv->idx, &tmp->cfg);
+
+       return err;
+}
+
+int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
+                                       struct ethsw_port_priv *port_priv)
+{
+       struct ethsw_core *ethsw = port_priv->ethsw_data;
+       struct dpaa2_switch_mirror_entry *tmp;
+       int err;
+
+       list_for_each_entry(tmp, &block->mirror_entries, list) {
+               err = dpsw_if_remove_reflection(ethsw->mc_io, 0,
+                                               ethsw->dpsw_handle,
+                                               port_priv->idx, &tmp->cfg);
+               if (err)
+                       goto unwind_remove;
+       }
+
+       return 0;
+
+unwind_remove:
+       list_for_each_entry(tmp, &block->mirror_entries, list)
+               dpsw_if_add_reflection(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                      port_priv->idx, &tmp->cfg);
+
+       return err;
+}
+
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block,
                                      struct tc_cls_matchall_offload *cls)
 {
-       struct dpaa2_switch_acl_entry *entry;
+       struct dpaa2_switch_mirror_entry *mirror_entry;
+       struct dpaa2_switch_acl_entry *acl_entry;
+
+       /* If this filter is a an ACL one, remove it */
+       acl_entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(block,
+                                                             cls->cookie);
+       if (acl_entry)
+               return dpaa2_switch_acl_tbl_remove_entry(block,
+                                                        acl_entry);
 
-       entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
-       if (!entry)
-               return 0;
+       /* If not, then it has to be a mirror */
+       mirror_entry = dpaa2_switch_mirror_find_entry_by_cookie(block,
+                                                               cls->cookie);
+       if (mirror_entry)
+               return dpaa2_switch_block_remove_mirror(block,
+                                                       mirror_entry);
 
-       return  dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+       return 0;
 }
index 98cc013..175f15c 100644 (file)
@@ -41,14 +41,14 @@ static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *e
        return NULL;
 }
 
-static struct dpaa2_switch_acl_tbl *
-dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw)
+static struct dpaa2_switch_filter_block *
+dpaa2_switch_filter_block_get_unused(struct ethsw_core *ethsw)
 {
        int i;
 
        for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-               if (!ethsw->acls[i].in_use)
-                       return &ethsw->acls[i];
+               if (!ethsw->filter_blocks[i].in_use)
+                       return &ethsw->filter_blocks[i];
        return NULL;
 }
 
@@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
        return 0;
 }
 
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+static int dpaa2_switch_port_link_state_update(struct net_device *netdev)
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct dpsw_link_state state;
        int err;
 
+       /* When we manage the MAC/PHY using phylink there is no need
+        * to manually update the netif_carrier.
+        */
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return 0;
+
        /* Interrupts are received even though no one issued an 'ifconfig up'
         * on the switch interface. Ignore these link state update interrupts
         */
@@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
-       /* Explicitly set carrier off, otherwise
-        * netif_carrier_ok() will return true and cause 'ip link show'
-        * to report the LOWER_UP flag, even though the link
-        * notification wasn't even received.
-        */
-       netif_carrier_off(netdev);
+       if (!dpaa2_switch_port_is_type_phy(port_priv)) {
+               /* Explicitly set carrier off, otherwise
+                * netif_carrier_ok() will return true and cause 'ip link show'
+                * to report the LOWER_UP flag, even though the link
+                * notification wasn't even received.
+                */
+               netif_carrier_off(netdev);
+       }
 
        err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
                             port_priv->ethsw_data->dpsw_handle,
@@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
                return err;
        }
 
-       /* sync carrier state */
-       err = dpaa2_switch_port_carrier_state_sync(netdev);
-       if (err) {
-               netdev_err(netdev,
-                          "dpaa2_switch_port_carrier_state_sync err %d\n", err);
-               goto err_carrier_sync;
-       }
-
        dpaa2_switch_enable_ctrl_if_napi(ethsw);
 
-       return 0;
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               phylink_start(port_priv->mac->phylink);
 
-err_carrier_sync:
-       dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-                       port_priv->ethsw_data->dpsw_handle,
-                       port_priv->idx);
-       return err;
+       return 0;
 }
 
 static int dpaa2_switch_port_stop(struct net_device *netdev)
@@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               phylink_stop(port_priv->mac->phylink);
+       } else {
+               netif_tx_stop_all_queues(netdev);
+               netif_carrier_off(netdev);
+       }
+
        err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
                              port_priv->ethsw_data->dpsw_handle,
                              port_priv->idx);
@@ -1127,28 +1131,28 @@ err_exit:
 }
 
 static int
-dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_filter_block *filter_block,
                                 struct flow_cls_offload *f)
 {
        switch (f->command) {
        case FLOW_CLS_REPLACE:
-               return dpaa2_switch_cls_flower_replace(acl_tbl, f);
+               return dpaa2_switch_cls_flower_replace(filter_block, f);
        case FLOW_CLS_DESTROY:
-               return dpaa2_switch_cls_flower_destroy(acl_tbl, f);
+               return dpaa2_switch_cls_flower_destroy(filter_block, f);
        default:
                return -EOPNOTSUPP;
        }
 }
 
 static int
-dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl,
+dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_filter_block *block,
                                   struct tc_cls_matchall_offload *f)
 {
        switch (f->command) {
        case TC_CLSMATCHALL_REPLACE:
-               return dpaa2_switch_cls_matchall_replace(acl_tbl, f);
+               return dpaa2_switch_cls_matchall_replace(block, f);
        case TC_CLSMATCHALL_DESTROY:
-               return dpaa2_switch_cls_matchall_destroy(acl_tbl, f);
+               return dpaa2_switch_cls_matchall_destroy(block, f);
        default:
                return -EOPNOTSUPP;
        }
@@ -1170,106 +1174,122 @@ static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
 
 static LIST_HEAD(dpaa2_switch_block_cb_list);
 
-static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
-                                         struct dpaa2_switch_acl_tbl *acl_tbl)
+static int
+dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
+                              struct dpaa2_switch_filter_block *block)
 {
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        struct net_device *netdev = port_priv->netdev;
        struct dpsw_acl_if_cfg acl_if_cfg;
        int err;
 
-       if (port_priv->acl_tbl)
+       if (port_priv->filter_block)
                return -EINVAL;
 
        acl_if_cfg.if_id[0] = port_priv->idx;
        acl_if_cfg.num_ifs = 1;
        err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                             acl_tbl->id, &acl_if_cfg);
+                             block->acl_id, &acl_if_cfg);
        if (err) {
                netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
                return err;
        }
 
-       acl_tbl->ports |= BIT(port_priv->idx);
-       port_priv->acl_tbl = acl_tbl;
+       block->ports |= BIT(port_priv->idx);
+       port_priv->filter_block = block;
 
        return 0;
 }
 
 static int
 dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv,
-                                struct dpaa2_switch_acl_tbl *acl_tbl)
+                                struct dpaa2_switch_filter_block *block)
 {
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        struct net_device *netdev = port_priv->netdev;
        struct dpsw_acl_if_cfg acl_if_cfg;
        int err;
 
-       if (port_priv->acl_tbl != acl_tbl)
+       if (port_priv->filter_block != block)
                return -EINVAL;
 
        acl_if_cfg.if_id[0] = port_priv->idx;
        acl_if_cfg.num_ifs = 1;
        err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                acl_tbl->id, &acl_if_cfg);
+                                block->acl_id, &acl_if_cfg);
        if (err) {
                netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
                return err;
        }
 
-       acl_tbl->ports &= ~BIT(port_priv->idx);
-       port_priv->acl_tbl = NULL;
+       block->ports &= ~BIT(port_priv->idx);
+       port_priv->filter_block = NULL;
        return 0;
 }
 
 static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv,
-                                       struct dpaa2_switch_acl_tbl *acl_tbl)
+                                       struct dpaa2_switch_filter_block *block)
 {
-       struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl;
+       struct dpaa2_switch_filter_block *old_block = port_priv->filter_block;
        int err;
 
+       /* Offload all the mirror entries found in the block on this new port
+        * joining it.
+        */
+       err = dpaa2_switch_block_offload_mirror(block, port_priv);
+       if (err)
+               return err;
+
        /* If the port is already bound to this ACL table then do nothing. This
         * can happen when this port is the first one to join a tc block
         */
-       if (port_priv->acl_tbl == acl_tbl)
+       if (port_priv->filter_block == block)
                return 0;
 
-       err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl);
+       err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_block);
        if (err)
                return err;
 
        /* Mark the previous ACL table as being unused if this was the last
         * port that was using it.
         */
-       if (old_acl_tbl->ports == 0)
-               old_acl_tbl->in_use = false;
+       if (old_block->ports == 0)
+               old_block->in_use = false;
 
-       return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+       return dpaa2_switch_port_acl_tbl_bind(port_priv, block);
 }
 
-static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv,
-                                         struct dpaa2_switch_acl_tbl *acl_tbl)
+static int
+dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv,
+                              struct dpaa2_switch_filter_block *block)
 {
        struct ethsw_core *ethsw = port_priv->ethsw_data;
-       struct dpaa2_switch_acl_tbl *new_acl_tbl;
+       struct dpaa2_switch_filter_block *new_block;
        int err;
 
+       /* Unoffload all the mirror entries found in the block from the
+        * port leaving it.
+        */
+       err = dpaa2_switch_block_unoffload_mirror(block, port_priv);
+       if (err)
+               return err;
+
        /* We are the last port that leaves a block (an ACL table).
         * We'll continue to use this table.
         */
-       if (acl_tbl->ports == BIT(port_priv->idx))
+       if (block->ports == BIT(port_priv->idx))
                return 0;
 
-       err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl);
+       err = dpaa2_switch_port_acl_tbl_unbind(port_priv, block);
        if (err)
                return err;
 
-       if (acl_tbl->ports == 0)
-               acl_tbl->in_use = false;
+       if (block->ports == 0)
+               block->in_use = false;
 
-       new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
-       new_acl_tbl->in_use = true;
-       return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl);
+       new_block = dpaa2_switch_filter_block_get_unused(ethsw);
+       new_block->in_use = true;
+       return dpaa2_switch_port_acl_tbl_bind(port_priv, new_block);
 }
 
 static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
@@ -1277,7 +1297,7 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct ethsw_core *ethsw = port_priv->ethsw_data;
-       struct dpaa2_switch_acl_tbl *acl_tbl;
+       struct dpaa2_switch_filter_block *filter_block;
        struct flow_block_cb *block_cb;
        bool register_block = false;
        int err;
@@ -1287,24 +1307,24 @@ static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
                                        ethsw);
 
        if (!block_cb) {
-               /* If the ACL table is not already known, then this port must
-                * be the first to join it. In this case, we can just continue
-                * to use our private table
+               /* If the filter block is not already known, then this port
+                * must be the first to join it. In this case, we can just
+                * continue to use our private table
                 */
-               acl_tbl = port_priv->acl_tbl;
+               filter_block = port_priv->filter_block;
 
                block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig,
-                                              ethsw, acl_tbl, NULL);
+                                              ethsw, filter_block, NULL);
                if (IS_ERR(block_cb))
                        return PTR_ERR(block_cb);
 
                register_block = true;
        } else {
-               acl_tbl = flow_block_cb_priv(block_cb);
+               filter_block = flow_block_cb_priv(block_cb);
        }
 
        flow_block_cb_incref(block_cb);
-       err = dpaa2_switch_port_block_bind(port_priv, acl_tbl);
+       err = dpaa2_switch_port_block_bind(port_priv, filter_block);
        if (err)
                goto err_block_bind;
 
@@ -1327,7 +1347,7 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev,
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct ethsw_core *ethsw = port_priv->ethsw_data;
-       struct dpaa2_switch_acl_tbl *acl_tbl;
+       struct dpaa2_switch_filter_block *filter_block;
        struct flow_block_cb *block_cb;
        int err;
 
@@ -1337,8 +1357,8 @@ static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev,
        if (!block_cb)
                return;
 
-       acl_tbl = flow_block_cb_priv(block_cb);
-       err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl);
+       filter_block = flow_block_cb_priv(block_cb);
+       err = dpaa2_switch_port_block_unbind(port_priv, filter_block);
        if (!err && !flow_block_cb_decref(block_cb)) {
                flow_block_cb_remove(block_cb, f);
                list_del(&block_cb->driver_list);
@@ -1403,41 +1423,105 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
        return netdev->netdev_ops == &dpaa2_switch_port_ops;
 }
 
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
 {
-       int i;
+       struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
+       struct dpaa2_mac *mac;
+       int err;
 
-       for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-               dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
-               dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+       dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent);
+       dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx);
+
+       if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
+               return PTR_ERR(dpmac_dev);
+
+       if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+               return 0;
+
+       mac = kzalloc(sizeof(*mac), GFP_KERNEL);
+       if (!mac)
+               return -ENOMEM;
+
+       mac->mc_dev = dpmac_dev;
+       mac->mc_io = port_priv->ethsw_data->mc_io;
+       mac->net_dev = port_priv->netdev;
+
+       err = dpaa2_mac_open(mac);
+       if (err)
+               goto err_free_mac;
+       port_priv->mac = mac;
+
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               err = dpaa2_mac_connect(mac);
+               if (err) {
+                       netdev_err(port_priv->netdev,
+                                  "Error connecting to the MAC endpoint %pe\n",
+                                  ERR_PTR(err));
+                       goto err_close_mac;
+               }
        }
+
+       return 0;
+
+err_close_mac:
+       dpaa2_mac_close(mac);
+       port_priv->mac = NULL;
+err_free_mac:
+       kfree(mac);
+       return err;
+}
+
+static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv)
+{
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               dpaa2_mac_disconnect(port_priv->mac);
+
+       if (!dpaa2_switch_port_has_mac(port_priv))
+               return;
+
+       dpaa2_mac_close(port_priv->mac);
+       kfree(port_priv->mac);
+       port_priv->mac = NULL;
 }
 
 static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
 {
        struct device *dev = (struct device *)arg;
        struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
-       /* Mask the events and the if_id reserved bits to be cleared on read */
-       u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
-       int err;
+       struct ethsw_port_priv *port_priv;
+       u32 status = ~0;
+       int err, if_id;
 
        err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
                                  DPSW_IRQ_INDEX_IF, &status);
        if (err) {
                dev_err(dev, "Can't get irq status (err %d)\n", err);
-
-               err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                           DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
-               if (err)
-                       dev_err(dev, "Can't clear irq status (err %d)\n", err);
                goto out;
        }
 
-       if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
-               dpaa2_switch_links_state_update(ethsw);
+       if_id = (status & 0xFFFF0000) >> 16;
+       port_priv = ethsw->ports[if_id];
+
+       if (status & DPSW_IRQ_EVENT_LINK_CHANGED) {
+               dpaa2_switch_port_link_state_update(port_priv->netdev);
+               dpaa2_switch_port_set_mac_addr(port_priv);
+       }
+
+       if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) {
+               rtnl_lock();
+               if (dpaa2_switch_port_has_mac(port_priv))
+                       dpaa2_switch_port_disconnect_mac(port_priv);
+               else
+                       dpaa2_switch_port_connect_mac(port_priv);
+               rtnl_unlock();
+       }
 
 out:
+       err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                   DPSW_IRQ_INDEX_IF, status);
+       if (err)
+               dev_err(dev, "Can't clear irq status (err %d)\n", err);
+
        return IRQ_HANDLED;
 }
 
@@ -1889,8 +1973,12 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
        return notifier_from_errno(err);
 }
 
+static struct notifier_block dpaa2_switch_port_switchdev_nb;
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb;
+
 static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
-                                        struct net_device *upper_dev)
+                                        struct net_device *upper_dev,
+                                        struct netlink_ext_ack *extack)
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct ethsw_core *ethsw = port_priv->ethsw_data;
@@ -1906,8 +1994,8 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 
                other_port_priv = netdev_priv(other_dev);
                if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
-                       netdev_err(netdev,
-                                  "Interface from a different DPSW is in the bridge already!\n");
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Interface from a different DPSW is in the bridge already");
                        return -EINVAL;
                }
        }
@@ -1929,8 +2017,16 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
        if (err)
                goto err_egress_flood;
 
+       err = switchdev_bridge_port_offload(netdev, netdev, NULL,
+                                           &dpaa2_switch_port_switchdev_nb,
+                                           &dpaa2_switch_port_switchdev_blocking_nb,
+                                           false, extack);
+       if (err)
+               goto err_switchdev_offload;
+
        return 0;
 
+err_switchdev_offload:
 err_egress_flood:
        dpaa2_switch_port_set_fdb(port_priv, NULL);
        return err;
@@ -1956,6 +2052,13 @@ static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, vo
        return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
 }
 
+static void dpaa2_switch_port_pre_bridge_leave(struct net_device *netdev)
+{
+       switchdev_bridge_port_unoffload(netdev, NULL,
+                                       &dpaa2_switch_port_switchdev_nb,
+                                       &dpaa2_switch_port_switchdev_blocking_nb);
+}
+
 static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
@@ -2029,6 +2132,28 @@ static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *net
        return 0;
 }
 
+static int
+dpaa2_switch_prechangeupper_sanity_checks(struct net_device *netdev,
+                                         struct net_device *upper_dev,
+                                         struct netlink_ext_ack *extack)
+{
+       int err;
+
+       if (!br_vlan_enabled(upper_dev)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
+               return -EOPNOTSUPP;
+       }
+
+       err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cannot join a bridge while VLAN uppers are present");
+               return 0;
+       }
+
+       return 0;
+}
+
 static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
                                             unsigned long event, void *ptr)
 {
@@ -2049,25 +2174,23 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
                if (!netif_is_bridge_master(upper_dev))
                        break;
 
-               if (!br_vlan_enabled(upper_dev)) {
-                       NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
-                       err = -EOPNOTSUPP;
+               err = dpaa2_switch_prechangeupper_sanity_checks(netdev,
+                                                               upper_dev,
+                                                               extack);
+               if (err)
                        goto out;
-               }
 
-               err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
-               if (err) {
-                       NL_SET_ERR_MSG_MOD(extack,
-                                          "Cannot join a bridge while VLAN uppers are present");
-                       goto out;
-               }
+               if (!info->linking)
+                       dpaa2_switch_port_pre_bridge_leave(netdev);
 
                break;
        case NETDEV_CHANGEUPPER:
                upper_dev = info->upper_dev;
                if (netif_is_bridge_master(upper_dev)) {
                        if (info->linking)
-                               err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
+                               err = dpaa2_switch_port_bridge_join(netdev,
+                                                                   upper_dev,
+                                                                   extack);
                        else
                                err = dpaa2_switch_port_bridge_leave(netdev);
                }
@@ -2802,6 +2925,18 @@ err_free_dpbp:
        return err;
 }
 
+static void dpaa2_switch_remove_port(struct ethsw_core *ethsw,
+                                    u16 port_idx)
+{
+       struct ethsw_port_priv *port_priv = ethsw->ports[port_idx];
+
+       rtnl_lock();
+       dpaa2_switch_port_disconnect_mac(port_priv);
+       rtnl_unlock();
+       free_netdev(port_priv->netdev);
+       ethsw->ports[port_idx] = NULL;
+}
+
 static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 {
        struct device *dev = &sw_dev->dev;
@@ -2952,7 +3087,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
        acl_entry.cfg.precedence = 0;
        acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
 
-       return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry);
+       return dpaa2_switch_acl_entry_add(port_priv->filter_block, &acl_entry);
 }
 
 static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
@@ -2965,7 +3100,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
        };
        struct net_device *netdev = port_priv->netdev;
        struct ethsw_core *ethsw = port_priv->ethsw_data;
-       struct dpaa2_switch_acl_tbl *acl_tbl;
+       struct dpaa2_switch_filter_block *filter_block;
        struct dpsw_fdb_cfg fdb_cfg = {0};
        struct dpsw_if_attr dpsw_if_attr;
        struct dpaa2_switch_fdb *fdb;
@@ -3020,14 +3155,15 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
                return err;
        }
 
-       acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
-       acl_tbl->ethsw = ethsw;
-       acl_tbl->id = acl_tbl_id;
-       acl_tbl->in_use = true;
-       acl_tbl->num_rules = 0;
-       INIT_LIST_HEAD(&acl_tbl->entries);
+       filter_block = dpaa2_switch_filter_block_get_unused(ethsw);
+       filter_block->ethsw = ethsw;
+       filter_block->acl_id = acl_tbl_id;
+       filter_block->in_use = true;
+       filter_block->num_acl_rules = 0;
+       INIT_LIST_HEAD(&filter_block->acl_entries);
+       INIT_LIST_HEAD(&filter_block->mirror_entries);
 
-       err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+       err = dpaa2_switch_port_acl_tbl_bind(port_priv, filter_block);
        if (err)
                return err;
 
@@ -3079,11 +3215,11 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
        for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
                port_priv = ethsw->ports[i];
                unregister_netdev(port_priv->netdev);
-               free_netdev(port_priv->netdev);
+               dpaa2_switch_remove_port(ethsw, i);
        }
 
        kfree(ethsw->fdbs);
-       kfree(ethsw->acls);
+       kfree(ethsw->filter_blocks);
        kfree(ethsw->ports);
 
        dpaa2_switch_teardown(sw_dev);
@@ -3156,6 +3292,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
                goto err_port_probe;
        port_priv->learn_ena = false;
 
+       err = dpaa2_switch_port_connect_mac(port_priv);
+       if (err)
+               goto err_port_probe;
+
        return 0;
 
 err_port_probe:
@@ -3209,9 +3349,10 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
                goto err_free_ports;
        }
 
-       ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls),
-                             GFP_KERNEL);
-       if (!ethsw->acls) {
+       ethsw->filter_blocks = kcalloc(ethsw->sw_attr.num_ifs,
+                                      sizeof(*ethsw->filter_blocks),
+                                      GFP_KERNEL);
+       if (!ethsw->filter_blocks) {
                err = -ENOMEM;
                goto err_free_fdbs;
        }
@@ -3231,17 +3372,16 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
                               &ethsw->fq[i].napi, dpaa2_switch_poll,
                               NAPI_POLL_WEIGHT);
 
-       err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-       if (err) {
-               dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-               goto err_free_netdev;
-       }
-
        /* Setup IRQs */
        err = dpaa2_switch_setup_irqs(sw_dev);
        if (err)
                goto err_stop;
 
+       /* By convention, if the mirror port is equal to the number of switch
+        * interfaces, then mirroring of any kind is disabled.
+        */
+       ethsw->mirror_port =  ethsw->sw_attr.num_ifs;
+
        /* Register the netdev only when the entire setup is done and the
         * switch port interfaces are ready to receive traffic
         */
@@ -3263,8 +3403,8 @@ err_stop:
        dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
 err_free_netdev:
        for (i--; i >= 0; i--)
-               free_netdev(ethsw->ports[i]->netdev);
-       kfree(ethsw->acls);
+               dpaa2_switch_remove_port(ethsw, i);
+       kfree(ethsw->filter_blocks);
 err_free_fdbs:
        kfree(ethsw->fdbs);
 err_free_ports:
index bdef71f..0002dca 100644 (file)
@@ -21,6 +21,7 @@
 #include <net/pkt_cls.h>
 #include <soc/fsl/dpaa2-io.h>
 
+#include "dpaa2-mac.h"
 #include "dpsw.h"
 
 /* Number of IRQs supported */
@@ -113,20 +114,29 @@ struct dpaa2_switch_acl_entry {
        struct dpsw_acl_key     key;
 };
 
-struct dpaa2_switch_acl_tbl {
-       struct list_head        entries;
+struct dpaa2_switch_mirror_entry {
+       struct list_head        list;
+       struct dpsw_reflection_cfg cfg;
+       unsigned long           cookie;
+       u16 if_id;
+};
+
+struct dpaa2_switch_filter_block {
        struct ethsw_core       *ethsw;
        u64                     ports;
-
-       u16                     id;
-       u8                      num_rules;
        bool                    in_use;
+
+       struct list_head        acl_entries;
+       u16                     acl_id;
+       u8                      num_acl_rules;
+
+       struct list_head        mirror_entries;
 };
 
 static inline bool
-dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl)
+dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_filter_block *filter_block)
 {
-       if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >=
+       if ((filter_block->num_acl_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >=
            DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES)
                return true;
        return false;
@@ -149,7 +159,8 @@ struct ethsw_port_priv {
        bool                    ucast_flood;
        bool                    learn_ena;
 
-       struct dpaa2_switch_acl_tbl *acl_tbl;
+       struct dpaa2_switch_filter_block *filter_block;
+       struct dpaa2_mac        *mac;
 };
 
 /* Switch data */
@@ -175,7 +186,8 @@ struct ethsw_core {
        int                             napi_users;
 
        struct dpaa2_switch_fdb         *fdbs;
-       struct dpaa2_switch_acl_tbl     *acls;
+       struct dpaa2_switch_filter_block *filter_blocks;
+       u16                             mirror_port;
 };
 
 static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw,
@@ -215,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
        return true;
 }
 
+static inline bool
+dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv)
+{
+       if (port_priv->mac &&
+           (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY ||
+            port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE))
+               return true;
+
+       return false;
+}
+
+static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv)
+{
+       return port_priv->mac ? true : false;
+}
+
 bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
 
 int dpaa2_switch_port_vlans_add(struct net_device *netdev,
@@ -229,18 +257,24 @@ typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
 
 /* TC offload */
 
-int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_filter_block *block,
                                    struct flow_cls_offload *cls);
 
-int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_filter_block *block,
                                    struct flow_cls_offload *cls);
 
-int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_filter_block *block,
                                      struct tc_cls_matchall_offload *cls);
 
-int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_filter_block *block,
                                      struct tc_cls_matchall_offload *cls);
 
-int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *block,
                               struct dpaa2_switch_acl_entry *entry);
+
+int dpaa2_switch_block_offload_mirror(struct dpaa2_switch_filter_block *block,
+                                     struct ethsw_port_priv *port_priv);
+
+int dpaa2_switch_block_unoffload_mirror(struct dpaa2_switch_filter_block *block,
+                                       struct ethsw_port_priv *port_priv);
 #endif /* __ETHSW_H */
index cb13e74..397d55f 100644 (file)
 #define DPSW_CMDID_GET_IRQ_STATUS           DPSW_CMD_ID(0x016)
 #define DPSW_CMDID_CLEAR_IRQ_STATUS         DPSW_CMD_ID(0x017)
 
+#define DPSW_CMDID_SET_REFLECTION_IF        DPSW_CMD_ID(0x022)
+
 #define DPSW_CMDID_IF_SET_TCI               DPSW_CMD_ID(0x030)
 #define DPSW_CMDID_IF_SET_STP               DPSW_CMD_ID(0x031)
 
 #define DPSW_CMDID_IF_GET_COUNTER           DPSW_CMD_V2(0x034)
 
+#define DPSW_CMDID_IF_ADD_REFLECTION        DPSW_CMD_ID(0x037)
+#define DPSW_CMDID_IF_REMOVE_REFLECTION     DPSW_CMD_ID(0x038)
+
 #define DPSW_CMDID_IF_ENABLE                DPSW_CMD_ID(0x03D)
 #define DPSW_CMDID_IF_DISABLE               DPSW_CMD_ID(0x03E)
 
@@ -533,5 +538,19 @@ struct dpsw_cmd_acl_entry {
        __le64 pad2[4];
        __le64 key_iova;
 };
+
+struct dpsw_cmd_set_reflection_if {
+       __le16 if_id;
+};
+
+#define DPSW_FILTER_SHIFT      0
+#define DPSW_FILTER_SIZE       2
+
+struct dpsw_cmd_if_reflection {
+       __le16 if_id;
+       __le16 vlan_id;
+       /* only 2 bits from the LSB */
+       u8 filter;
+};
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
index 6352d6d..ab921d7 100644 (file)
@@ -1579,3 +1579,83 @@ int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
        /* send command to mc*/
        return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_set_reflection_if() - Set target interface for traffic mirrored
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSW object
+ * @if_id:     Interface Id
+ *
+ * Only one mirroring destination is allowed per switch
+ *
+ * Return:     Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                          u16 if_id)
+{
+       struct dpsw_cmd_set_reflection_if *cmd_params;
+       struct fsl_mc_command cmd = { 0 };
+
+       cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_REFLECTION_IF,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpsw_cmd_set_reflection_if *)cmd.params;
+       cmd_params->if_id = cpu_to_le16(if_id);
+
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_add_reflection() - Setup mirroring rule
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSW object
+ * @if_id:     Interface Identifier
+ * @cfg:       Reflection configuration
+ *
+ * Return:     Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                          u16 if_id, const struct dpsw_reflection_cfg *cfg)
+{
+       struct dpsw_cmd_if_reflection *cmd_params;
+       struct fsl_mc_command cmd = { 0 };
+
+       cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ADD_REFLECTION,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params;
+       cmd_params->if_id = cpu_to_le16(if_id);
+       cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
+       dpsw_set_field(cmd_params->filter, FILTER, cfg->filter);
+
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_remove_reflection() - Remove mirroring rule
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSW object
+ * @if_id:     Interface Identifier
+ * @cfg:       Reflection configuration
+ *
+ * Return:     Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                             u16 if_id, const struct dpsw_reflection_cfg *cfg)
+{
+       struct dpsw_cmd_if_reflection *cmd_params;
+       struct fsl_mc_command cmd = { 0 };
+
+       cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_REMOVE_REFLECTION,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpsw_cmd_if_reflection *)cmd.params;
+       cmd_params->if_id = cpu_to_le16(if_id);
+       cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
+       dpsw_set_field(cmd_params->filter, FILTER, cfg->filter);
+
+       return mc_send_command(mc_io, &cmd);
+}
index 5ef221a..b90bd36 100644 (file)
@@ -98,6 +98,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
  */
 #define DPSW_IRQ_EVENT_LINK_CHANGED    0x0001
 
+/**
+ * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint
+ */
+#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED        0x0002
+
 /**
  * struct dpsw_irq_cfg - IRQ configuration
  * @addr:      Address that must be written to signal a message-based interrupt
@@ -752,4 +757,35 @@ int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 
 int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
                          u16 acl_id, const struct dpsw_acl_entry_cfg *cfg);
+
+/**
+ * enum dpsw_reflection_filter - Filter type for frames to be reflected
+ * @DPSW_REFLECTION_FILTER_INGRESS_ALL: Reflect all frames
+ * @DPSW_REFLECTION_FILTER_INGRESS_VLAN: Reflect only frames that belong to
+ *     the particular VLAN defined by vid parameter
+ *
+ */
+enum dpsw_reflection_filter {
+       DPSW_REFLECTION_FILTER_INGRESS_ALL = 0,
+       DPSW_REFLECTION_FILTER_INGRESS_VLAN = 1
+};
+
+/**
+ * struct dpsw_reflection_cfg - Structure representing the mirroring config
+ * @filter: Filter type for frames to be mirrored
+ * @vlan_id: VLAN ID to mirror; valid only when the type is DPSW_INGRESS_VLAN
+ */
+struct dpsw_reflection_cfg {
+       enum dpsw_reflection_filter filter;
+       u16 vlan_id;
+};
+
+int dpsw_set_reflection_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                          u16 if_id);
+
+int dpsw_if_add_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                          u16 if_id, const struct dpsw_reflection_cfg *cfg);
+
+int dpsw_if_remove_reflection(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+                             u16 if_id, const struct dpsw_reflection_cfg *cfg);
 #endif /* __FSL_DPSW_H */
index ebccaf0..9690e36 100644 (file)
@@ -585,7 +585,9 @@ static void enetc_get_ringparam(struct net_device *ndev,
 }
 
 static int enetc_get_coalesce(struct net_device *ndev,
-                             struct ethtool_coalesce *ic)
+                             struct ethtool_coalesce *ic,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct enetc_ndev_priv *priv = netdev_priv(ndev);
        struct enetc_int_vector *v = priv->int_vector[0];
@@ -602,7 +604,9 @@ static int enetc_get_coalesce(struct net_device *ndev,
 }
 
 static int enetc_set_coalesce(struct net_device *ndev,
-                             struct ethtool_coalesce *ic)
+                             struct ethtool_coalesce *ic,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct enetc_ndev_priv *priv = netdev_priv(ndev);
        u32 rx_ictt, tx_ictt;
index c84f6c2..60d94e0 100644 (file)
@@ -735,7 +735,7 @@ static const struct net_device_ops enetc_ndev_ops = {
        .ndo_set_vf_vlan        = enetc_pf_set_vf_vlan,
        .ndo_set_vf_spoofchk    = enetc_pf_set_vf_spoofchk,
        .ndo_set_features       = enetc_pf_set_features,
-       .ndo_do_ioctl           = enetc_ioctl,
+       .ndo_eth_ioctl          = enetc_ioctl,
        .ndo_setup_tc           = enetc_setup_tc,
        .ndo_bpf                = enetc_setup_bpf,
        .ndo_xdp_xmit           = enetc_xdp_xmit,
index 03090ba..1a9d1e8 100644 (file)
@@ -99,7 +99,7 @@ static const struct net_device_ops enetc_ndev_ops = {
        .ndo_get_stats          = enetc_get_stats,
        .ndo_set_mac_address    = enetc_vf_set_mac_addr,
        .ndo_set_features       = enetc_vf_set_features,
-       .ndo_do_ioctl           = enetc_ioctl,
+       .ndo_eth_ioctl          = enetc_ioctl,
        .ndo_setup_tc           = enetc_setup_tc,
 };
 
index 2e002e4..7b4961d 100644 (file)
@@ -77,6 +77,8 @@
 #define FEC_R_DES_ACTIVE_2     0x1e8 /* Rx descriptor active for ring 2 */
 #define FEC_X_DES_ACTIVE_2     0x1ec /* Tx descriptor active for ring 2 */
 #define FEC_QOS_SCHEME         0x1f0 /* Set multi queues Qos scheme */
+#define FEC_LPI_SLEEP          0x1f4 /* Set IEEE802.3az LPI Sleep Ts time */
+#define FEC_LPI_WAKE           0x1f8 /* Set IEEE802.3az LPI Wake Tw time */
 #define FEC_MIIGSK_CFGR                0x300 /* MIIGSK Configuration reg */
 #define FEC_MIIGSK_ENR         0x308 /* MIIGSK Enable reg */
 
 #define FEC_RXIC0              0xfff
 #define FEC_RXIC1              0xfff
 #define FEC_RXIC2              0xfff
+#define FEC_LPI_SLEEP          0xfff
+#define FEC_LPI_WAKE           0xfff
 #endif /* CONFIG_M5272 */
 
 
@@ -379,6 +383,9 @@ struct bufdesc_ex {
 #define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
+#define FEC_ENET_TXC_DLY       ((uint)0x00010000)
+#define FEC_ENET_RXC_DLY       ((uint)0x00020000)
+
 /* ENET interrupt coalescing macro define */
 #define FEC_ITR_CLK_SEL                (0x1 << 30)
 #define FEC_ITR_EN             (0x1 << 31)
@@ -472,6 +479,22 @@ struct bufdesc_ex {
  */
 #define FEC_QUIRK_HAS_MULTI_QUEUES     (1 << 19)
 
+/* i.MX8MQ ENET IP version add new feature to support IEEE 802.3az EEE
+ * standard. For the transmission, MAC supply two user registers to set
+ * Sleep (TS) and Wake (TW) time.
+ */
+#define FEC_QUIRK_HAS_EEE              (1 << 20)
+
+/* i.MX8QM ENET IP version add new feture to generate delayed TXC/RXC
+ * as an alternative option to make sure it works well with various PHYs.
+ * For the implementation of delayed clock, ENET takes synchronized 250MHz
+ * clocks to generate 2ns delay.
+ */
+#define FEC_QUIRK_DELAYED_CLKS_SUPPORT (1 << 21)
+
+/* i.MX8MQ SoC integration mix wakeup interrupt signal into "int2" interrupt line. */
+#define FEC_QUIRK_WAKEUP_FROM_INT2     (1 << 22)
+
 struct bufdesc_prop {
        int qid;
        /* Address of Rx and Tx buffers */
@@ -528,6 +551,7 @@ struct fec_enet_private {
        struct clk *clk_ref;
        struct clk *clk_enet_out;
        struct clk *clk_ptp;
+       struct clk *clk_2x_txclk;
 
        bool ptp_clk_on;
        struct mutex ptp_clk_mutex;
@@ -550,6 +574,8 @@ struct fec_enet_private {
        uint    phy_speed;
        phy_interface_t phy_interface;
        struct device_node *phy_node;
+       bool    rgmii_txc_dly;
+       bool    rgmii_rxc_dly;
        int     link;
        int     full_duplex;
        int     speed;
@@ -557,6 +583,7 @@ struct fec_enet_private {
        bool    bufdesc_ex;
        int     pause_flag;
        int     wol_flag;
+       int     wake_irq;
        u32     quirks;
 
        struct  napi_struct napi;
@@ -589,6 +616,10 @@ struct fec_enet_private {
        unsigned int tx_time_itr;
        unsigned int itr_clk_rate;
 
+       /* tx lpi eee mode */
+       struct ethtool_eee eee;
+       unsigned int clk_ref_rate;
+
        u32 rx_copybreak;
 
        /* ptp clock period in ns*/
index 7e4c498..80bd5c6 100644 (file)
@@ -135,6 +135,26 @@ static const struct fec_devinfo fec_imx6ul_info = {
                  FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII,
 };
 
+static const struct fec_devinfo fec_imx8mq_info = {
+       .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
+                 FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
+                 FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
+                 FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+                 FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
+                 FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+                 FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2,
+};
+
+static const struct fec_devinfo fec_imx8qm_info = {
+       .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
+                 FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
+                 FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
+                 FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+                 FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
+                 FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+                 FEC_QUIRK_DELAYED_CLKS_SUPPORT,
+};
+
 static struct platform_device_id fec_devtype[] = {
        {
                /* keep it for coldfire */
@@ -161,6 +181,12 @@ static struct platform_device_id fec_devtype[] = {
        }, {
                .name = "imx6ul-fec",
                .driver_data = (kernel_ulong_t)&fec_imx6ul_info,
+       }, {
+               .name = "imx8mq-fec",
+               .driver_data = (kernel_ulong_t)&fec_imx8mq_info,
+       }, {
+               .name = "imx8qm-fec",
+               .driver_data = (kernel_ulong_t)&fec_imx8qm_info,
        }, {
                /* sentinel */
        }
@@ -175,6 +201,8 @@ enum imx_fec_type {
        MVF600_FEC,
        IMX6SX_FEC,
        IMX6UL_FEC,
+       IMX8MQ_FEC,
+       IMX8QM_FEC,
 };
 
 static const struct of_device_id fec_dt_ids[] = {
@@ -185,6 +213,8 @@ static const struct of_device_id fec_dt_ids[] = {
        { .compatible = "fsl,mvf600-fec", .data = &fec_devtype[MVF600_FEC], },
        { .compatible = "fsl,imx6sx-fec", .data = &fec_devtype[IMX6SX_FEC], },
        { .compatible = "fsl,imx6ul-fec", .data = &fec_devtype[IMX6UL_FEC], },
+       { .compatible = "fsl,imx8mq-fec", .data = &fec_devtype[IMX8MQ_FEC], },
+       { .compatible = "fsl,imx8qm-fec", .data = &fec_devtype[IMX8QM_FEC], },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fec_dt_ids);
@@ -1107,6 +1137,13 @@ fec_restart(struct net_device *ndev)
        if (fep->bufdesc_ex)
                ecntl |= (1 << 4);
 
+       if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
+           fep->rgmii_txc_dly)
+               ecntl |= FEC_ENET_TXC_DLY;
+       if (fep->quirks & FEC_QUIRK_DELAYED_CLKS_SUPPORT &&
+           fep->rgmii_rxc_dly)
+               ecntl |= FEC_ENET_RXC_DLY;
+
 #ifndef CONFIG_M5272
        /* Enable the MIB statistic event counters */
        writel(0 << 31, fep->hwp + FEC_MIB_CTRLSTAT);
@@ -1970,6 +2007,10 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
                if (ret)
                        goto failed_clk_ref;
 
+               ret = clk_prepare_enable(fep->clk_2x_txclk);
+               if (ret)
+                       goto failed_clk_2x_txclk;
+
                fec_enet_phy_reset_after_clk_enable(ndev);
        } else {
                clk_disable_unprepare(fep->clk_enet_out);
@@ -1980,10 +2021,14 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
                        mutex_unlock(&fep->ptp_clk_mutex);
                }
                clk_disable_unprepare(fep->clk_ref);
+               clk_disable_unprepare(fep->clk_2x_txclk);
        }
 
        return 0;
 
+failed_clk_2x_txclk:
+       if (fep->clk_ref)
+               clk_disable_unprepare(fep->clk_ref);
 failed_clk_ref:
        if (fep->clk_ptp) {
                mutex_lock(&fep->ptp_clk_mutex);
@@ -1997,6 +2042,34 @@ failed_clk_ptp:
        return ret;
 }
 
+static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep,
+                                     struct device_node *np)
+{
+       u32 rgmii_tx_delay, rgmii_rx_delay;
+
+       /* For rgmii tx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) {
+               if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_tx_delay == 2000) {
+                       fep->rgmii_txc_dly = true;
+               }
+       }
+
+       /* For rgmii rx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) {
+               if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_rx_delay == 2000) {
+                       fep->rgmii_rxc_dly = true;
+               }
+       }
+
+       return 0;
+}
+
 static int fec_enet_mii_probe(struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
@@ -2581,8 +2654,10 @@ static void fec_enet_itr_coal_set(struct net_device *ndev)
        }
 }
 
-static int
-fec_enet_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+static int fec_enet_get_coalesce(struct net_device *ndev,
+                                struct ethtool_coalesce *ec,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
 
@@ -2598,8 +2673,10 @@ fec_enet_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
        return 0;
 }
 
-static int
-fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
+static int fec_enet_set_coalesce(struct net_device *ndev,
+                                struct ethtool_coalesce *ec,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
        struct device *dev = &fep->pdev->dev;
@@ -2651,7 +2728,7 @@ static void fec_enet_itr_coal_init(struct net_device *ndev)
        ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT;
        ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT;
 
-       fec_enet_set_coalesce(ndev, &ec);
+       fec_enet_set_coalesce(ndev, &ec, NULL, NULL);
 }
 
 static int fec_enet_get_tunable(struct net_device *netdev,
@@ -2692,6 +2769,92 @@ static int fec_enet_set_tunable(struct net_device *netdev,
        return ret;
 }
 
+/* LPI Sleep Ts count base on tx clk (clk_ref).
+ * The lpi sleep cnt value = X us / (cycle_ns).
+ */
+static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+
+       return us * (fep->clk_ref_rate / 1000) / 1000;
+}
+
+static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       struct ethtool_eee *p = &fep->eee;
+       unsigned int sleep_cycle, wake_cycle;
+       int ret = 0;
+
+       if (enable) {
+               ret = phy_init_eee(ndev->phydev, 0);
+               if (ret)
+                       return ret;
+
+               sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer);
+               wake_cycle = sleep_cycle;
+       } else {
+               sleep_cycle = 0;
+               wake_cycle = 0;
+       }
+
+       p->tx_lpi_enabled = enable;
+       p->eee_enabled = enable;
+       p->eee_active = enable;
+
+       writel(sleep_cycle, fep->hwp + FEC_LPI_SLEEP);
+       writel(wake_cycle, fep->hwp + FEC_LPI_WAKE);
+
+       return 0;
+}
+
+static int
+fec_enet_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       struct ethtool_eee *p = &fep->eee;
+
+       if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
+               return -EOPNOTSUPP;
+
+       if (!netif_running(ndev))
+               return -ENETDOWN;
+
+       edata->eee_enabled = p->eee_enabled;
+       edata->eee_active = p->eee_active;
+       edata->tx_lpi_timer = p->tx_lpi_timer;
+       edata->tx_lpi_enabled = p->tx_lpi_enabled;
+
+       return phy_ethtool_get_eee(ndev->phydev, edata);
+}
+
+static int
+fec_enet_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+       struct fec_enet_private *fep = netdev_priv(ndev);
+       struct ethtool_eee *p = &fep->eee;
+       int ret = 0;
+
+       if (!(fep->quirks & FEC_QUIRK_HAS_EEE))
+               return -EOPNOTSUPP;
+
+       if (!netif_running(ndev))
+               return -ENETDOWN;
+
+       p->tx_lpi_timer = edata->tx_lpi_timer;
+
+       if (!edata->eee_enabled || !edata->tx_lpi_enabled ||
+           !edata->tx_lpi_timer)
+               ret = fec_enet_eee_mode_set(ndev, false);
+       else
+               ret = fec_enet_eee_mode_set(ndev, true);
+
+       if (ret)
+               return ret;
+
+       return phy_ethtool_set_eee(ndev->phydev, edata);
+}
+
 static void
 fec_enet_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
@@ -2719,12 +2882,12 @@ fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
        device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC);
        if (device_may_wakeup(&ndev->dev)) {
                fep->wol_flag |= FEC_WOL_FLAG_ENABLE;
-               if (fep->irq[0] > 0)
-                       enable_irq_wake(fep->irq[0]);
+               if (fep->wake_irq > 0)
+                       enable_irq_wake(fep->wake_irq);
        } else {
                fep->wol_flag &= (~FEC_WOL_FLAG_ENABLE);
-               if (fep->irq[0] > 0)
-                       disable_irq_wake(fep->irq[0]);
+               if (fep->wake_irq > 0)
+                       disable_irq_wake(fep->wake_irq);
        }
 
        return 0;
@@ -2752,6 +2915,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
        .set_tunable            = fec_enet_set_tunable,
        .get_wol                = fec_enet_get_wol,
        .set_wol                = fec_enet_set_wol,
+       .get_eee                = fec_enet_get_eee,
+       .set_eee                = fec_enet_set_eee,
        .get_link_ksettings     = phy_ethtool_get_link_ksettings,
        .set_link_ksettings     = phy_ethtool_set_link_ksettings,
        .self_test              = net_selftest,
@@ -3280,7 +3445,7 @@ static const struct net_device_ops fec_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_tx_timeout         = fec_timeout,
        .ndo_set_mac_address    = fec_set_mac_address,
-       .ndo_do_ioctl           = fec_enet_ioctl,
+       .ndo_eth_ioctl          = fec_enet_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = fec_poll_controller,
 #endif
@@ -3535,6 +3700,17 @@ static int fec_enet_get_irq_cnt(struct platform_device *pdev)
        return irq_cnt;
 }
 
+static void fec_enet_get_wakeup_irq(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct fec_enet_private *fep = netdev_priv(ndev);
+
+       if (fep->quirks & FEC_QUIRK_WAKEUP_FROM_INT2)
+               fep->wake_irq = fep->irq[2];
+       else
+               fep->wake_irq = fep->irq[0];
+}
+
 static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
                                   struct device_node *np)
 {
@@ -3666,6 +3842,10 @@ fec_probe(struct platform_device *pdev)
                fep->phy_interface = interface;
        }
 
+       ret = fec_enet_parse_rgmii_delay(fep, np);
+       if (ret)
+               goto failed_rgmii_delay;
+
        fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
        if (IS_ERR(fep->clk_ipg)) {
                ret = PTR_ERR(fep->clk_ipg);
@@ -3692,6 +3872,14 @@ fec_probe(struct platform_device *pdev)
        fep->clk_ref = devm_clk_get(&pdev->dev, "enet_clk_ref");
        if (IS_ERR(fep->clk_ref))
                fep->clk_ref = NULL;
+       fep->clk_ref_rate = clk_get_rate(fep->clk_ref);
+
+       /* clk_2x_txclk is optional, depends on board */
+       if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) {
+               fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
+               if (IS_ERR(fep->clk_2x_txclk))
+                       fep->clk_2x_txclk = NULL;
+       }
 
        fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX;
        fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp");
@@ -3762,6 +3950,9 @@ fec_probe(struct platform_device *pdev)
                fep->irq[i] = irq;
        }
 
+       /* Decide which interrupt line is wakeup capable */
+       fec_enet_get_wakeup_irq(pdev);
+
        ret = fec_enet_mii_init(pdev);
        if (ret)
                goto failed_mii_init;
@@ -3809,6 +4000,7 @@ failed_clk_ahb:
 failed_clk_ipg:
        fec_enet_clk_enable(ndev, false);
 failed_clk:
+failed_rgmii_delay:
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(phy_node);
index 02c4765..73ff359 100644 (file)
@@ -792,7 +792,7 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = {
        .ndo_set_rx_mode = mpc52xx_fec_set_multicast_list,
        .ndo_set_mac_address = mpc52xx_fec_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
-       .ndo_do_ioctl = phy_do_ioctl,
+       .ndo_eth_ioctl = phy_do_ioctl,
        .ndo_tx_timeout = mpc52xx_fec_tx_timeout,
        .ndo_get_stats = mpc52xx_fec_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 6ee325a..2db6e38 100644 (file)
@@ -900,7 +900,7 @@ static const struct net_device_ops fs_enet_netdev_ops = {
        .ndo_start_xmit         = fs_enet_start_xmit,
        .ndo_tx_timeout         = fs_timeout,
        .ndo_set_rx_mode        = fs_set_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 9646483..af6ad94 100644 (file)
@@ -3184,7 +3184,7 @@ static const struct net_device_ops gfar_netdev_ops = {
        .ndo_set_features = gfar_set_features,
        .ndo_set_rx_mode = gfar_set_multi,
        .ndo_tx_timeout = gfar_timeout,
-       .ndo_do_ioctl = gfar_ioctl,
+       .ndo_eth_ioctl = gfar_ioctl,
        .ndo_get_stats64 = gfar_get_stats64,
        .ndo_change_carrier = fixed_phy_change_carrier,
        .ndo_set_mac_address = gfar_set_mac_addr,
index cc7d4f9..7b32ed2 100644 (file)
@@ -243,7 +243,9 @@ static unsigned int gfar_ticks2usecs(struct gfar_private *priv,
 /* Get the coalescing parameters, and put them in the cvals
  * structure.  */
 static int gfar_gcoalesce(struct net_device *dev,
-                         struct ethtool_coalesce *cvals)
+                         struct ethtool_coalesce *cvals,
+                         struct kernel_ethtool_coalesce *kernel_coal,
+                         struct netlink_ext_ack *extack)
 {
        struct gfar_private *priv = netdev_priv(dev);
        struct gfar_priv_rx_q *rx_queue = NULL;
@@ -280,7 +282,9 @@ static int gfar_gcoalesce(struct net_device *dev,
  * in order for coalescing to be active
  */
 static int gfar_scoalesce(struct net_device *dev,
-                         struct ethtool_coalesce *cvals)
+                         struct ethtool_coalesce *cvals,
+                         struct kernel_ethtool_coalesce *kernel_coal,
+                         struct netlink_ext_ack *extack)
 {
        struct gfar_private *priv = netdev_priv(dev);
        int i, err = 0;
index 0acfafb..3eb288d 100644 (file)
@@ -3516,7 +3516,7 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
        .ndo_set_mac_address    = ucc_geth_set_mac_addr,
        .ndo_set_rx_mode        = ucc_geth_set_multi,
        .ndo_tx_timeout         = ucc_geth_timeout,
-       .ndo_do_ioctl           = ucc_geth_ioctl,
+       .ndo_eth_ioctl          = ucc_geth_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = ucc_netpoll,
 #endif
index 5bb56b4..f089d33 100644 (file)
@@ -322,7 +322,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
        tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
 
        // Check if next command will overflow the buffer.
-       if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+       if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+           (tail & priv->adminq_mask)) {
                int err;
 
                // Flush existing commands to make room.
@@ -332,7 +333,8 @@ static int gve_adminq_issue_cmd(struct gve_priv *priv,
 
                // Retry.
                tail = ioread32be(&priv->reg_bar0->adminq_event_counter);
-               if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) == tail) {
+               if (((priv->adminq_prod_cnt + 1) & priv->adminq_mask) ==
+                   (tail & priv->adminq_mask)) {
                        // This should never happen. We just flushed the
                        // command queue so there should be enough space.
                        return -ENOMEM;
index bb062b0..3312e1d 100644 (file)
@@ -90,6 +90,8 @@ config HNS_ENET
 config HNS3
        tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
        depends on PCI
+       select NET_DEVLINK
+       select PAGE_POOL
        help
          This selects the framework support for Hisilicon Network Subsystem 3.
          This layer facilitates clients like ENET, RoCE and user-space ethernet
@@ -102,7 +104,7 @@ config HNS3_HCLGE
        tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
        default m
        depends on PCI_MSI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This selects the HNS3_HCLGE network acceleration engine & its hardware
          compatibility layer. The engine would be used in Hisilicon hip08 family of
index e53512f..37b605f 100644 (file)
@@ -796,7 +796,9 @@ static void hip04_tx_timeout_task(struct work_struct *work)
 }
 
 static int hip04_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct hip04_priv *priv = netdev_priv(netdev);
 
@@ -807,7 +809,9 @@ static int hip04_get_coalesce(struct net_device *netdev,
 }
 
 static int hip04_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct hip04_priv *priv = netdev_priv(netdev);
 
index 3c4db4a..22bf914 100644 (file)
@@ -685,7 +685,7 @@ static const struct net_device_ops hisi_femac_netdev_ops = {
        .ndo_open               = hisi_femac_net_open,
        .ndo_stop               = hisi_femac_net_close,
        .ndo_start_xmit         = hisi_femac_net_xmit,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_mac_address    = hisi_femac_set_mac_address,
        .ndo_set_rx_mode        = hisi_femac_net_set_rx_mode,
 };
index ad534f9..343c605 100644 (file)
@@ -1945,7 +1945,7 @@ static const struct net_device_ops hns_nic_netdev_ops = {
        .ndo_tx_timeout = hns_nic_net_timeout,
        .ndo_set_mac_address = hns_nic_net_set_mac_address,
        .ndo_change_mtu = hns_nic_change_mtu,
-       .ndo_do_ioctl = phy_do_ioctl_running,
+       .ndo_eth_ioctl = phy_do_ioctl_running,
        .ndo_set_features = hns_nic_set_features,
        .ndo_fix_features = hns_nic_fix_features,
        .ndo_get_stats64 = hns_nic_get_stats64,
index 7e62dcf..ab73902 100644 (file)
@@ -730,11 +730,15 @@ static int hns_set_pauseparam(struct net_device *net_dev,
  * hns_get_coalesce - get coalesce info.
  * @net_dev: net device
  * @ec: coalesce info.
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Return 0 on success, negative on failure.
  */
 static int hns_get_coalesce(struct net_device *net_dev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
        struct hnae_ae_ops *ops;
@@ -774,11 +778,15 @@ static int hns_get_coalesce(struct net_device *net_dev,
  * hns_set_coalesce - set coalesce info.
  * @net_dev: net device
  * @ec: coalesce info.
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Return 0 on success, negative on failure.
  */
 static int hns_set_coalesce(struct net_device *net_dev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct hns_nic_priv *priv = netdev_priv(net_dev);
        struct hnae_ae_ops *ops;
index aa86a81..c2bd258 100644 (file)
@@ -9,7 +9,7 @@
 
 enum HCLGE_MBX_OPCODE {
        HCLGE_MBX_RESET = 0x01,         /* (VF -> PF) assert reset */
-       HCLGE_MBX_ASSERTING_RESET,      /* (PF -> VF) PF is asserting reset*/
+       HCLGE_MBX_ASSERTING_RESET,      /* (PF -> VF) PF is asserting reset */
        HCLGE_MBX_SET_UNICAST,          /* (VF -> PF) set UC addr */
        HCLGE_MBX_SET_MULTICAST,        /* (VF -> PF) set MC addr */
        HCLGE_MBX_SET_VLAN,             /* (VF -> PF) set VLAN */
index e0b7c3c..546a605 100644 (file)
@@ -65,7 +65,7 @@
 #define HNAE3_UNIC_CLIENT_INITED_B             0x4
 #define HNAE3_ROCE_CLIENT_INITED_B             0x5
 
-#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\
+#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) | \
                BIT(HNAE3_DEV_SUPPORT_ROCE_B))
 
 #define hnae3_dev_roce_supported(hdev) \
@@ -718,6 +718,8 @@ struct hnae3_ae_ops {
                            u32 nsec, u32 sec);
        int (*get_ts_info)(struct hnae3_handle *handle,
                           struct ethtool_ts_info *info);
+       int (*get_link_diagnosis_info)(struct hnae3_handle *handle,
+                                      u32 *status_code);
 };
 
 struct hnae3_dcb_ops {
@@ -772,6 +774,7 @@ struct hnae3_knic_private_info {
 
        u16 int_rl_setting;
        enum pkt_hash_types rss_type;
+       void __iomem *io_base;
 };
 
 struct hnae3_roce_private_info {
index 80461ab..2b66c59 100644 (file)
@@ -38,9 +38,8 @@ static struct hns3_dbg_dentry_info hns3_dbg_dentry[] = {
        },
 };
 
-static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, unsigned int cmd);
-static int hns3_dbg_common_file_init(struct hnae3_handle *handle,
-                                    unsigned int cmd);
+static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd);
+static int hns3_dbg_common_file_init(struct hnae3_handle *handle, u32 cmd);
 
 static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = {
        {
@@ -696,7 +695,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h, char *buf, int len)
                sprintf(result[j++], "%u", i);
                sprintf(result[j++], "%u",
                        h->ae_algo->ops->get_global_queue_id(h, i));
-               sprintf(result[j++], "%u",
+               sprintf(result[j++], "%d",
                        priv->ring[i].tqp_vector->vector_irq);
                hns3_dbg_fill_content(content, sizeof(content), queue_map_items,
                                      (const char **)result,
@@ -798,10 +797,10 @@ static const struct hns3_dbg_item tx_bd_info_items[] = {
        { "T_CS_VLAN_TSO", 2 },
        { "OT_VLAN_TAG", 3 },
        { "TV", 2 },
-       { "OLT_VLAN_LEN", 2},
-       { "PAYLEN_OL4CS", 2},
-       { "BD_FE_SC_VLD", 2},
-       { "MSS_HW_CSUM", 0},
+       { "OLT_VLAN_LEN", 2 },
+       { "PAYLEN_OL4CS", 2 },
+       { "BD_FE_SC_VLD", 2 },
+       { "MSS_HW_CSUM", 0 },
 };
 
 static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv,
@@ -868,7 +867,7 @@ static void
 hns3_dbg_dev_caps(struct hnae3_handle *h, char *buf, int len, int *pos)
 {
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
-       static const char * const str[] = {"no", "yes"};
+       const char * const str[] = {"no", "yes"};
        unsigned long *caps = ae_dev->caps;
        u32 i, state;
 
index cdb5f14..22af3d6 100644 (file)
@@ -63,7 +63,7 @@ MODULE_PARM_DESC(tx_sgl, "Minimum number of frags when using dma_map_sg() to opt
 
 #define HNS3_SGL_SIZE(nfrag)   (sizeof(struct scatterlist) * (nfrag) + \
                                 sizeof(struct sg_table))
-#define HNS3_MAX_SGL_SIZE      ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM),\
+#define HNS3_MAX_SGL_SIZE      ALIGN(HNS3_SGL_SIZE(HNS3_MAX_TSO_BD_NUM), \
                                      dma_get_cache_alignment())
 
 #define DEFAULT_MSG_LEVEL (NETIF_MSG_PROBE | NETIF_MSG_LINK | \
@@ -100,7 +100,7 @@ static const struct pci_device_id hns3_pci_tbl[] = {
        {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF),
         HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
        /* required last entry */
-       {0, }
+       {0,}
 };
 MODULE_DEVICE_TABLE(pci, hns3_pci_tbl);
 
@@ -971,8 +971,7 @@ static u32 hns3_tx_spare_space(struct hns3_enet_ring *ring)
        /* The free tx buffer is divided into two part, so pick the
         * larger one.
         */
-       return (ntc > (tx_spare->len - ntu) ? ntc :
-                       (tx_spare->len - ntu)) - 1;
+       return max(ntc, tx_spare->len - ntu) - 1;
 }
 
 static void hns3_tx_spare_update(struct hns3_enet_ring *ring)
@@ -2852,7 +2851,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
        .ndo_start_xmit         = hns3_nic_net_xmit,
        .ndo_tx_timeout         = hns3_nic_net_timeout,
        .ndo_set_mac_address    = hns3_nic_net_set_mac_address,
-       .ndo_do_ioctl           = hns3_nic_do_ioctl,
+       .ndo_eth_ioctl          = hns3_nic_do_ioctl,
        .ndo_change_mtu         = hns3_nic_change_mtu,
        .ndo_set_features       = hns3_nic_set_features,
        .ndo_features_check     = hns3_features_check,
@@ -3127,11 +3126,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
 
-       netdev->hw_enc_features |= NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
-               NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
-               NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_SCTP_CRC | NETIF_F_TSO_MANGLEID | NETIF_F_FRAGLIST;
-
        netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
 
        netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
@@ -3141,62 +3135,37 @@ static void hns3_set_default_feature(struct net_device *netdev)
                NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
                NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
 
-       netdev->vlan_features |= NETIF_F_RXCSUM |
-               NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO |
-               NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
-               NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
-
-       netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
-               NETIF_F_HW_VLAN_CTAG_RX |
-               NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
-               NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_GRE |
-               NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_UDP_TUNNEL |
-               NETIF_F_SCTP_CRC | NETIF_F_FRAGLIST;
-
        if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
-               netdev->hw_features |= NETIF_F_GRO_HW;
                netdev->features |= NETIF_F_GRO_HW;
 
-               if (!(h->flags & HNAE3_SUPPORT_VF)) {
-                       netdev->hw_features |= NETIF_F_NTUPLE;
+               if (!(h->flags & HNAE3_SUPPORT_VF))
                        netdev->features |= NETIF_F_NTUPLE;
-               }
        }
 
-       if (test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps)) {
-               netdev->hw_features |= NETIF_F_GSO_UDP_L4;
+       if (test_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps))
                netdev->features |= NETIF_F_GSO_UDP_L4;
-               netdev->vlan_features |= NETIF_F_GSO_UDP_L4;
-               netdev->hw_enc_features |= NETIF_F_GSO_UDP_L4;
-       }
 
-       if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps)) {
-               netdev->hw_features |= NETIF_F_HW_CSUM;
+       if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
                netdev->features |= NETIF_F_HW_CSUM;
-               netdev->vlan_features |= NETIF_F_HW_CSUM;
-               netdev->hw_enc_features |= NETIF_F_HW_CSUM;
-       } else {
-               netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+       else
                netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-               netdev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-               netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-       }
 
-       if (test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps)) {
-               netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+       if (test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps))
                netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
-       }
 
-       if (test_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps)) {
-               netdev->hw_features |= NETIF_F_HW_TC;
+       if (test_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps))
                netdev->features |= NETIF_F_HW_TC;
-       }
 
-       if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
-               netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+       netdev->hw_features |= netdev->features;
+       if (!test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps))
+               netdev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+
+       netdev->vlan_features |= netdev->features &
+               ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_TX |
+                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_GRO_HW | NETIF_F_NTUPLE |
+                 NETIF_F_HW_TC);
+
+       netdev->hw_enc_features |= netdev->vlan_features | NETIF_F_TSO_MANGLEID;
 }
 
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
@@ -3205,6 +3174,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
        unsigned int order = hns3_page_order(ring);
        struct page *p;
 
+       if (ring->page_pool) {
+               p = page_pool_dev_alloc_frag(ring->page_pool,
+                                            &cb->page_offset,
+                                            hns3_buf_size(ring));
+               if (unlikely(!p))
+                       return -ENOMEM;
+
+               cb->priv = p;
+               cb->buf = page_address(p);
+               cb->dma = page_pool_get_dma_addr(p);
+               cb->type = DESC_TYPE_PP_FRAG;
+               cb->reuse_flag = 0;
+               return 0;
+       }
+
        p = dev_alloc_pages(order);
        if (!p)
                return -ENOMEM;
@@ -3227,8 +3211,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
        if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
                        DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
                napi_consume_skb(cb->priv, budget);
-       else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
-               __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+       else if (!HNAE3_IS_TX_RING(ring)) {
+               if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
+                       __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+               else if (cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, cb->priv,
+                                               false);
+       }
        memset(cb, 0, sizeof(*cb));
 }
 
@@ -3315,7 +3304,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
        int ret;
 
        ret = hns3_alloc_buffer(ring, cb);
-       if (ret)
+       if (ret || ring->page_pool)
                goto out;
 
        ret = hns3_map_buffer(ring, cb);
@@ -3337,7 +3326,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i)
        if (ret)
                return ret;
 
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
 
        return 0;
 }
@@ -3367,7 +3357,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
 {
        hns3_unmap_buffer(ring, &ring->desc_cb[i]);
        ring->desc_cb[i] = *res_cb;
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
        ring->desc[i].rx.bd_base_info = 0;
 }
 
@@ -3539,6 +3530,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
        u32 frag_size = size - pull_len;
        bool reused;
 
+       if (ring->page_pool) {
+               skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+                               frag_size, truesize);
+               return;
+       }
+
        /* Avoid re-using remote or pfmem page */
        if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
                goto out;
@@ -3856,6 +3853,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                /* We can reuse buffer as-is, just make sure it is reusable */
                if (dev_page_is_reusable(desc_cb->priv))
                        desc_cb->reuse_flag = 1;
+               else if (desc_cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, desc_cb->priv,
+                                               false);
                else /* This page cannot be reused so discard it */
                        __page_frag_cache_drain(desc_cb->priv,
                                                desc_cb->pagecnt_bias);
@@ -3863,6 +3863,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                hns3_rx_ring_move_fw(ring);
                return 0;
        }
+
+       if (ring->page_pool)
+               skb_mark_for_recycle(skb);
+
        u64_stats_update_begin(&ring->syncp);
        ring->stats.seg_pkt_cnt++;
        u64_stats_update_end(&ring->syncp);
@@ -3901,6 +3905,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
                                            "alloc rx fraglist skb fail\n");
                                return -ENXIO;
                        }
+
+                       if (ring->page_pool)
+                               skb_mark_for_recycle(new_skb);
+
                        ring->frag_num = 0;
 
                        if (ring->tail_skb) {
@@ -4434,9 +4442,7 @@ static void hns3_tx_dim_work(struct work_struct *work)
 static void hns3_nic_init_dim(struct hns3_enet_tqp_vector *tqp_vector)
 {
        INIT_WORK(&tqp_vector->rx_group.dim.work, hns3_rx_dim_work);
-       tqp_vector->rx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
        INIT_WORK(&tqp_vector->tx_group.dim.work, hns3_tx_dim_work);
-       tqp_vector->tx_group.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 }
 
 static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
@@ -4705,6 +4711,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
        priv->ring = NULL;
 }
 
+static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
+{
+       struct page_pool_params pp_params = {
+               .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
+                               PP_FLAG_DMA_SYNC_DEV,
+               .order = hns3_page_order(ring),
+               .pool_size = ring->desc_num * hns3_buf_size(ring) /
+                               (PAGE_SIZE << hns3_page_order(ring)),
+               .nid = dev_to_node(ring_to_dev(ring)),
+               .dev = ring_to_dev(ring),
+               .dma_dir = DMA_FROM_DEVICE,
+               .offset = 0,
+               .max_len = PAGE_SIZE << hns3_page_order(ring),
+       };
+
+       ring->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(ring->page_pool)) {
+               dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
+                        PTR_ERR(ring->page_pool));
+               ring->page_pool = NULL;
+       }
+}
+
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 {
        int ret;
@@ -4724,6 +4753,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
                goto out_with_desc_cb;
 
        if (!HNAE3_IS_TX_RING(ring)) {
+               hns3_alloc_page_pool(ring);
+
                ret = hns3_alloc_ring_buffers(ring);
                if (ret)
                        goto out_with_desc;
@@ -4764,6 +4795,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
                devm_kfree(ring_to_dev(ring), tx_spare);
                ring->tx_spare = NULL;
        }
+
+       if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
+               page_pool_destroy(ring->page_pool);
+               ring->page_pool = NULL;
+       }
 }
 
 static int hns3_buf_size2type(u32 buf_size)
@@ -4954,6 +4990,66 @@ static void hns3_info_show(struct hns3_nic_priv *priv)
        dev_info(priv->dev, "Max mtu size: %u\n", priv->netdev->max_mtu);
 }
 
+static void hns3_set_cq_period_mode(struct hns3_nic_priv *priv,
+                                   enum dim_cq_period_mode mode, bool is_tx)
+{
+       struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
+       struct hnae3_handle *handle = priv->ae_handle;
+       int i;
+
+       if (is_tx) {
+               priv->tx_cqe_mode = mode;
+
+               for (i = 0; i < priv->vector_num; i++)
+                       priv->tqp_vector[i].tx_group.dim.mode = mode;
+       } else {
+               priv->rx_cqe_mode = mode;
+
+               for (i = 0; i < priv->vector_num; i++)
+                       priv->tqp_vector[i].rx_group.dim.mode = mode;
+       }
+
+       /* only device version above V3(include V3), GL can switch CQ/EQ
+        * period mode.
+        */
+       if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) {
+               u32 new_mode;
+               u64 reg;
+
+               new_mode = (mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE) ?
+                       HNS3_CQ_MODE_CQE : HNS3_CQ_MODE_EQE;
+               reg = is_tx ? HNS3_GL1_CQ_MODE_REG : HNS3_GL0_CQ_MODE_REG;
+
+               writel(new_mode, handle->kinfo.io_base + reg);
+       }
+}
+
+void hns3_cq_period_mode_init(struct hns3_nic_priv *priv,
+                             enum dim_cq_period_mode tx_mode,
+                             enum dim_cq_period_mode rx_mode)
+{
+       hns3_set_cq_period_mode(priv, tx_mode, true);
+       hns3_set_cq_period_mode(priv, rx_mode, false);
+}
+
+static void hns3_state_init(struct hnae3_handle *handle)
+{
+       struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
+       struct net_device *netdev = handle->kinfo.netdev;
+       struct hns3_nic_priv *priv = netdev_priv(netdev);
+
+       set_bit(HNS3_NIC_STATE_INITED, &priv->state);
+
+       if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+               set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
+
+       if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
+               set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state);
+
+       if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev))
+               set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
+}
+
 static int hns3_client_init(struct hnae3_handle *handle)
 {
        struct pci_dev *pdev = handle->pdev;
@@ -5021,6 +5117,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
                goto out_init_ring;
        }
 
+       hns3_cq_period_mode_init(priv, DIM_CQ_PERIOD_MODE_START_FROM_EQE,
+                                DIM_CQ_PERIOD_MODE_START_FROM_EQE);
+
        ret = hns3_init_phy(netdev);
        if (ret)
                goto out_init_phy;
@@ -5054,16 +5153,7 @@ static int hns3_client_init(struct hnae3_handle *handle)
 
        netdev->max_mtu = HNS3_MAX_MTU(ae_dev->dev_specs.max_frm_size);
 
-       if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps))
-               set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state);
-
-       if (hnae3_ae_dev_rxd_adv_layout_supported(ae_dev))
-               set_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state);
-
-       set_bit(HNS3_NIC_STATE_INITED, &priv->state);
-
-       if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
-               set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
+       hns3_state_init(handle);
 
        ret = register_netdev(netdev);
        if (ret) {
@@ -5353,6 +5443,8 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
        if (ret)
                goto err_uninit_vector;
 
+       hns3_cq_period_mode_init(priv, priv->tx_cqe_mode, priv->rx_cqe_mode);
+
        /* the device can work without cpu rmap, only aRFS needs it */
        ret = hns3_set_rx_cpu_rmap(netdev);
        if (ret)
index 15af3d9..6162d9f 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/dim.h>
 #include <linux/if_vlan.h>
+#include <net/page_pool.h>
 
 #include "hnae3.h"
 
@@ -201,6 +202,12 @@ enum hns3_nic_state {
 
 #define HNS3_RING_EN_B                         0
 
+#define HNS3_GL0_CQ_MODE_REG                   0x20d00
+#define HNS3_GL1_CQ_MODE_REG                   0x20d04
+#define HNS3_GL2_CQ_MODE_REG                   0x20d08
+#define HNS3_CQ_MODE_EQE                       1U
+#define HNS3_CQ_MODE_CQE                       0U
+
 enum hns3_pkt_l2t_type {
        HNS3_L2_TYPE_UNICAST,
        HNS3_L2_TYPE_MULTICAST,
@@ -307,6 +314,7 @@ enum hns3_desc_type {
        DESC_TYPE_BOUNCE_ALL            = 1 << 3,
        DESC_TYPE_BOUNCE_HEAD           = 1 << 4,
        DESC_TYPE_SGL_SKB               = 1 << 5,
+       DESC_TYPE_PP_FRAG               = 1 << 6,
 };
 
 struct hns3_desc_cb {
@@ -340,7 +348,7 @@ enum hns3_pkt_l3type {
        HNS3_L3_TYPE_LLDP,
        HNS3_L3_TYPE_BPDU,
        HNS3_L3_TYPE_MAC_PAUSE,
-       HNS3_L3_TYPE_PFC_PAUSE,/* 0x9*/
+       HNS3_L3_TYPE_PFC_PAUSE, /* 0x9 */
 
        /* reserved for 0xA~0xB */
 
@@ -384,11 +392,11 @@ enum hns3_pkt_ol4type {
 };
 
 struct hns3_rx_ptype {
-       u32 ptype:8;
-       u32 csum_level:2;
-       u32 ip_summed:2;
-       u32 l3_type:4;
-       u32 valid:1;
+       u32 ptype : 8;
+       u32 csum_level : 2;
+       u32 ip_summed : 2;
+       u32 l3_type : 4;
+       u32 valid : 1;
 };
 
 struct ring_stats {
@@ -451,6 +459,7 @@ struct hns3_enet_ring {
        struct hnae3_queue *tqp;
        int queue_index;
        struct device *dev; /* will be used for DMA mapping of descriptors */
+       struct page_pool *page_pool;
 
        /* statistic */
        struct ring_stats stats;
@@ -513,9 +522,9 @@ struct hns3_enet_coalesce {
        u16 int_gl;
        u16 int_ql;
        u16 int_ql_max;
-       u8 adapt_enable:1;
-       u8 ql_enable:1;
-       u8 unit_1us:1;
+       u8 adapt_enable : 1;
+       u8 ql_enable : 1;
+       u8 unit_1us : 1;
        enum hns3_flow_level_range flow_level;
 };
 
@@ -569,6 +578,8 @@ struct hns3_nic_priv {
 
        unsigned long state;
 
+       enum dim_cq_period_mode tx_cqe_mode;
+       enum dim_cq_period_mode rx_cqe_mode;
        struct hns3_enet_coalesce tx_coal;
        struct hns3_enet_coalesce rx_coal;
        u32 tx_copybreak;
@@ -593,6 +604,11 @@ struct hns3_hw_error_info {
        const char *msg;
 };
 
+struct hns3_reset_type_map {
+       enum ethtool_reset_flags rst_flags;
+       enum hnae3_reset_type rst_type;
+};
+
 static inline int ring_space(struct hns3_enet_ring *ring)
 {
        /* This smp_load_acquire() pairs with smp_store_release() in
@@ -702,4 +718,7 @@ void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
 void hns3_dbg_unregister_debugfs(void);
 void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size);
 u16 hns3_get_max_available_channels(struct hnae3_handle *h);
+void hns3_cq_period_mode_init(struct hns3_nic_priv *priv,
+                             enum dim_cq_period_mode tx_mode,
+                             enum dim_cq_period_mode rx_mode);
 #endif
index 82061ab..7ea511d 100644 (file)
@@ -7,21 +7,7 @@
 #include <linux/sfp.h>
 
 #include "hns3_enet.h"
-
-struct hns3_stats {
-       char stats_string[ETH_GSTRING_LEN];
-       int stats_offset;
-};
-
-struct hns3_sfp_type {
-       u8 type;
-       u8 ext_type;
-};
-
-struct hns3_pflag_desc {
-       char name[ETH_GSTRING_LEN];
-       void (*handler)(struct net_device *netdev, bool enable);
-};
+#include "hns3_ethtool.h"
 
 /* tqp related stats */
 #define HNS3_TQP_STAT(_string, _member)        {                       \
@@ -312,33 +298,8 @@ out:
        return ret_val;
 }
 
-/**
- * hns3_self_test - self test
- * @ndev: net device
- * @eth_test: test cmd
- * @data: test result
- */
-static void hns3_self_test(struct net_device *ndev,
-                          struct ethtool_test *eth_test, u64 *data)
+static void hns3_set_selftest_param(struct hnae3_handle *h, int (*st_param)[2])
 {
-       struct hns3_nic_priv *priv = netdev_priv(ndev);
-       struct hnae3_handle *h = priv->ae_handle;
-       int st_param[HNS3_SELF_TEST_TYPE_NUM][2];
-       bool if_running = netif_running(ndev);
-       int test_index = 0;
-       u32 i;
-
-       if (hns3_nic_resetting(ndev)) {
-               netdev_err(ndev, "dev resetting!");
-               return;
-       }
-
-       /* Only do offline selftest, or pass by default */
-       if (eth_test->flags != ETH_TEST_FL_OFFLINE)
-               return;
-
-       netif_dbg(h, drv, ndev, "self test start");
-
        st_param[HNAE3_LOOP_APP][0] = HNAE3_LOOP_APP;
        st_param[HNAE3_LOOP_APP][1] =
                        h->flags & HNAE3_SUPPORT_APP_LOOPBACK;
@@ -355,6 +316,18 @@ static void hns3_self_test(struct net_device *ndev,
        st_param[HNAE3_LOOP_PHY][0] = HNAE3_LOOP_PHY;
        st_param[HNAE3_LOOP_PHY][1] =
                        h->flags & HNAE3_SUPPORT_PHY_LOOPBACK;
+}
+
+static void hns3_selftest_prepare(struct net_device *ndev,
+                                 bool if_running, int (*st_param)[2])
+{
+       struct hns3_nic_priv *priv = netdev_priv(ndev);
+       struct hnae3_handle *h = priv->ae_handle;
+
+       if (netif_msg_ifdown(h))
+               netdev_info(ndev, "self test start\n");
+
+       hns3_set_selftest_param(h, st_param);
 
        if (if_running)
                ndev->netdev_ops->ndo_stop(ndev);
@@ -373,6 +346,35 @@ static void hns3_self_test(struct net_device *ndev,
                h->ae_algo->ops->halt_autoneg(h, true);
 
        set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+}
+
+static void hns3_selftest_restore(struct net_device *ndev, bool if_running)
+{
+       struct hns3_nic_priv *priv = netdev_priv(ndev);
+       struct hnae3_handle *h = priv->ae_handle;
+
+       clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+       if (h->ae_algo->ops->halt_autoneg)
+               h->ae_algo->ops->halt_autoneg(h, false);
+
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+       if (h->ae_algo->ops->enable_vlan_filter)
+               h->ae_algo->ops->enable_vlan_filter(h, true);
+#endif
+
+       if (if_running)
+               ndev->netdev_ops->ndo_open(ndev);
+
+       if (netif_msg_ifdown(h))
+               netdev_info(ndev, "self test end\n");
+}
+
+static void hns3_do_selftest(struct net_device *ndev, int (*st_param)[2],
+                            struct ethtool_test *eth_test, u64 *data)
+{
+       int test_index = 0;
+       u32 i;
 
        for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) {
                enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0];
@@ -391,21 +393,32 @@ static void hns3_self_test(struct net_device *ndev,
 
                test_index++;
        }
+}
 
-       clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
-
-       if (h->ae_algo->ops->halt_autoneg)
-               h->ae_algo->ops->halt_autoneg(h, false);
+/**
+ * hns3_nic_self_test - self test
+ * @ndev: net device
+ * @eth_test: test cmd
+ * @data: test result
+ */
+static void hns3_self_test(struct net_device *ndev,
+                          struct ethtool_test *eth_test, u64 *data)
+{
+       int st_param[HNS3_SELF_TEST_TYPE_NUM][2];
+       bool if_running = netif_running(ndev);
 
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
-       if (h->ae_algo->ops->enable_vlan_filter)
-               h->ae_algo->ops->enable_vlan_filter(h, true);
-#endif
+       if (hns3_nic_resetting(ndev)) {
+               netdev_err(ndev, "dev resetting!");
+               return;
+       }
 
-       if (if_running)
-               ndev->netdev_ops->ndo_open(ndev);
+       /* Only do offline selftest, or pass by default */
+       if (eth_test->flags != ETH_TEST_FL_OFFLINE)
+               return;
 
-       netif_dbg(h, drv, ndev, "self test end\n");
+       hns3_selftest_prepare(ndev, if_running, st_param);
+       hns3_do_selftest(ndev, st_param, eth_test, data);
+       hns3_selftest_restore(ndev, if_running);
 }
 
 static void hns3_update_limit_promisc_mode(struct net_device *netdev,
@@ -953,6 +966,60 @@ static int hns3_get_rxnfc(struct net_device *netdev,
        }
 }
 
+static const struct hns3_reset_type_map hns3_reset_type[] = {
+       {ETH_RESET_MGMT, HNAE3_IMP_RESET},
+       {ETH_RESET_ALL, HNAE3_GLOBAL_RESET},
+       {ETH_RESET_DEDICATED, HNAE3_FUNC_RESET},
+};
+
+static const struct hns3_reset_type_map hns3vf_reset_type[] = {
+       {ETH_RESET_DEDICATED, HNAE3_VF_FUNC_RESET},
+};
+
+static int hns3_set_reset(struct net_device *netdev, u32 *flags)
+{
+       enum hnae3_reset_type rst_type = HNAE3_NONE_RESET;
+       struct hnae3_handle *h = hns3_get_handle(netdev);
+       struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+       const struct hnae3_ae_ops *ops = h->ae_algo->ops;
+       const struct hns3_reset_type_map *rst_type_map;
+       u32 i, size;
+
+       if (ops->ae_dev_resetting && ops->ae_dev_resetting(h))
+               return -EBUSY;
+
+       if (!ops->set_default_reset_request || !ops->reset_event)
+               return -EOPNOTSUPP;
+
+       if (h->flags & HNAE3_SUPPORT_VF) {
+               rst_type_map = hns3vf_reset_type;
+               size = ARRAY_SIZE(hns3vf_reset_type);
+       } else {
+               rst_type_map = hns3_reset_type;
+               size = ARRAY_SIZE(hns3_reset_type);
+       }
+
+       for (i = 0; i < size; i++) {
+               if (rst_type_map[i].rst_flags == *flags) {
+                       rst_type = rst_type_map[i].rst_type;
+                       break;
+               }
+       }
+
+       if (rst_type == HNAE3_NONE_RESET ||
+           (rst_type == HNAE3_IMP_RESET &&
+            ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2))
+               return -EOPNOTSUPP;
+
+       netdev_info(netdev, "Setting reset type %d\n", rst_type);
+
+       ops->set_default_reset_request(ae_dev, rst_type);
+
+       ops->reset_event(h->pdev, h);
+
+       return 0;
+}
+
 static void hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
                                        u32 tx_desc_num, u32 rx_desc_num)
 {
@@ -1139,7 +1206,9 @@ static void hns3_get_channels(struct net_device *netdev,
 }
 
 static int hns3_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *cmd)
+                            struct ethtool_coalesce *cmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
@@ -1161,6 +1230,11 @@ static int hns3_get_coalesce(struct net_device *netdev,
        cmd->tx_max_coalesced_frames = tx_coal->int_ql;
        cmd->rx_max_coalesced_frames = rx_coal->int_ql;
 
+       kernel_coal->use_cqe_mode_tx = (priv->tx_cqe_mode ==
+                                       DIM_CQ_PERIOD_MODE_START_FROM_CQE);
+       kernel_coal->use_cqe_mode_rx = (priv->rx_cqe_mode ==
+                                       DIM_CQ_PERIOD_MODE_START_FROM_CQE);
+
        return 0;
 }
 
@@ -1321,13 +1395,17 @@ static void hns3_set_coalesce_per_queue(struct net_device *netdev,
 }
 
 static int hns3_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *cmd)
+                            struct ethtool_coalesce *cmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct hnae3_handle *h = hns3_get_handle(netdev);
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
        struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
        u16 queue_num = h->kinfo.num_tqps;
+       enum dim_cq_period_mode tx_mode;
+       enum dim_cq_period_mode rx_mode;
        int ret;
        int i;
 
@@ -1353,6 +1431,14 @@ static int hns3_set_coalesce(struct net_device *netdev,
        for (i = 0; i < queue_num; i++)
                hns3_set_coalesce_per_queue(netdev, cmd, i);
 
+       tx_mode = kernel_coal->use_cqe_mode_tx ?
+                 DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+                 DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+       rx_mode = kernel_coal->use_cqe_mode_rx ?
+                 DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+                 DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+       hns3_cq_period_mode_init(priv, tx_mode, rx_mode);
+
        return 0;
 }
 
@@ -1658,7 +1744,8 @@ static int hns3_set_tunable(struct net_device *netdev,
                                 ETHTOOL_COALESCE_USE_ADAPTIVE |        \
                                 ETHTOOL_COALESCE_RX_USECS_HIGH |       \
                                 ETHTOOL_COALESCE_TX_USECS_HIGH |       \
-                                ETHTOOL_COALESCE_MAX_FRAMES)
+                                ETHTOOL_COALESCE_MAX_FRAMES |          \
+                                ETHTOOL_COALESCE_USE_CQE)
 
 static int hns3_get_ts_info(struct net_device *netdev,
                            struct ethtool_ts_info *info)
@@ -1671,6 +1758,71 @@ static int hns3_get_ts_info(struct net_device *netdev,
        return ethtool_op_get_ts_info(netdev, info);
 }
 
+static const struct hns3_ethtool_link_ext_state_mapping
+hns3_link_ext_state_map[] = {
+       {1, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+               ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD},
+       {2, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+               ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED},
+
+       {256, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+               ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT},
+       {257, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+               ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY},
+       {512, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+               ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT},
+
+       {513, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+               ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK},
+       {514, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+               ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED},
+       {515, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+               ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED},
+
+       {768, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+               ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS},
+       {769, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+               ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST},
+       {770, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+               ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS},
+
+       {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0},
+       {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+               ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+
+       {1026, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0},
+};
+
+static int hns3_get_link_ext_state(struct net_device *netdev,
+                                  struct ethtool_link_ext_state_info *info)
+{
+       const struct hns3_ethtool_link_ext_state_mapping *map;
+       struct hnae3_handle *h = hns3_get_handle(netdev);
+       u32 status_code, i;
+       int ret;
+
+       if (netif_carrier_ok(netdev))
+               return -ENODATA;
+
+       if (!h->ae_algo->ops->get_link_diagnosis_info)
+               return -EOPNOTSUPP;
+
+       ret = h->ae_algo->ops->get_link_diagnosis_info(h, &status_code);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < ARRAY_SIZE(hns3_link_ext_state_map); i++) {
+               map = &hns3_link_ext_state_map[i];
+               if (map->status_code == status_code) {
+                       info->link_ext_state = map->link_ext_state;
+                       info->__link_ext_substate = map->link_ext_substate;
+                       return 0;
+               }
+       }
+
+       return -ENODATA;
+}
+
 static const struct ethtool_ops hns3vf_ethtool_ops = {
        .supported_coalesce_params = HNS3_ETHTOOL_COALESCE,
        .get_drvinfo = hns3_get_drvinfo,
@@ -1699,6 +1851,7 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
        .set_priv_flags = hns3_set_priv_flags,
        .get_tunable = hns3_get_tunable,
        .set_tunable = hns3_set_tunable,
+       .reset = hns3_set_reset,
 };
 
 static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1740,6 +1893,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
        .get_ts_info = hns3_get_ts_info,
        .get_tunable = hns3_get_tunable,
        .set_tunable = hns3_set_tunable,
+       .reset = hns3_set_reset,
+       .get_link_ext_state = hns3_get_link_ext_state,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.h
new file mode 100644 (file)
index 0000000..822d6fc
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+// Copyright (c) 2021 Hisilicon Limited.
+
+#ifndef __HNS3_ETHTOOL_H
+#define __HNS3_ETHTOOL_H
+
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+struct hns3_stats {
+       char stats_string[ETH_GSTRING_LEN];
+       int stats_offset;
+};
+
+struct hns3_sfp_type {
+       u8 type;
+       u8 ext_type;
+};
+
+struct hns3_pflag_desc {
+       char name[ETH_GSTRING_LEN];
+       void (*handler)(struct net_device *netdev, bool enable);
+};
+
+struct hns3_ethtool_link_ext_state_mapping {
+       u32 status_code;
+       enum ethtool_link_ext_state link_ext_state;
+       u8 link_ext_substate;
+};
+
+#endif
index a685392..d1bf5c4 100644 (file)
@@ -7,6 +7,6 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 ccflags-y += -I $(srctree)/$(src)
 
 obj-$(CONFIG_HNS3_HCLGE) += hclge.o
-hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o  hclge_debugfs.o hclge_ptp.o
+hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o  hclge_debugfs.o hclge_ptp.o hclge_devlink.o
 
 hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
index eb748aa..474c6d1 100644 (file)
@@ -169,17 +169,19 @@ static bool hclge_is_special_opcode(u16 opcode)
        /* these commands have several descriptors,
         * and use the first one to save opcode and return value
         */
-       u16 spec_opcode[] = {HCLGE_OPC_STATS_64_BIT,
-                            HCLGE_OPC_STATS_32_BIT,
-                            HCLGE_OPC_STATS_MAC,
-                            HCLGE_OPC_STATS_MAC_ALL,
-                            HCLGE_OPC_QUERY_32_BIT_REG,
-                            HCLGE_OPC_QUERY_64_BIT_REG,
-                            HCLGE_QUERY_CLEAR_MPF_RAS_INT,
-                            HCLGE_QUERY_CLEAR_PF_RAS_INT,
-                            HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
-                            HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
-                            HCLGE_QUERY_ALL_ERR_INFO};
+       static const u16 spec_opcode[] = {
+               HCLGE_OPC_STATS_64_BIT,
+               HCLGE_OPC_STATS_32_BIT,
+               HCLGE_OPC_STATS_MAC,
+               HCLGE_OPC_STATS_MAC_ALL,
+               HCLGE_OPC_QUERY_32_BIT_REG,
+               HCLGE_OPC_QUERY_64_BIT_REG,
+               HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+               HCLGE_QUERY_CLEAR_PF_RAS_INT,
+               HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
+               HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
+               HCLGE_QUERY_ALL_ERR_INFO
+       };
        int i;
 
        for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -360,41 +362,34 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
        }
 }
 
+const struct hclge_caps_bit_map hclge_cmd_caps_bit_map0[] = {
+       {HCLGE_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
+       {HCLGE_CAP_PTP_B, HNAE3_DEV_SUPPORT_PTP_B},
+       {HCLGE_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
+       {HCLGE_CAP_TQP_TXRX_INDEP_B, HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B},
+       {HCLGE_CAP_HW_TX_CSUM_B, HNAE3_DEV_SUPPORT_HW_TX_CSUM_B},
+       {HCLGE_CAP_UDP_TUNNEL_CSUM_B, HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B},
+       {HCLGE_CAP_FD_FORWARD_TC_B, HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B},
+       {HCLGE_CAP_FEC_B, HNAE3_DEV_SUPPORT_FEC_B},
+       {HCLGE_CAP_PAUSE_B, HNAE3_DEV_SUPPORT_PAUSE_B},
+       {HCLGE_CAP_PHY_IMP_B, HNAE3_DEV_SUPPORT_PHY_IMP_B},
+       {HCLGE_CAP_RAS_IMP_B, HNAE3_DEV_SUPPORT_RAS_IMP_B},
+       {HCLGE_CAP_RXD_ADV_LAYOUT_B, HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B},
+       {HCLGE_CAP_PORT_VLAN_BYPASS_B, HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B},
+       {HCLGE_CAP_PORT_VLAN_BYPASS_B, HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B},
+};
+
 static void hclge_parse_capability(struct hclge_dev *hdev,
                                   struct hclge_query_version_cmd *cmd)
 {
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
-       u32 caps;
+       u32 caps, i;
 
        caps = __le32_to_cpu(cmd->caps[0]);
-       if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B))
-               set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B))
-               set_bit(HNAE3_DEV_SUPPORT_PTP_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_INT_QL_B))
-               set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_TQP_TXRX_INDEP_B))
-               set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_HW_TX_CSUM_B))
-               set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_UDP_TUNNEL_CSUM_B))
-               set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_FD_FORWARD_TC_B))
-               set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B))
-               set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B))
-               set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
-               set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_RAS_IMP_B))
-               set_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
-               set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
-               set_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps);
-               set_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps);
-       }
+       for (i = 0; i < ARRAY_SIZE(hclge_cmd_caps_bit_map0); i++)
+               if (hnae3_get_bit(caps, hclge_cmd_caps_bit_map0[i].imp_bit))
+                       set_bit(hclge_cmd_caps_bit_map0[i].local_bit,
+                               ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
index ac70d49..3324447 100644 (file)
@@ -320,6 +320,9 @@ enum hclge_opcode_type {
        /* PHY command */
        HCLGE_OPC_PHY_LINK_KSETTING     = 0x7025,
        HCLGE_OPC_PHY_REG               = 0x7026,
+
+       /* Query link diagnosis info command */
+       HCLGE_OPC_QUERY_LINK_DIAGNOSIS  = 0x702A,
 };
 
 #define HCLGE_TQP_REG_OFFSET           0x80000
@@ -450,7 +453,7 @@ struct hclge_tc_thrd {
 };
 
 struct hclge_priv_buf {
-       struct hclge_waterline wl;      /* Waterline for low and high*/
+       struct hclge_waterline wl;      /* Waterline for low and high */
        u32 buf_size;   /* TC private buffer size */
        u32 tx_buf_size;
        u32 enable;     /* Enable TC private buffer or not */
@@ -1014,16 +1017,6 @@ struct hclge_common_lb_cmd {
 
 #define HCLGE_TYPE_CRQ                 0
 #define HCLGE_TYPE_CSQ                 1
-#define HCLGE_NIC_CSQ_BASEADDR_L_REG   0x27000
-#define HCLGE_NIC_CSQ_BASEADDR_H_REG   0x27004
-#define HCLGE_NIC_CSQ_DEPTH_REG                0x27008
-#define HCLGE_NIC_CSQ_TAIL_REG         0x27010
-#define HCLGE_NIC_CSQ_HEAD_REG         0x27014
-#define HCLGE_NIC_CRQ_BASEADDR_L_REG   0x27018
-#define HCLGE_NIC_CRQ_BASEADDR_H_REG   0x2701c
-#define HCLGE_NIC_CRQ_DEPTH_REG                0x27020
-#define HCLGE_NIC_CRQ_TAIL_REG         0x27024
-#define HCLGE_NIC_CRQ_HEAD_REG         0x27028
 
 /* this bit indicates that the driver is ready for hardware reset */
 #define HCLGE_NIC_SW_RST_RDY_B         16
@@ -1198,6 +1191,19 @@ struct hclge_dev_specs_1_cmd {
        u8 rsv1[18];
 };
 
+/* mac speed type defined in firmware command */
+enum HCLGE_FIRMWARE_MAC_SPEED {
+       HCLGE_FW_MAC_SPEED_1G,
+       HCLGE_FW_MAC_SPEED_10G,
+       HCLGE_FW_MAC_SPEED_25G,
+       HCLGE_FW_MAC_SPEED_40G,
+       HCLGE_FW_MAC_SPEED_50G,
+       HCLGE_FW_MAC_SPEED_100G,
+       HCLGE_FW_MAC_SPEED_10M,
+       HCLGE_FW_MAC_SPEED_100M,
+       HCLGE_FW_MAC_SPEED_200G,
+};
+
 #define HCLGE_PHY_LINK_SETTING_BD_NUM          2
 
 struct hclge_phy_link_ksetting_0_cmd {
@@ -1228,6 +1234,12 @@ struct hclge_phy_reg_cmd {
        u8 rsv1[18];
 };
 
+/* capabilities bits map between imp firmware and local driver */
+struct hclge_caps_bit_map {
+       u16 imp_bit;
+       u16 local_bit;
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
index 39f56f2..4a619e5 100644 (file)
@@ -104,26 +104,30 @@ static int hclge_dcb_common_validate(struct hclge_dev *hdev, u8 num_tc,
        return 0;
 }
 
-static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
-                             u8 *tc, bool *changed)
+static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets,
+                              bool *changed)
 {
-       bool has_ets_tc = false;
-       u32 total_ets_bw = 0;
-       u8 max_tc = 0;
-       int ret;
+       u8 max_tc_id = 0;
        u8 i;
 
        for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
                if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
                        *changed = true;
 
-               if (ets->prio_tc[i] > max_tc)
-                       max_tc = ets->prio_tc[i];
+               if (ets->prio_tc[i] > max_tc_id)
+                       max_tc_id = ets->prio_tc[i];
        }
 
-       ret = hclge_dcb_common_validate(hdev, max_tc + 1, ets->prio_tc);
-       if (ret)
-               return ret;
+       /* return max tc number, max tc id need to plus 1 */
+       return max_tc_id + 1;
+}
+
+static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
+                                      struct ieee_ets *ets, bool *changed)
+{
+       bool has_ets_tc = false;
+       u32 total_ets_bw = 0;
+       u8 i;
 
        for (i = 0; i < hdev->tc_max; i++) {
                switch (ets->tc_tsa[i]) {
@@ -148,7 +152,26 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
        if (has_ets_tc && total_ets_bw != BW_PERCENT)
                return -EINVAL;
 
-       *tc = max_tc + 1;
+       return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+                             u8 *tc, bool *changed)
+{
+       u8 tc_num;
+       int ret;
+
+       tc_num = hclge_ets_tc_changed(hdev, ets, changed);
+
+       ret = hclge_dcb_common_validate(hdev, tc_num, ets->prio_tc);
+       if (ret)
+               return ret;
+
+       ret = hclge_ets_sch_mode_validate(hdev, ets, changed);
+       if (ret)
+               return ret;
+
+       *tc = tc_num;
        if (*tc != hdev->tm_info.num_tc)
                *changed = true;
 
@@ -234,9 +257,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
                if (ret)
                        goto err_out;
 
-               ret = hclge_notify_init_up(hdev);
-               if (ret)
-                       return ret;
+               return hclge_notify_init_up(hdev);
        }
 
        return hclge_tm_dwrr_cfg(hdev);
index 2887881..68ed171 100644 (file)
@@ -926,26 +926,45 @@ static int hclge_dbg_dump_tm_nodes(struct hclge_dev *hdev, char *buf, int len)
        return 0;
 }
 
+static const struct hclge_dbg_item tm_pri_items[] = {
+       { "ID", 4 },
+       { "MODE", 2 },
+       { "DWRR", 2 },
+       { "C_IR_B", 2 },
+       { "C_IR_U", 2 },
+       { "C_IR_S", 2 },
+       { "C_BS_B", 2 },
+       { "C_BS_S", 2 },
+       { "C_FLAG", 2 },
+       { "C_RATE(Mbps)", 2 },
+       { "P_IR_B", 2 },
+       { "P_IR_U", 2 },
+       { "P_IR_S", 2 },
+       { "P_BS_B", 2 },
+       { "P_BS_S", 2 },
+       { "P_FLAG", 2 },
+       { "P_RATE(Mbps)", 0 }
+};
+
 static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
 {
-       struct hclge_tm_shaper_para c_shaper_para;
-       struct hclge_tm_shaper_para p_shaper_para;
-       u8 pri_num, sch_mode, weight;
-       char *sch_mode_str;
-       int pos = 0;
-       int ret;
-       u8 i;
+       char data_str[ARRAY_SIZE(tm_pri_items)][HCLGE_DBG_DATA_STR_LEN];
+       struct hclge_tm_shaper_para c_shaper_para, p_shaper_para;
+       char *result[ARRAY_SIZE(tm_pri_items)], *sch_mode_str;
+       char content[HCLGE_DBG_TM_INFO_LEN];
+       u8 pri_num, sch_mode, weight, i, j;
+       int pos, ret;
 
        ret = hclge_tm_get_pri_num(hdev, &pri_num);
        if (ret)
                return ret;
 
-       pos += scnprintf(buf + pos, len - pos,
-                        "ID    MODE  DWRR  C_IR_B  C_IR_U  C_IR_S  C_BS_B  ");
-       pos += scnprintf(buf + pos, len - pos,
-                        "C_BS_S  C_FLAG  C_RATE(Mbps)  P_IR_B  P_IR_U  ");
-       pos += scnprintf(buf + pos, len - pos,
-                        "P_IR_S  P_BS_B  P_BS_S  P_FLAG  P_RATE(Mbps)\n");
+       for (i = 0; i < ARRAY_SIZE(tm_pri_items); i++)
+               result[i] = &data_str[i][0];
+
+       hclge_dbg_fill_content(content, sizeof(content), tm_pri_items,
+                              NULL, ARRAY_SIZE(tm_pri_items));
+       pos = scnprintf(buf, len, "%s", content);
 
        for (i = 0; i < pri_num; i++) {
                ret = hclge_tm_get_pri_sch_mode(hdev, i, &sch_mode);
@@ -971,21 +990,16 @@ static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len)
                sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" :
                               "sp";
 
-               pos += scnprintf(buf + pos, len - pos,
-                                "%04u  %4s  %3u   %3u     %3u     %3u     ",
-                                i, sch_mode_str, weight, c_shaper_para.ir_b,
-                                c_shaper_para.ir_u, c_shaper_para.ir_s);
-               pos += scnprintf(buf + pos, len - pos,
-                                "%3u     %3u       %1u     %6u        ",
-                                c_shaper_para.bs_b, c_shaper_para.bs_s,
-                                c_shaper_para.flag, c_shaper_para.rate);
-               pos += scnprintf(buf + pos, len - pos,
-                                "%3u     %3u     %3u     %3u     %3u       ",
-                                p_shaper_para.ir_b, p_shaper_para.ir_u,
-                                p_shaper_para.ir_s, p_shaper_para.bs_b,
-                                p_shaper_para.bs_s);
-               pos += scnprintf(buf + pos, len - pos, "%1u     %6u\n",
-                                p_shaper_para.flag, p_shaper_para.rate);
+               j = 0;
+               sprintf(result[j++], "%04u", i);
+               sprintf(result[j++], "%4s", sch_mode_str);
+               sprintf(result[j++], "%3u", weight);
+               hclge_dbg_fill_shaper_content(&c_shaper_para, result, &j);
+               hclge_dbg_fill_shaper_content(&p_shaper_para, result, &j);
+               hclge_dbg_fill_content(content, sizeof(content), tm_pri_items,
+                                      (const char **)result,
+                                      ARRAY_SIZE(tm_pri_items));
+               pos += scnprintf(buf + pos, len - pos, "%s", content);
        }
 
        return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
new file mode 100644 (file)
index 0000000..e4aad69
--- /dev/null
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#include <net/devlink.h>
+
+#include "hclge_devlink.h"
+
+static int hclge_devlink_info_get(struct devlink *devlink,
+                                 struct devlink_info_req *req,
+                                 struct netlink_ext_ack *extack)
+{
+#define        HCLGE_DEVLINK_FW_STRING_LEN     32
+       struct hclge_devlink_priv *priv = devlink_priv(devlink);
+       char version_str[HCLGE_DEVLINK_FW_STRING_LEN];
+       struct hclge_dev *hdev = priv->hdev;
+       int ret;
+
+       ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+       if (ret)
+               return ret;
+
+       snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
+                                HNAE3_FW_VERSION_BYTE3_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
+                                HNAE3_FW_VERSION_BYTE2_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
+                                HNAE3_FW_VERSION_BYTE1_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
+                                HNAE3_FW_VERSION_BYTE0_SHIFT));
+
+       return devlink_info_version_running_put(req,
+                                               DEVLINK_INFO_VERSION_GENERIC_FW,
+                                               version_str);
+}
+
+static int hclge_devlink_reload_down(struct devlink *devlink, bool netns_change,
+                                    enum devlink_reload_action action,
+                                    enum devlink_reload_limit limit,
+                                    struct netlink_ext_ack *extack)
+{
+       struct hclge_devlink_priv *priv = devlink_priv(devlink);
+       struct hclge_dev *hdev = priv->hdev;
+       struct hnae3_handle *h = &hdev->vport->nic;
+       struct pci_dev *pdev = hdev->pdev;
+       int ret;
+
+       if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) {
+               dev_err(&pdev->dev, "reset is handling\n");
+               return -EBUSY;
+       }
+
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+               rtnl_lock();
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT);
+               if (ret) {
+                       rtnl_unlock();
+                       return ret;
+               }
+
+               ret = hdev->nic_client->ops->reset_notify(h,
+                                                         HNAE3_UNINIT_CLIENT);
+               rtnl_unlock();
+               return ret;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int hclge_devlink_reload_up(struct devlink *devlink,
+                                  enum devlink_reload_action action,
+                                  enum devlink_reload_limit limit,
+                                  u32 *actions_performed,
+                                  struct netlink_ext_ack *extack)
+{
+       struct hclge_devlink_priv *priv = devlink_priv(devlink);
+       struct hclge_dev *hdev = priv->hdev;
+       struct hnae3_handle *h = &hdev->vport->nic;
+       int ret;
+
+       *actions_performed = BIT(action);
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+               rtnl_lock();
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT);
+               if (ret) {
+                       rtnl_unlock();
+                       return ret;
+               }
+
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT);
+               rtnl_unlock();
+               return ret;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct devlink_ops hclge_devlink_ops = {
+       .info_get = hclge_devlink_info_get,
+       .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
+       .reload_down = hclge_devlink_reload_down,
+       .reload_up = hclge_devlink_reload_up,
+};
+
+int hclge_devlink_init(struct hclge_dev *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       struct hclge_devlink_priv *priv;
+       struct devlink *devlink;
+       int ret;
+
+       devlink = devlink_alloc(&hclge_devlink_ops,
+                               sizeof(struct hclge_devlink_priv), &pdev->dev);
+       if (!devlink)
+               return -ENOMEM;
+
+       priv = devlink_priv(devlink);
+       priv->hdev = hdev;
+       hdev->devlink = devlink;
+
+       ret = devlink_register(devlink);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
+                       ret);
+               goto out_reg_fail;
+       }
+
+       devlink_reload_enable(devlink);
+
+       return 0;
+
+out_reg_fail:
+       devlink_free(devlink);
+       return ret;
+}
+
+void hclge_devlink_uninit(struct hclge_dev *hdev)
+{
+       struct devlink *devlink = hdev->devlink;
+
+       devlink_reload_disable(devlink);
+
+       devlink_unregister(devlink);
+
+       devlink_free(devlink);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.h
new file mode 100644 (file)
index 0000000..918be04
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#ifndef __HCLGE_DEVLINK_H
+#define __HCLGE_DEVLINK_H
+
+#include "hclge_main.h"
+
+struct hclge_devlink_priv {
+       struct hclge_dev *hdev;
+};
+
+int hclge_devlink_init(struct hclge_dev *hdev);
+void hclge_devlink_uninit(struct hclge_dev *hdev);
+#endif
index ec9a7f8..718c16d 100644 (file)
 #include "hclge_err.h"
 
 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
-       { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "imp_itcm0_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "imp_itcm1_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "imp_itcm2_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "imp_itcm3_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "imp_dtcm0_mem0_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "imp_dtcm0_mem1_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "imp_dtcm1_mem0_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "imp_dtcm1_mem1_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "imp_itcm4_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
-       { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "cmdq_nic_rx_head_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "cmdq_nic_tx_head_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(23),
+               .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(25),
+               .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(27),
+               .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(29),
+               .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(31),
+               .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
-       { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(6),
+               .msg = "tqp_int_cfg_even_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "tqp_int_cfg_odd_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "tqp_int_ctrl_even_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "tx_que_scan_int_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "rx_que_scan_int_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
-       { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "msix_nic_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "msix_rocee_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_igu_int[] = {
-       { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "igu_rx_buf0_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "igu_rx_buf1_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
-       { .int_msk = BIT(0), .msg = "rx_buf_overflow",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "tx_buf_overflow",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "tx_buf_underrun",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "rx_buf_overflow",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "rx_stp_fifo_overflow",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "rx_stp_fifo_underflow",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "tx_buf_overflow",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "tx_buf_underrun",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "rx_stp_buf_overflow",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ncsi_err_int[] = {
-       { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "ncsi_tx_ecc_mbit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
-       { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_m1bit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "vf_vlan_ad_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "umv_mcast_group_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "umv_key_mem0_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "umv_key_mem1_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "umv_key_mem2_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "umv_key_mem3_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "umv_ad_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "rss_tc_mode_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "rss_idt_mem0_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "rss_idt_mem1_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "rss_idt_mem2_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "rss_idt_mem3_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "rss_idt_mem4_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "rss_idt_mem5_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "rss_idt_mem6_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "rss_idt_mem7_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(16),
+               .msg = "rss_idt_mem8_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "rss_idt_mem9_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(18),
+               .msg = "rss_idt_mem10_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "rss_idt_mem11_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(20),
+               .msg = "rss_idt_mem12_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "rss_idt_mem13_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(22),
+               .msg = "rss_idt_mem14_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(23),
+               .msg = "rss_idt_mem15_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(24),
+               .msg = "port_vlan_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(25),
+               .msg = "mcast_linear_table_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(26),
+               .msg = "mcast_result_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(27),
+               .msg = "flow_director_ad_mem0_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(28),
+               .msg = "flow_director_ad_mem1_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(29),
+               .msg = "rx_vlan_tag_memory_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(30),
+               .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
-       { .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "tx_vlan_tag_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "rss_list_tc_unassigned_queue_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
-       { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "hfs_fifo_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "tx_vlan_tag_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "FD_CN0_memory_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "FD_CN1_memory_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "GRO_AD_memory_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_tm_sch_rint[] = {
-       { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "tm_sch_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "tm_sch_port_shap_sub_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "tm_sch_port_shap_sub_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "tm_sch_port_shap_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "tm_sch_port_shap_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(16),
+               .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(18),
+               .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(20),
+               .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(22),
+               .msg = "tm_sch_rq_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(23),
+               .msg = "tm_sch_rq_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(24),
+               .msg = "tm_sch_nq_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(25),
+               .msg = "tm_sch_nq_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(26),
+               .msg = "tm_sch_roce_up_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(27),
+               .msg = "tm_sch_roce_up_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(28),
+               .msg = "tm_sch_rcb_byte_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(29),
+               .msg = "tm_sch_rcb_byte_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(30),
+               .msg = "tm_sch_ssu_byte_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(31),
+               .msg = "tm_sch_ssu_byte_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
-       { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "qcn_shap_gp0_sch_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "qcn_shap_gp0_sch_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "qcn_shap_gp1_sch_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "qcn_shap_gp1_sch_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "qcn_shap_gp2_sch_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "qcn_shap_gp2_sch_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "qcn_shap_gp3_sch_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "qcn_shap_gp3_sch_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "qcn_shap_gp0_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "qcn_shap_gp0_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "qcn_shap_gp1_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "qcn_shap_gp1_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "qcn_shap_gp2_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "qcn_shap_gp2_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "qcn_shap_gp3_offset_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "qcn_shap_gp3_offset_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(16),
+               .msg = "qcn_byte_info_fifo_rd_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "qcn_byte_info_fifo_wr_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
-       { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(1),
+               .msg = "qcn_byte_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "qcn_time_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "qcn_fb_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "qcn_link_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "qcn_rate_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "qcn_tmplt_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "qcn_shap_cfg_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "qcn_gp0_barrel_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "qcn_gp1_barrel_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "qcn_gp2_barrel_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "qcn_gp3_barral_mem_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
-       { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "egu_cge_afifo_ecc_1bit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "egu_cge_afifo_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "egu_lge_afifo_ecc_1bit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "egu_lge_afifo_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "cge_igu_afifo_ecc_1bit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "cge_igu_afifo_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "lge_igu_afifo_ecc_1bit_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "lge_igu_afifo_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "cge_igu_afifo_overflow_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "lge_igu_afifo_overflow_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "egu_cge_afifo_underrun_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "egu_lge_afifo_underrun_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "egu_ge_afifo_underrun_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "ge_igu_afifo_overflow_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
-       { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(26), .msg = "rd_bus_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(27), .msg = "wr_bus_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(28), .msg = "reg_search_miss",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(29), .msg = "rx_q_search_miss",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(13),
+               .msg = "rpu_rx_pkt_bit32_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "rpu_rx_pkt_bit33_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "rpu_rx_pkt_bit34_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(16),
+               .msg = "rpu_rx_pkt_bit35_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "rcb_tx_ring_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(18),
+               .msg = "rcb_rx_ring_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "rcb_tx_fbd_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(20),
+               .msg = "rcb_rx_ebd_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "rcb_tso_info_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(22),
+               .msg = "rcb_tx_int_info_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(23),
+               .msg = "rcb_rx_int_info_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(24),
+               .msg = "tpu_tx_pkt_0_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(25),
+               .msg = "tpu_tx_pkt_1_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(26),
+               .msg = "rd_bus_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(27),
+               .msg = "wr_bus_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(28),
+               .msg = "reg_search_miss",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(29),
+               .msg = "rx_q_search_miss",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(30),
+               .msg = "ooo_ecc_err_detect",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(31),
+               .msg = "ooo_ecc_err_multpl",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
-       { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(4),
+               .msg = "gro_bd_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "gro_context_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "rx_stash_cfg_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "axi_rd_fbd_ecc_mbit_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
-       { .int_msk = BIT(0), .msg = "over_8bd_no_fe",
-         .reset_level = HNAE3_FUNC_RESET },
-       { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison",
-         .reset_level = HNAE3_FUNC_RESET },
-       { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison",
-         .reset_level = HNAE3_FUNC_RESET },
-       { .int_msk = BIT(5), .msg = "buf_wait_timeout",
-         .reset_level = HNAE3_NONE_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "over_8bd_no_fe",
+               .reset_level = HNAE3_FUNC_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "tso_mss_cmp_min_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "tso_mss_cmp_max_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "tx_rd_fbd_poison",
+               .reset_level = HNAE3_FUNC_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "rx_rd_ebd_poison",
+               .reset_level = HNAE3_FUNC_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "buf_wait_timeout",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
-       { .int_msk = BIT(0), .msg = "buf_sum_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(1), .msg = "ppp_mb_num_err",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(2), .msg = "ppp_mbid_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "ppp_rlt_host_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "cks_edit_position_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "cks_edit_condition_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "vlan_num_ot_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "vlan_num_in_err",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "buf_sum_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "ppp_mb_num_err",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "ppp_mbid_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "ppp_rlt_mac_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "ppp_rlt_host_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "cks_edit_position_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "cks_edit_condition_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "vlan_edit_condition_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "vlan_num_ot_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "vlan_num_in_err",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 #define HCLGE_SSU_MEM_ECC_ERR(x) \
-       { .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
-         .reset_level = HNAE3_GLOBAL_RESET }
+{ \
+       .int_msk = BIT(x), \
+       .msg = "ssu_mem" #x "_ecc_mbit_err", \
+       .reset_level = HNAE3_GLOBAL_RESET \
+}
 
 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
        HCLGE_SSU_MEM_ECC_ERR(0),
@@ -504,131 +931,269 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
 };
 
 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
-       { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
-         .reset_level = HNAE3_FUNC_RESET },
-       { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "roc_pkt_without_key_port",
+               .reset_level = HNAE3_FUNC_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "tpu_pkt_without_key_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "igu_pkt_without_key_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "roc_eof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "tpu_eof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "igu_eof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "roc_sof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "tpu_sof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "igu_sof_mis_match_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "ets_rd_int_rx_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "ets_wr_int_rx_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "ets_rd_int_tx_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "ets_wr_int_tx_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
-       { .int_msk = BIT(0), .msg = "ig_mac_inf_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "ig_host_inf_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "ig_roc_buf_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(9), .msg = "qm_eof_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(15), .msg = "host_cmd_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "ig_mac_inf_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "ig_host_inf_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "ig_roc_buf_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "ig_host_data_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(4),
+               .msg = "ig_host_key_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(5),
+               .msg = "tx_qcn_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(6),
+               .msg = "rx_qcn_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(7),
+               .msg = "tx_pf_rd_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(8),
+               .msg = "rx_pf_rd_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "qm_eof_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "mb_rlt_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(11),
+               .msg = "dup_uncopy_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(12),
+               .msg = "dup_cnt_rd_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(13),
+               .msg = "dup_cnt_drop_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(14),
+               .msg = "dup_cnt_wrb_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(15),
+               .msg = "host_cmd_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(16),
+               .msg = "mac_cmd_fifo_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(17),
+               .msg = "host_cmd_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(18),
+               .msg = "mac_cmd_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(19),
+               .msg = "dup_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(20),
+               .msg = "out_queue_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(21),
+               .msg = "bank2_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(22),
+               .msg = "bank1_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(23),
+               .msg = "bank0_bitmap_empty_int",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
-       { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "ets_rd_int_rx_tcg",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(1),
+               .msg = "ets_wr_int_rx_tcg",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(2),
+               .msg = "ets_rd_int_tx_tcg",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               .int_msk = BIT(3),
+               .msg = "ets_wr_int_tx_tcg",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
-       { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
-         .reset_level = HNAE3_FUNC_RESET },
-       { .int_msk = BIT(9), .msg = "low_water_line_err_port",
-         .reset_level = HNAE3_NONE_RESET },
-       { .int_msk = BIT(10), .msg = "hi_water_line_err_port",
-         .reset_level = HNAE3_GLOBAL_RESET },
-       { /* sentinel */ }
+       {
+               .int_msk = BIT(0),
+               .msg = "roc_pkt_without_key_port",
+               .reset_level = HNAE3_FUNC_RESET
+       }, {
+               .int_msk = BIT(9),
+               .msg = "low_water_line_err_port",
+               .reset_level = HNAE3_NONE_RESET
+       }, {
+               .int_msk = BIT(10),
+               .msg = "hi_water_line_err_port",
+               .reset_level = HNAE3_GLOBAL_RESET
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
-       { .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" },
-       { .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" },
-       { .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" },
-       { .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" },
-       { .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" },
-       { .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" },
-       { .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" },
-       { .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" },
-       { .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" },
-       { .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" },
-       { .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" },
-       { .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" },
-       { .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" },
-       { .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" },
-       { .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" },
-       { .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" },
-       { .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" },
-       { .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" },
-       { .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" },
-       { .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" },
-       { /* sentinel */ }
+       {
+               .int_msk = 0,
+               .msg = "rocee qmm ovf: sgid invalid err"
+       }, {
+               .int_msk = 0x4,
+               .msg = "rocee qmm ovf: sgid ovf err"
+       }, {
+               .int_msk = 0x8,
+               .msg = "rocee qmm ovf: smac invalid err"
+       }, {
+               .int_msk = 0xC,
+               .msg = "rocee qmm ovf: smac ovf err"
+       }, {
+               .int_msk = 0x10,
+               .msg = "rocee qmm ovf: cqc invalid err"
+       }, {
+               .int_msk = 0x11,
+               .msg = "rocee qmm ovf: cqc ovf err"
+       }, {
+               .int_msk = 0x12,
+               .msg = "rocee qmm ovf: cqc hopnum err"
+       }, {
+               .int_msk = 0x13,
+               .msg = "rocee qmm ovf: cqc ba0 err"
+       }, {
+               .int_msk = 0x14,
+               .msg = "rocee qmm ovf: srqc invalid err"
+       }, {
+               .int_msk = 0x15,
+               .msg = "rocee qmm ovf: srqc ovf err"
+       }, {
+               .int_msk = 0x16,
+               .msg = "rocee qmm ovf: srqc hopnum err"
+       }, {
+               .int_msk = 0x17,
+               .msg = "rocee qmm ovf: srqc ba0 err"
+       }, {
+               .int_msk = 0x18,
+               .msg = "rocee qmm ovf: mpt invalid err"
+       }, {
+               .int_msk = 0x19,
+               .msg = "rocee qmm ovf: mpt ovf err"
+       }, {
+               .int_msk = 0x1A,
+               .msg = "rocee qmm ovf: mpt hopnum err"
+       }, {
+               .int_msk = 0x1B,
+               .msg = "rocee qmm ovf: mpt ba0 err"
+       }, {
+               .int_msk = 0x1C,
+               .msg = "rocee qmm ovf: qpc invalid err"
+       }, {
+               .int_msk = 0x1D,
+               .msg = "rocee qmm ovf: qpc ovf err"
+       }, {
+               .int_msk = 0x1E,
+               .msg = "rocee qmm ovf: qpc hopnum err"
+       }, {
+               .int_msk = 0x1F,
+               .msg = "rocee qmm ovf: qpc ba0 err"
+       }, {
+               /* sentinel */
+       }
 };
 
 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
@@ -1709,34 +2274,36 @@ static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
 
 static const struct hclge_hw_blk hw_blk[] = {
        {
-         .msk = BIT(0), .name = "IGU_EGU",
-         .config_err_int = hclge_config_igu_egu_hw_err_int,
-       },
-       {
-         .msk = BIT(1), .name = "PPP",
-         .config_err_int = hclge_config_ppp_hw_err_int,
-       },
-       {
-         .msk = BIT(2), .name = "SSU",
-         .config_err_int = hclge_config_ssu_hw_err_int,
-       },
-       {
-         .msk = BIT(3), .name = "PPU",
-         .config_err_int = hclge_config_ppu_hw_err_int,
-       },
-       {
-         .msk = BIT(4), .name = "TM",
-         .config_err_int = hclge_config_tm_hw_err_int,
-       },
-       {
-         .msk = BIT(5), .name = "COMMON",
-         .config_err_int = hclge_config_common_hw_err_int,
-       },
-       {
-         .msk = BIT(8), .name = "MAC",
-         .config_err_int = hclge_config_mac_err_int,
-       },
-       { /* sentinel */ }
+               .msk = BIT(0),
+               .name = "IGU_EGU",
+               .config_err_int = hclge_config_igu_egu_hw_err_int,
+       }, {
+               .msk = BIT(1),
+               .name = "PPP",
+               .config_err_int = hclge_config_ppp_hw_err_int,
+       }, {
+               .msk = BIT(2),
+               .name = "SSU",
+               .config_err_int = hclge_config_ssu_hw_err_int,
+       }, {
+               .msk = BIT(3),
+               .name = "PPU",
+               .config_err_int = hclge_config_ppu_hw_err_int,
+       }, {
+               .msk = BIT(4),
+               .name = "TM",
+               .config_err_int = hclge_config_tm_hw_err_int,
+       }, {
+               .msk = BIT(5),
+               .name = "COMMON",
+               .config_err_int = hclge_config_common_hw_err_int,
+       }, {
+               .msk = BIT(8),
+               .name = "MAC",
+               .config_err_int = hclge_config_mac_err_int,
+       }, {
+               /* sentinel */
+       }
 };
 
 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
index 03ae122..e55ba2e 100644 (file)
@@ -23,6 +23,7 @@
 #include "hclge_tm.h"
 #include "hclge_err.h"
 #include "hnae3.h"
+#include "hclge_devlink.h"
 
 #define HCLGE_NAME                     "hclge"
 #define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset)))
@@ -91,23 +92,23 @@ static const struct pci_device_id ae_algo_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, ae_algo_pci_tbl);
 
-static const u32 cmdq_reg_addr_list[] = {HCLGE_CMDQ_TX_ADDR_L_REG,
-                                        HCLGE_CMDQ_TX_ADDR_H_REG,
-                                        HCLGE_CMDQ_TX_DEPTH_REG,
-                                        HCLGE_CMDQ_TX_TAIL_REG,
-                                        HCLGE_CMDQ_TX_HEAD_REG,
-                                        HCLGE_CMDQ_RX_ADDR_L_REG,
-                                        HCLGE_CMDQ_RX_ADDR_H_REG,
-                                        HCLGE_CMDQ_RX_DEPTH_REG,
-                                        HCLGE_CMDQ_RX_TAIL_REG,
-                                        HCLGE_CMDQ_RX_HEAD_REG,
+static const u32 cmdq_reg_addr_list[] = {HCLGE_NIC_CSQ_BASEADDR_L_REG,
+                                        HCLGE_NIC_CSQ_BASEADDR_H_REG,
+                                        HCLGE_NIC_CSQ_DEPTH_REG,
+                                        HCLGE_NIC_CSQ_TAIL_REG,
+                                        HCLGE_NIC_CSQ_HEAD_REG,
+                                        HCLGE_NIC_CRQ_BASEADDR_L_REG,
+                                        HCLGE_NIC_CRQ_BASEADDR_H_REG,
+                                        HCLGE_NIC_CRQ_DEPTH_REG,
+                                        HCLGE_NIC_CRQ_TAIL_REG,
+                                        HCLGE_NIC_CRQ_HEAD_REG,
                                         HCLGE_VECTOR0_CMDQ_SRC_REG,
                                         HCLGE_CMDQ_INTR_STS_REG,
                                         HCLGE_CMDQ_INTR_EN_REG,
                                         HCLGE_CMDQ_INTR_GEN_REG};
 
 static const u32 common_reg_addr_list[] = {HCLGE_MISC_VECTOR_REG_BASE,
-                                          HCLGE_VECTOR0_OTER_EN_REG,
+                                          HCLGE_PF_OTHER_INT_REG,
                                           HCLGE_MISC_RESET_STS_REG,
                                           HCLGE_MISC_VECTOR_INT_STS,
                                           HCLGE_GLOBAL_RESET_REG,
@@ -374,14 +375,14 @@ static const enum hclge_opcode_type hclge_dfx_reg_opcode_list[] = {
 };
 
 static const struct key_info meta_data_key_info[] = {
-       { PACKET_TYPE_ID, 6},
-       { IP_FRAGEMENT, 1},
-       { ROCE_TYPE, 1},
-       { NEXT_KEY, 5},
-       { VLAN_NUMBER, 2},
-       { SRC_VPORT, 12},
-       { DST_VPORT, 12},
-       { TUNNEL_PACKET, 1},
+       { PACKET_TYPE_ID, 6 },
+       { IP_FRAGEMENT, 1 },
+       { ROCE_TYPE, 1 },
+       { NEXT_KEY, 5 },
+       { VLAN_NUMBER, 2 },
+       { SRC_VPORT, 12 },
+       { DST_VPORT, 12 },
+       { TUNNEL_PACKET, 1 },
 };
 
 static const struct key_info tuple_key_info[] = {
@@ -748,9 +749,9 @@ static void hclge_update_stats(struct hnae3_handle *handle,
 
 static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 {
-#define HCLGE_LOOPBACK_TEST_FLAGS (HNAE3_SUPPORT_APP_LOOPBACK |\
-               HNAE3_SUPPORT_PHY_LOOPBACK |\
-               HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK |\
+#define HCLGE_LOOPBACK_TEST_FLAGS (HNAE3_SUPPORT_APP_LOOPBACK | \
+               HNAE3_SUPPORT_PHY_LOOPBACK | \
+               HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK | \
                HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK)
 
        struct hclge_vport *vport = hclge_get_vport(handle);
@@ -958,31 +959,31 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
 static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
 {
        switch (speed_cmd) {
-       case 6:
+       case HCLGE_FW_MAC_SPEED_10M:
                *speed = HCLGE_MAC_SPEED_10M;
                break;
-       case 7:
+       case HCLGE_FW_MAC_SPEED_100M:
                *speed = HCLGE_MAC_SPEED_100M;
                break;
-       case 0:
+       case HCLGE_FW_MAC_SPEED_1G:
                *speed = HCLGE_MAC_SPEED_1G;
                break;
-       case 1:
+       case HCLGE_FW_MAC_SPEED_10G:
                *speed = HCLGE_MAC_SPEED_10G;
                break;
-       case 2:
+       case HCLGE_FW_MAC_SPEED_25G:
                *speed = HCLGE_MAC_SPEED_25G;
                break;
-       case 3:
+       case HCLGE_FW_MAC_SPEED_40G:
                *speed = HCLGE_MAC_SPEED_40G;
                break;
-       case 4:
+       case HCLGE_FW_MAC_SPEED_50G:
                *speed = HCLGE_MAC_SPEED_50G;
                break;
-       case 5:
+       case HCLGE_FW_MAC_SPEED_100G:
                *speed = HCLGE_MAC_SPEED_100G;
                break;
-       case 8:
+       case HCLGE_FW_MAC_SPEED_200G:
                *speed = HCLGE_MAC_SPEED_200G;
                break;
        default:
@@ -992,44 +993,43 @@ static int hclge_parse_speed(u8 speed_cmd, u32 *speed)
        return 0;
 }
 
+static const struct hclge_speed_bit_map speed_bit_map[] = {
+       {HCLGE_MAC_SPEED_10M, HCLGE_SUPPORT_10M_BIT},
+       {HCLGE_MAC_SPEED_100M, HCLGE_SUPPORT_100M_BIT},
+       {HCLGE_MAC_SPEED_1G, HCLGE_SUPPORT_1G_BIT},
+       {HCLGE_MAC_SPEED_10G, HCLGE_SUPPORT_10G_BIT},
+       {HCLGE_MAC_SPEED_25G, HCLGE_SUPPORT_25G_BIT},
+       {HCLGE_MAC_SPEED_40G, HCLGE_SUPPORT_40G_BIT},
+       {HCLGE_MAC_SPEED_50G, HCLGE_SUPPORT_50G_BIT},
+       {HCLGE_MAC_SPEED_100G, HCLGE_SUPPORT_100G_BIT},
+       {HCLGE_MAC_SPEED_200G, HCLGE_SUPPORT_200G_BIT},
+};
+
+static int hclge_get_speed_bit(u32 speed, u32 *speed_bit)
+{
+       u16 i;
+
+       for (i = 0; i < ARRAY_SIZE(speed_bit_map); i++) {
+               if (speed == speed_bit_map[i].speed) {
+                       *speed_bit = speed_bit_map[i].speed_bit;
+                       return 0;
+               }
+       }
+
+       return -EINVAL;
+}
+
 static int hclge_check_port_speed(struct hnae3_handle *handle, u32 speed)
 {
        struct hclge_vport *vport = hclge_get_vport(handle);
        struct hclge_dev *hdev = vport->back;
        u32 speed_ability = hdev->hw.mac.speed_ability;
        u32 speed_bit = 0;
+       int ret;
 
-       switch (speed) {
-       case HCLGE_MAC_SPEED_10M:
-               speed_bit = HCLGE_SUPPORT_10M_BIT;
-               break;
-       case HCLGE_MAC_SPEED_100M:
-               speed_bit = HCLGE_SUPPORT_100M_BIT;
-               break;
-       case HCLGE_MAC_SPEED_1G:
-               speed_bit = HCLGE_SUPPORT_1G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_10G:
-               speed_bit = HCLGE_SUPPORT_10G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_25G:
-               speed_bit = HCLGE_SUPPORT_25G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_40G:
-               speed_bit = HCLGE_SUPPORT_40G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_50G:
-               speed_bit = HCLGE_SUPPORT_50G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_100G:
-               speed_bit = HCLGE_SUPPORT_100G_BIT;
-               break;
-       case HCLGE_MAC_SPEED_200G:
-               speed_bit = HCLGE_SUPPORT_200G_BIT;
-               break;
-       default:
-               return -EINVAL;
-       }
+       ret = hclge_get_speed_bit(speed, &speed_bit);
+       if (ret)
+               return ret;
 
        if (speed_bit & speed_ability)
                return 0;
@@ -1814,6 +1814,7 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps)
        nic->pdev = hdev->pdev;
        nic->ae_algo = &ae_algo;
        nic->numa_node_mask = hdev->numa_node_mask;
+       nic->kinfo.io_base = hdev->hw.io_base;
 
        ret = hclge_knic_setup(vport, num_tqps,
                               hdev->num_tx_desc, hdev->num_rx_desc);
@@ -2580,39 +2581,39 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed,
        switch (speed) {
        case HCLGE_MAC_SPEED_10M:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 6);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_10M);
                break;
        case HCLGE_MAC_SPEED_100M:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 7);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_100M);
                break;
        case HCLGE_MAC_SPEED_1G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 0);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_1G);
                break;
        case HCLGE_MAC_SPEED_10G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 1);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_10G);
                break;
        case HCLGE_MAC_SPEED_25G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 2);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_25G);
                break;
        case HCLGE_MAC_SPEED_40G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 3);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_40G);
                break;
        case HCLGE_MAC_SPEED_50G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 4);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_50G);
                break;
        case HCLGE_MAC_SPEED_100G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 5);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_100G);
                break;
        case HCLGE_MAC_SPEED_200G:
                hnae3_set_field(req->speed_dup, HCLGE_CFG_SPEED_M,
-                               HCLGE_CFG_SPEED_S, 8);
+                               HCLGE_CFG_SPEED_S, HCLGE_FW_MAC_SPEED_200G);
                break;
        default:
                dev_err(&hdev->pdev->dev, "invalid speed (%d)\n", speed);
@@ -3420,7 +3421,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
        hclge_enable_vector(&hdev->misc_vector, false);
        event_cause = hclge_check_event_cause(hdev, &clearval);
 
-       /* vector 0 interrupt is shared with reset and mailbox source events.*/
+       /* vector 0 interrupt is shared with reset and mailbox source events. */
        switch (event_cause) {
        case HCLGE_VECTOR0_EVENT_ERR:
                hclge_errhand_task_schedule(hdev);
@@ -3789,6 +3790,12 @@ static void hclge_do_reset(struct hclge_dev *hdev)
        }
 
        switch (hdev->reset_type) {
+       case HNAE3_IMP_RESET:
+               dev_info(&pdev->dev, "IMP reset requested\n");
+               val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+               hnae3_set_bit(val, HCLGE_TRIGGER_IMP_RESET_B, 1);
+               hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, val);
+               break;
        case HNAE3_GLOBAL_RESET:
                dev_info(&pdev->dev, "global reset requested\n");
                val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG);
@@ -5937,7 +5944,7 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
        cur_key_x = key_x;
        cur_key_y = key_y;
 
-       for (i = 0 ; i < MAX_TUPLE; i++) {
+       for (i = 0; i < MAX_TUPLE; i++) {
                bool tuple_valid;
 
                tuple_size = tuple_key_info[i].key_length / 8;
@@ -11509,10 +11516,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
        if (ret)
                goto out;
 
+       ret = hclge_devlink_init(hdev);
+       if (ret)
+               goto err_pci_uninit;
+
        /* Firmware command queue initialize */
        ret = hclge_cmd_queue_init(hdev);
        if (ret)
-               goto err_pci_uninit;
+               goto err_devlink_uninit;
 
        /* Firmware command initialize */
        ret = hclge_cmd_init(hdev);
@@ -11689,6 +11700,8 @@ err_msi_uninit:
        pci_free_irq_vectors(pdev);
 err_cmd_uninit:
        hclge_cmd_uninit(hdev);
+err_devlink_uninit:
+       hclge_devlink_uninit(hdev);
 err_pci_uninit:
        pcim_iounmap(pdev, hdev->hw.io_base);
        pci_clear_master(pdev);
@@ -12079,6 +12092,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 
        hclge_cmd_uninit(hdev);
        hclge_misc_irq_uninit(hdev);
+       hclge_devlink_uninit(hdev);
        hclge_pci_uninit(hdev);
        mutex_destroy(&hdev->vport_lock);
        hclge_uninit_vport_vlan_table(hdev);
@@ -12867,6 +12881,29 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset,
        return 0;
 }
 
+static int hclge_get_link_diagnosis_info(struct hnae3_handle *handle,
+                                        u32 *status_code)
+{
+       struct hclge_vport *vport = hclge_get_vport(handle);
+       struct hclge_dev *hdev = vport->back;
+       struct hclge_desc desc;
+       int ret;
+
+       if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2)
+               return -EOPNOTSUPP;
+
+       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_LINK_DIAGNOSIS, true);
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "failed to query link diagnosis info, ret = %d\n", ret);
+               return ret;
+       }
+
+       *status_code = le32_to_cpu(desc.data[0]);
+       return 0;
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
        .init_ae_dev = hclge_init_ae_dev,
        .uninit_ae_dev = hclge_uninit_ae_dev,
@@ -12967,6 +13004,7 @@ static const struct hnae3_ae_ops hclge_ops = {
        .set_tx_hwts_info = hclge_ptp_set_tx_info,
        .get_rx_hwts = hclge_ptp_get_rx_hwts,
        .get_ts_info = hclge_ptp_get_ts_info,
+       .get_link_diagnosis_info = hclge_get_link_diagnosis_info,
 };
 
 static struct hnae3_ae_algo ae_algo = {
index e446b83..de6afbc 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/phy.h>
 #include <linux/if_vlan.h>
 #include <linux/kfifo.h>
+#include <net/devlink.h>
 
 #include "hclge_cmd.h"
 #include "hclge_ptp.h"
 #define HCLGE_VECTOR_REG_OFFSET_H      0x1000
 #define HCLGE_VECTOR_VF_OFFSET         0x100000
 
-#define HCLGE_CMDQ_TX_ADDR_L_REG       0x27000
-#define HCLGE_CMDQ_TX_ADDR_H_REG       0x27004
-#define HCLGE_CMDQ_TX_DEPTH_REG                0x27008
-#define HCLGE_CMDQ_TX_TAIL_REG         0x27010
-#define HCLGE_CMDQ_TX_HEAD_REG         0x27014
-#define HCLGE_CMDQ_RX_ADDR_L_REG       0x27018
-#define HCLGE_CMDQ_RX_ADDR_H_REG       0x2701C
-#define HCLGE_CMDQ_RX_DEPTH_REG                0x27020
-#define HCLGE_CMDQ_RX_TAIL_REG         0x27024
-#define HCLGE_CMDQ_RX_HEAD_REG         0x27028
+#define HCLGE_NIC_CSQ_BASEADDR_L_REG   0x27000
+#define HCLGE_NIC_CSQ_BASEADDR_H_REG   0x27004
+#define HCLGE_NIC_CSQ_DEPTH_REG                0x27008
+#define HCLGE_NIC_CSQ_TAIL_REG         0x27010
+#define HCLGE_NIC_CSQ_HEAD_REG         0x27014
+#define HCLGE_NIC_CRQ_BASEADDR_L_REG   0x27018
+#define HCLGE_NIC_CRQ_BASEADDR_H_REG   0x2701C
+#define HCLGE_NIC_CRQ_DEPTH_REG                0x27020
+#define HCLGE_NIC_CRQ_TAIL_REG         0x27024
+#define HCLGE_NIC_CRQ_HEAD_REG         0x27028
+
 #define HCLGE_CMDQ_INTR_STS_REG                0x27104
 #define HCLGE_CMDQ_INTR_EN_REG         0x27108
 #define HCLGE_CMDQ_INTR_GEN_REG                0x2710C
 
 /* bar registers for common func */
-#define HCLGE_VECTOR0_OTER_EN_REG      0x20600
 #define HCLGE_GRO_EN_REG               0x28000
 #define HCLGE_RXD_ADV_LAYOUT_EN_REG    0x28008
 
@@ -193,6 +194,7 @@ enum HLCGE_PORT_TYPE {
 #define HCLGE_VECTOR0_IMP_CMDQ_ERR_B   4U
 #define HCLGE_VECTOR0_IMP_RD_POISON_B  5U
 #define HCLGE_VECTOR0_ALL_MSIX_ERR_B   6U
+#define HCLGE_TRIGGER_IMP_RESET_B      7U
 
 #define HCLGE_MAC_DEFAULT_FRAME \
        (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)
@@ -944,6 +946,7 @@ struct hclge_dev {
        cpumask_t affinity_mask;
        struct irq_affinity_notify affinity_notify;
        struct hclge_ptp *ptp;
+       struct devlink *devlink;
 };
 
 /* VPort level vlan tag configuration for TX direction */
@@ -1055,6 +1058,11 @@ struct hclge_vport {
        struct list_head vlan_list;     /* Store VF vlan table */
 };
 
+struct hclge_speed_bit_map {
+       u32 speed;
+       u32 speed_bit;
+};
+
 int hclge_set_vport_promisc_mode(struct hclge_vport *vport, bool en_uc_pmc,
                                 bool en_mc_pmc, bool en_bc_pmc);
 int hclge_add_uc_addr_common(struct hclge_vport *vport,
index c0a478a..2ce5302 100644 (file)
 
 static u16 hclge_errno_to_resp(int errno)
 {
-       return abs(errno);
+       int resp = abs(errno);
+
+       /* The status for pf to vf msg cmd is u16, constrainted by HW.
+        * We need to keep the same type with it.
+        * The intput errno is the stander error code, it's safely to
+        * use a u16 to store the abs(errno).
+        */
+       return (u16)resp;
 }
 
 /* hclge_gen_resp_to_vf: used to generate a synchronous response to VF when PF
@@ -66,6 +73,8 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
                memcpy(resp_pf_to_vf->msg.resp_data, resp_msg->data,
                       resp_msg->len);
 
+       trace_hclge_pf_mbx_send(hdev, resp_pf_to_vf);
+
        status = hclge_cmd_send(&hdev->hw, &desc, 1);
        if (status)
                dev_err(&hdev->pdev->dev,
index dbf5f4c..7a9b77d 100644 (file)
@@ -127,7 +127,7 @@ static inline struct hclge_dev *hclge_ptp_get_hdev(struct ptp_clock_info *info)
 }
 
 bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb);
-void hclge_ptp_clean_tx_hwts(struct hclge_dev *dev);
+void hclge_ptp_clean_tx_hwts(struct hclge_dev *hdev);
 void hclge_ptp_get_rx_hwts(struct hnae3_handle *handle, struct sk_buff *skb,
                           u32 nsec, u32 sec);
 int hclge_ptp_get_cfg(struct hclge_dev *hdev, struct ifreq *ifr);
index 2c26ea6..51ff7d8 100644 (file)
@@ -7,4 +7,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
 ccflags-y += -I $(srctree)/$(src)
 
 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
-hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
+hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o  hclgevf_devlink.o
index d9ddb0a..59772b0 100644 (file)
@@ -71,7 +71,7 @@ static bool hclgevf_cmd_csq_done(struct hclgevf_hw *hw)
 
 static bool hclgevf_is_special_opcode(u16 opcode)
 {
-       static const u16 spec_opcode[] = {0x30, 0x31, 0x32};
+       const u16 spec_opcode[] = {0x30, 0x31, 0x32};
        int i;
 
        for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -342,25 +342,26 @@ static void hclgevf_set_default_capability(struct hclgevf_dev *hdev)
        set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
 }
 
+const struct hclgevf_caps_bit_map hclgevf_cmd_caps_bit_map0[] = {
+       {HCLGEVF_CAP_UDP_GSO_B, HNAE3_DEV_SUPPORT_UDP_GSO_B},
+       {HCLGEVF_CAP_INT_QL_B, HNAE3_DEV_SUPPORT_INT_QL_B},
+       {HCLGEVF_CAP_TQP_TXRX_INDEP_B, HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B},
+       {HCLGEVF_CAP_HW_TX_CSUM_B, HNAE3_DEV_SUPPORT_HW_TX_CSUM_B},
+       {HCLGEVF_CAP_UDP_TUNNEL_CSUM_B, HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B},
+       {HCLGEVF_CAP_RXD_ADV_LAYOUT_B, HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B},
+};
+
 static void hclgevf_parse_capability(struct hclgevf_dev *hdev,
                                     struct hclgevf_query_version_cmd *cmd)
 {
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
-       u32 caps;
+       u32 caps, i;
 
        caps = __le32_to_cpu(cmd->caps[0]);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B))
-               set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B))
-               set_bit(HNAE3_DEV_SUPPORT_INT_QL_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_TQP_TXRX_INDEP_B))
-               set_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_HW_TX_CSUM_B))
-               set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_TUNNEL_CSUM_B))
-               set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
-       if (hnae3_get_bit(caps, HCLGEVF_CAP_RXD_ADV_LAYOUT_B))
-               set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
+       for (i = 0; i < ARRAY_SIZE(hclgevf_cmd_caps_bit_map0); i++)
+               if (hnae3_get_bit(caps, hclgevf_cmd_caps_bit_map0[i].imp_bit))
+                       set_bit(hclgevf_cmd_caps_bit_map0[i].local_bit,
+                               ae_dev->caps);
 }
 
 static __le32 hclgevf_build_api_caps(void)
index 5b82177..39d0b58 100644 (file)
@@ -266,16 +266,6 @@ struct hclgevf_cfg_tx_queue_pointer_cmd {
 
 #define HCLGEVF_TYPE_CRQ               0
 #define HCLGEVF_TYPE_CSQ               1
-#define HCLGEVF_NIC_CSQ_BASEADDR_L_REG 0x27000
-#define HCLGEVF_NIC_CSQ_BASEADDR_H_REG 0x27004
-#define HCLGEVF_NIC_CSQ_DEPTH_REG      0x27008
-#define HCLGEVF_NIC_CSQ_TAIL_REG       0x27010
-#define HCLGEVF_NIC_CSQ_HEAD_REG       0x27014
-#define HCLGEVF_NIC_CRQ_BASEADDR_L_REG 0x27018
-#define HCLGEVF_NIC_CRQ_BASEADDR_H_REG 0x2701c
-#define HCLGEVF_NIC_CRQ_DEPTH_REG      0x27020
-#define HCLGEVF_NIC_CRQ_TAIL_REG       0x27024
-#define HCLGEVF_NIC_CRQ_HEAD_REG       0x27028
 
 /* this bit indicates that the driver is ready for hardware reset */
 #define HCLGEVF_NIC_SW_RST_RDY_B       16
@@ -306,6 +296,12 @@ struct hclgevf_dev_specs_1_cmd {
        u8 rsv1[18];
 };
 
+/* capabilities bits map between imp firmware and local driver */
+struct hclgevf_caps_bit_map {
+       u16 imp_bit;
+       u16 local_bit;
+};
+
 static inline void hclgevf_write_reg(void __iomem *base, u32 reg, u32 value)
 {
        writel(value, base + reg);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
new file mode 100644 (file)
index 0000000..f478770
--- /dev/null
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#include <net/devlink.h>
+
+#include "hclgevf_devlink.h"
+
+static int hclgevf_devlink_info_get(struct devlink *devlink,
+                                   struct devlink_info_req *req,
+                                   struct netlink_ext_ack *extack)
+{
+#define        HCLGEVF_DEVLINK_FW_STRING_LEN   32
+       struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+       char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN];
+       struct hclgevf_dev *hdev = priv->hdev;
+       int ret;
+
+       ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+       if (ret)
+               return ret;
+
+       snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
+                                HNAE3_FW_VERSION_BYTE3_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE2_MASK,
+                                HNAE3_FW_VERSION_BYTE2_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE1_MASK,
+                                HNAE3_FW_VERSION_BYTE1_SHIFT),
+                hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE0_MASK,
+                                HNAE3_FW_VERSION_BYTE0_SHIFT));
+
+       return devlink_info_version_running_put(req,
+                                               DEVLINK_INFO_VERSION_GENERIC_FW,
+                                               version_str);
+}
+
+static int hclgevf_devlink_reload_down(struct devlink *devlink,
+                                      bool netns_change,
+                                      enum devlink_reload_action action,
+                                      enum devlink_reload_limit limit,
+                                      struct netlink_ext_ack *extack)
+{
+       struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+       struct hclgevf_dev *hdev = priv->hdev;
+       struct hnae3_handle *h = &hdev->nic;
+       struct pci_dev *pdev = hdev->pdev;
+       int ret;
+
+       if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) {
+               dev_err(&pdev->dev, "reset is handling\n");
+               return -EBUSY;
+       }
+
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+               rtnl_lock();
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_DOWN_CLIENT);
+               if (ret) {
+                       rtnl_unlock();
+                       return ret;
+               }
+
+               ret = hdev->nic_client->ops->reset_notify(h,
+                                                         HNAE3_UNINIT_CLIENT);
+               rtnl_unlock();
+               return ret;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int hclgevf_devlink_reload_up(struct devlink *devlink,
+                                    enum devlink_reload_action action,
+                                    enum devlink_reload_limit limit,
+                                    u32 *actions_performed,
+                                    struct netlink_ext_ack *extack)
+{
+       struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
+       struct hclgevf_dev *hdev = priv->hdev;
+       struct hnae3_handle *h = &hdev->nic;
+       int ret;
+
+       *actions_performed = BIT(action);
+       switch (action) {
+       case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+               rtnl_lock();
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_INIT_CLIENT);
+               if (ret) {
+                       rtnl_unlock();
+                       return ret;
+               }
+
+               ret = hdev->nic_client->ops->reset_notify(h, HNAE3_UP_CLIENT);
+               rtnl_unlock();
+               return ret;
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static const struct devlink_ops hclgevf_devlink_ops = {
+       .info_get = hclgevf_devlink_info_get,
+       .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT),
+       .reload_down = hclgevf_devlink_reload_down,
+       .reload_up = hclgevf_devlink_reload_up,
+};
+
+int hclgevf_devlink_init(struct hclgevf_dev *hdev)
+{
+       struct pci_dev *pdev = hdev->pdev;
+       struct hclgevf_devlink_priv *priv;
+       struct devlink *devlink;
+       int ret;
+
+       devlink =
+               devlink_alloc(&hclgevf_devlink_ops,
+                             sizeof(struct hclgevf_devlink_priv), &pdev->dev);
+       if (!devlink)
+               return -ENOMEM;
+
+       priv = devlink_priv(devlink);
+       priv->hdev = hdev;
+       hdev->devlink = devlink;
+
+       ret = devlink_register(devlink);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
+                       ret);
+               goto out_reg_fail;
+       }
+
+       devlink_reload_enable(devlink);
+
+       return 0;
+
+out_reg_fail:
+       devlink_free(devlink);
+       return ret;
+}
+
+void hclgevf_devlink_uninit(struct hclgevf_dev *hdev)
+{
+       struct devlink *devlink = hdev->devlink;
+
+       devlink_reload_disable(devlink);
+
+       devlink_unregister(devlink);
+
+       devlink_free(devlink);
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.h
new file mode 100644 (file)
index 0000000..e09ea3d
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2021 Hisilicon Limited. */
+
+#ifndef __HCLGEVF_DEVLINK_H
+#define __HCLGEVF_DEVLINK_H
+
+#include "hclgevf_main.h"
+
+struct hclgevf_devlink_priv {
+       struct hclgevf_dev *hdev;
+};
+
+int hclgevf_devlink_init(struct hclgevf_dev *hdev);
+void hclgevf_devlink_uninit(struct hclgevf_dev *hdev);
+#endif
index 9386547..82e7270 100644 (file)
@@ -8,6 +8,7 @@
 #include "hclgevf_main.h"
 #include "hclge_mbx.h"
 #include "hnae3.h"
+#include "hclgevf_devlink.h"
 
 #define HCLGEVF_NAME   "hclgevf"
 
@@ -39,16 +40,16 @@ static const u8 hclgevf_hash_key[] = {
 
 MODULE_DEVICE_TABLE(pci, ae_algovf_pci_tbl);
 
-static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG,
-                                        HCLGEVF_CMDQ_TX_ADDR_H_REG,
-                                        HCLGEVF_CMDQ_TX_DEPTH_REG,
-                                        HCLGEVF_CMDQ_TX_TAIL_REG,
-                                        HCLGEVF_CMDQ_TX_HEAD_REG,
-                                        HCLGEVF_CMDQ_RX_ADDR_L_REG,
-                                        HCLGEVF_CMDQ_RX_ADDR_H_REG,
-                                        HCLGEVF_CMDQ_RX_DEPTH_REG,
-                                        HCLGEVF_CMDQ_RX_TAIL_REG,
-                                        HCLGEVF_CMDQ_RX_HEAD_REG,
+static const u32 cmdq_reg_addr_list[] = {HCLGEVF_NIC_CSQ_BASEADDR_L_REG,
+                                        HCLGEVF_NIC_CSQ_BASEADDR_H_REG,
+                                        HCLGEVF_NIC_CSQ_DEPTH_REG,
+                                        HCLGEVF_NIC_CSQ_TAIL_REG,
+                                        HCLGEVF_NIC_CSQ_HEAD_REG,
+                                        HCLGEVF_NIC_CRQ_BASEADDR_L_REG,
+                                        HCLGEVF_NIC_CRQ_BASEADDR_H_REG,
+                                        HCLGEVF_NIC_CRQ_DEPTH_REG,
+                                        HCLGEVF_NIC_CRQ_TAIL_REG,
+                                        HCLGEVF_NIC_CRQ_HEAD_REG,
                                         HCLGEVF_VECTOR0_CMDQ_SRC_REG,
                                         HCLGEVF_VECTOR0_CMDQ_STATE_REG,
                                         HCLGEVF_CMDQ_INTR_EN_REG,
@@ -538,6 +539,7 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev)
        nic->pdev = hdev->pdev;
        nic->numa_node_mask = hdev->numa_node_mask;
        nic->flags |= HNAE3_SUPPORT_VF;
+       nic->kinfo.io_base = hdev->hw.io_base;
 
        ret = hclgevf_knic_setup(hdev);
        if (ret)
@@ -1961,7 +1963,7 @@ static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev)
        dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n",
                 hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG));
        dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
-                hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG));
+                hclgevf_read_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG));
        dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
                 hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING));
        dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
@@ -3339,6 +3341,10 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
        if (ret)
                return ret;
 
+       ret = hclgevf_devlink_init(hdev);
+       if (ret)
+               goto err_devlink_init;
+
        ret = hclgevf_cmd_queue_init(hdev);
        if (ret)
                goto err_cmd_queue_init;
@@ -3443,6 +3449,8 @@ err_misc_irq_init:
 err_cmd_init:
        hclgevf_cmd_uninit(hdev);
 err_cmd_queue_init:
+       hclgevf_devlink_uninit(hdev);
+err_devlink_init:
        hclgevf_pci_uninit(hdev);
        clear_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state);
        return ret;
@@ -3464,6 +3472,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
        }
 
        hclgevf_cmd_uninit(hdev);
+       hclgevf_devlink_uninit(hdev);
        hclgevf_pci_uninit(hdev);
        hclgevf_uninit_mac_list(hdev);
 }
index e8013be..883130a 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/fs.h>
 #include <linux/if_vlan.h>
 #include <linux/types.h>
+#include <net/devlink.h>
 #include "hclge_mbx.h"
 #include "hclgevf_cmd.h"
 #include "hnae3.h"
 #define HCLGEVF_VECTOR_VF_OFFSET               0x100000
 
 /* bar registers for cmdq */
-#define HCLGEVF_CMDQ_TX_ADDR_L_REG             0x27000
-#define HCLGEVF_CMDQ_TX_ADDR_H_REG             0x27004
-#define HCLGEVF_CMDQ_TX_DEPTH_REG              0x27008
-#define HCLGEVF_CMDQ_TX_TAIL_REG               0x27010
-#define HCLGEVF_CMDQ_TX_HEAD_REG               0x27014
-#define HCLGEVF_CMDQ_RX_ADDR_L_REG             0x27018
-#define HCLGEVF_CMDQ_RX_ADDR_H_REG             0x2701C
-#define HCLGEVF_CMDQ_RX_DEPTH_REG              0x27020
-#define HCLGEVF_CMDQ_RX_TAIL_REG               0x27024
-#define HCLGEVF_CMDQ_RX_HEAD_REG               0x27028
+#define HCLGEVF_NIC_CSQ_BASEADDR_L_REG         0x27000
+#define HCLGEVF_NIC_CSQ_BASEADDR_H_REG         0x27004
+#define HCLGEVF_NIC_CSQ_DEPTH_REG              0x27008
+#define HCLGEVF_NIC_CSQ_TAIL_REG               0x27010
+#define HCLGEVF_NIC_CSQ_HEAD_REG               0x27014
+#define HCLGEVF_NIC_CRQ_BASEADDR_L_REG         0x27018
+#define HCLGEVF_NIC_CRQ_BASEADDR_H_REG         0x2701C
+#define HCLGEVF_NIC_CRQ_DEPTH_REG              0x27020
+#define HCLGEVF_NIC_CRQ_TAIL_REG               0x27024
+#define HCLGEVF_NIC_CRQ_HEAD_REG               0x27028
+
 #define HCLGEVF_CMDQ_INTR_EN_REG               0x27108
 #define HCLGEVF_CMDQ_INTR_GEN_REG              0x2710C
 
@@ -316,7 +318,6 @@ struct hclgevf_dev {
 
        struct hclgevf_mac_table_cfg mac_table;
 
-       bool mbx_event_pending;
        struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
        struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
 
@@ -332,6 +333,8 @@ struct hclgevf_dev {
        u32 flag;
        unsigned long serv_processed_cnt;
        unsigned long last_serv_processed;
+
+       struct devlink *devlink;
 };
 
 static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
index b339b9b..fdc66fa 100644 (file)
@@ -155,18 +155,66 @@ static bool hclgevf_cmd_crq_empty(struct hclgevf_hw *hw)
        return tail == hw->cmq.crq.next_to_use;
 }
 
+static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev,
+                                       struct hclge_mbx_pf_to_vf_cmd *req)
+{
+       struct hclgevf_mbx_resp_status *resp = &hdev->mbx_resp;
+
+       if (resp->received_resp)
+               dev_warn(&hdev->pdev->dev,
+                        "VF mbx resp flag not clear(%u)\n",
+                        req->msg.vf_mbx_msg_code);
+
+       resp->origin_mbx_msg =
+                       (req->msg.vf_mbx_msg_code << 16);
+       resp->origin_mbx_msg |= req->msg.vf_mbx_msg_subcode;
+       resp->resp_status =
+               hclgevf_resp_to_errno(req->msg.resp_status);
+       memcpy(resp->additional_info, req->msg.resp_data,
+              HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
+       if (req->match_id) {
+               /* If match_id is not zero, it means PF support match_id.
+                * if the match_id is right, VF get the right response, or
+                * ignore the response. and driver will clear hdev->mbx_resp
+                * when send next message which need response.
+                */
+               if (req->match_id == resp->match_id)
+                       resp->received_resp = true;
+       } else {
+               resp->received_resp = true;
+       }
+}
+
+static void hclgevf_handle_mbx_msg(struct hclgevf_dev *hdev,
+                                  struct hclge_mbx_pf_to_vf_cmd *req)
+{
+       /* we will drop the async msg if we find ARQ as full
+        * and continue with next message
+        */
+       if (atomic_read(&hdev->arq.count) >=
+           HCLGE_MBX_MAX_ARQ_MSG_NUM) {
+               dev_warn(&hdev->pdev->dev,
+                        "Async Q full, dropping msg(%u)\n",
+                        req->msg.code);
+               return;
+       }
+
+       /* tail the async message in arq */
+       memcpy(hdev->arq.msg_q[hdev->arq.tail], &req->msg,
+              HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
+       hclge_mbx_tail_ptr_move_arq(hdev->arq);
+       atomic_inc(&hdev->arq.count);
+
+       hclgevf_mbx_task_schedule(hdev);
+}
+
 void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 {
-       struct hclgevf_mbx_resp_status *resp;
        struct hclge_mbx_pf_to_vf_cmd *req;
        struct hclgevf_cmq_ring *crq;
        struct hclgevf_desc *desc;
-       u16 *msg_q;
        u16 flag;
-       u8 *temp;
-       int i;
 
-       resp = &hdev->mbx_resp;
        crq = &hdev->hw.cmq.crq;
 
        while (!hclgevf_cmd_crq_empty(&hdev->hw)) {
@@ -200,69 +248,14 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
                 */
                switch (req->msg.code) {
                case HCLGE_MBX_PF_VF_RESP:
-                       if (resp->received_resp)
-                               dev_warn(&hdev->pdev->dev,
-                                        "VF mbx resp flag not clear(%u)\n",
-                                        req->msg.vf_mbx_msg_code);
-                       resp->received_resp = true;
-
-                       resp->origin_mbx_msg =
-                                       (req->msg.vf_mbx_msg_code << 16);
-                       resp->origin_mbx_msg |= req->msg.vf_mbx_msg_subcode;
-                       resp->resp_status =
-                               hclgevf_resp_to_errno(req->msg.resp_status);
-
-                       temp = (u8 *)req->msg.resp_data;
-                       for (i = 0; i < HCLGE_MBX_MAX_RESP_DATA_SIZE; i++) {
-                               resp->additional_info[i] = *temp;
-                               temp++;
-                       }
-
-                       /* If match_id is not zero, it means PF support
-                        * match_id. If the match_id is right, VF get the
-                        * right response, otherwise ignore the response.
-                        * Driver will clear hdev->mbx_resp when send
-                        * next message which need response.
-                        */
-                       if (req->match_id) {
-                               if (req->match_id == resp->match_id)
-                                       resp->received_resp = true;
-                       } else {
-                               resp->received_resp = true;
-                       }
+                       hclgevf_handle_mbx_response(hdev, req);
                        break;
                case HCLGE_MBX_LINK_STAT_CHANGE:
                case HCLGE_MBX_ASSERTING_RESET:
                case HCLGE_MBX_LINK_STAT_MODE:
                case HCLGE_MBX_PUSH_VLAN_INFO:
                case HCLGE_MBX_PUSH_PROMISC_INFO:
-                       /* set this mbx event as pending. This is required as we
-                        * might loose interrupt event when mbx task is busy
-                        * handling. This shall be cleared when mbx task just
-                        * enters handling state.
-                        */
-                       hdev->mbx_event_pending = true;
-
-                       /* we will drop the async msg if we find ARQ as full
-                        * and continue with next message
-                        */
-                       if (atomic_read(&hdev->arq.count) >=
-                           HCLGE_MBX_MAX_ARQ_MSG_NUM) {
-                               dev_warn(&hdev->pdev->dev,
-                                        "Async Q full, dropping msg(%u)\n",
-                                        req->msg.code);
-                               break;
-                       }
-
-                       /* tail the async message in arq */
-                       msg_q = hdev->arq.msg_q[hdev->arq.tail];
-                       memcpy(&msg_q[0], &req->msg,
-                              HCLGE_MBX_MAX_ARQ_MSG_SIZE * sizeof(u16));
-                       hclge_mbx_tail_ptr_move_arq(hdev->arq);
-                       atomic_inc(&hdev->arq.count);
-
-                       hclgevf_mbx_task_schedule(hdev);
-
+                       hclgevf_handle_mbx_msg(hdev, req);
                        break;
                default:
                        dev_err(&hdev->pdev->dev,
@@ -298,11 +291,6 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
        u8 flag;
        u8 idx;
 
-       /* we can safely clear it now as we are at start of the async message
-        * processing
-        */
-       hdev->mbx_event_pending = false;
-
        tail = hdev->arq.tail;
 
        /* process all the async queue messages */
index 58d5646..6e11ee3 100644 (file)
@@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = {
        .flash_update = hinic_devlink_flash_update,
 };
 
-struct devlink *hinic_devlink_alloc(void)
+struct devlink *hinic_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev));
+       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev);
 }
 
 void hinic_devlink_free(struct devlink *devlink)
@@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink)
        devlink_free(devlink);
 }
 
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
+int hinic_devlink_register(struct hinic_devlink_priv *priv)
 {
        struct devlink *devlink = priv_to_devlink(priv);
 
-       return devlink_register(devlink, dev);
+       return devlink_register(devlink);
 }
 
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
index a090ebc..9e31501 100644 (file)
@@ -108,9 +108,9 @@ struct host_image_st {
        u32 device_id;
 };
 
-struct devlink *hinic_devlink_alloc(void);
+struct devlink *hinic_devlink_alloc(struct device *dev);
 void hinic_devlink_free(struct devlink *devlink);
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
+int hinic_devlink_register(struct hinic_devlink_priv *priv);
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv);
 
 int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
index 162d3c3..b431c30 100644 (file)
@@ -795,13 +795,17 @@ static int __hinic_set_coalesce(struct net_device *netdev,
 }
 
 static int hinic_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        return __hinic_get_coalesce(netdev, coal, COALESCE_ALL_QUEUE);
 }
 
 static int hinic_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        return __hinic_set_coalesce(netdev, coal, COALESCE_ALL_QUEUE);
 }
index 428108e..56b6b04 100644 (file)
@@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
                return err;
        }
 
-       err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
+       err = hinic_devlink_register(hwdev->devlink_dev);
        if (err) {
                dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
                hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
index 405ee4d..ae707e3 100644 (file)
@@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev)
        struct devlink *devlink;
        int err, num_qps;
 
-       devlink = hinic_devlink_alloc();
+       devlink = hinic_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "Hinic devlink alloc failed\n");
                return -ENOMEM;
@@ -1392,28 +1392,16 @@ static int hinic_probe(struct pci_dev *pdev,
 
        pci_set_master(pdev);
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev, "Couldn't set 64-bit DMA mask\n");
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Failed to set DMA mask\n");
                        goto err_dma_mask;
                }
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err) {
-               dev_warn(&pdev->dev,
-                        "Couldn't set 64-bit consistent DMA mask\n");
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "Failed to set consistent DMA mask\n");
-                       goto err_dma_consistent_mask;
-               }
-       }
-
        err = nic_dev_init(pdev);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize NIC device\n");
@@ -1424,7 +1412,6 @@ static int hinic_probe(struct pci_dev *pdev,
        return 0;
 
 err_nic_dev_init:
-err_dma_consistent_mask:
 err_dma_mask:
        pci_release_regions(pdev);
 
index f8a2645..a78c398 100644 (file)
@@ -836,8 +836,10 @@ int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
 int hinic_ndo_set_vf_bw(struct net_device *netdev,
                        int vf, int min_tx_rate, int max_tx_rate)
 {
-       u32 speeds[] = {SPEED_10, SPEED_100, SPEED_1000, SPEED_10000,
-                       SPEED_25000, SPEED_40000, SPEED_100000};
+       static const u32 speeds[] = {
+               SPEED_10, SPEED_100, SPEED_1000, SPEED_10000,
+               SPEED_25000, SPEED_40000, SPEED_100000
+       };
        struct hinic_dev *nic_dev = netdev_priv(netdev);
        struct hinic_port_cap port_cap = { 0 };
        enum hinic_port_link_state link_state;
index fc8c7cd..b8a4014 100644 (file)
@@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str)
               add, add + 6, add, add[12], add[13], str);
 }
 
-static int io = 0x300;
-static int irq = 10;
-
 static const struct net_device_ops i596_netdev_ops = {
        .ndo_open               = i596_open,
        .ndo_stop               = i596_close,
@@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
 };
 
-struct net_device * __init i82596_probe(int unit)
+static struct net_device * __init i82596_probe(void)
 {
        struct net_device *dev;
        int i;
@@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       } else {
-               dev->base_addr = io;
-               dev->irq = irq;
-       }
-
 #ifdef ENABLE_MVME16x_NET
        if (MACH_IS_MVME16x) {
                if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) {
@@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev)
        }
 }
 
-#ifdef MODULE
 static struct net_device *dev_82596;
 
 static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "i82596 debug mask");
 
-int __init init_module(void)
+static int __init i82596_init(void)
 {
        if (debug >= 0)
                i596_debug = debug;
-       dev_82596 = i82596_probe(-1);
+       dev_82596 = i82596_probe();
        return PTR_ERR_OR_ZERO(dev_82596);
 }
+module_init(i82596_init);
 
-void __exit cleanup_module(void)
+static void __exit i82596_cleanup(void)
 {
        unregister_netdev(dev_82596);
 #ifdef __mc68000__
@@ -1544,5 +1533,4 @@ void __exit cleanup_module(void)
        free_page ((u32)(dev_82596->mem_start));
        free_netdev(dev_82596);
 }
-
-#endif                         /* MODULE */
+module_exit(i82596_cleanup);
index 4564ee0..893e0dd 100644 (file)
@@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */
 static int fifo=0x8;   /* don't change */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
@@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev)
        memset((char *)p->scb,0,sizeof(struct scb_struct));
 }
 
-struct net_device * __init sun3_82586_probe(int unit)
+static int __init sun3_82586_probe(void)
 {
        struct net_device *dev;
        unsigned long ioaddr;
@@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit)
                break;
 
        default:
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
        }
 
        if (found)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE);
        if (!ioaddr)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        found = 1;
 
        dev = alloc_etherdev(sizeof(struct priv));
        if (!dev)
                goto out;
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
        dev->irq = IE_IRQ;
        dev->base_addr = ioaddr;
        err = sun3_82586_probe1(dev, ioaddr);
@@ -326,8 +322,9 @@ out1:
        free_netdev(dev);
 out:
        iounmap((void __iomem *)ioaddr);
-       return ERR_PTR(err);
+       return err;
 }
+module_init(sun3_82586_probe);
 
 static const struct net_device_ops sun3_82586_netdev_ops = {
        .ndo_open               = sun3_82586_open,
index 471be6e..664a91a 100644 (file)
@@ -3011,7 +3011,7 @@ static const struct net_device_ops emac_netdev_ops = {
        .ndo_stop               = emac_close,
        .ndo_get_stats          = emac_stats,
        .ndo_set_rx_mode        = emac_set_multicast_list,
-       .ndo_do_ioctl           = emac_ioctl,
+       .ndo_eth_ioctl          = emac_ioctl,
        .ndo_tx_timeout         = emac_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = emac_set_mac_address,
@@ -3023,7 +3023,7 @@ static const struct net_device_ops emac_gige_netdev_ops = {
        .ndo_stop               = emac_close,
        .ndo_get_stats          = emac_stats,
        .ndo_set_rx_mode        = emac_set_multicast_list,
-       .ndo_do_ioctl           = emac_ioctl,
+       .ndo_eth_ioctl          = emac_ioctl,
        .ndo_tx_timeout         = emac_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = emac_set_mac_address,
index 737ba85..3d9b4f9 100644 (file)
@@ -1630,7 +1630,7 @@ static const struct net_device_ops ibmveth_netdev_ops = {
        .ndo_stop               = ibmveth_close,
        .ndo_start_xmit         = ibmveth_start_xmit,
        .ndo_set_rx_mode        = ibmveth_set_multicast_list,
-       .ndo_do_ioctl           = ibmveth_ioctl,
+       .ndo_eth_ioctl          = ibmveth_ioctl,
        .ndo_change_mtu         = ibmveth_change_mtu,
        .ndo_fix_features       = ibmveth_fix_features,
        .ndo_set_features       = ibmveth_set_features,
index 82744a7..b0b6f90 100644 (file)
@@ -58,8 +58,8 @@ config E1000
 config E1000E
        tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support"
        depends on PCI && (!SPARC32 || BROKEN)
+       depends on PTP_1588_CLOCK_OPTIONAL
        select CRC32
-       imply PTP_1588_CLOCK
        help
          This driver supports the PCI-Express Intel(R) PRO/1000 gigabit
          ethernet family of adapters. For PCI or PCI-X e1000 adapters,
@@ -87,7 +87,7 @@ config E1000E_HWTS
 config IGB
        tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
        depends on PCI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select I2C
        select I2C_ALGOBIT
        help
@@ -159,9 +159,9 @@ config IXGB
 config IXGBE
        tristate "Intel(R) 10GbE PCI Express adapters support"
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MDIO
        select PHYLIB
-       imply PTP_1588_CLOCK
        help
          This driver supports Intel(R) 10GbE PCI Express family of
          adapters.  For more information on how to identify your adapter, go
@@ -239,7 +239,7 @@ config IXGBEVF_IPSEC
 
 config I40E
        tristate "Intel(R) Ethernet Controller XL710 Family support"
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        depends on PCI
        select AUXILIARY_BUS
        help
@@ -295,11 +295,11 @@ config ICE
        tristate "Intel(R) Ethernet Connection E800 Series Support"
        default n
        depends on PCI_MSI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select AUXILIARY_BUS
        select DIMLIB
        select NET_DEVLINK
        select PLDMFW
-       imply PTP_1588_CLOCK
        help
          This driver supports Intel(R) Ethernet Connection E800 Series of
          devices.  For more information on how to identify your adapter, go
@@ -317,7 +317,7 @@ config FM10K
        tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
        default n
        depends on PCI_MSI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This driver supports Intel(R) FM10000 Ethernet Switch Host
          Interface.  For more information on how to identify your adapter,
index 1b0958b..373eb02 100644 (file)
@@ -2715,10 +2715,10 @@ static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_TEST:
-               memcpy(data, *e100_gstrings_test, sizeof(e100_gstrings_test));
+               memcpy(data, e100_gstrings_test, sizeof(e100_gstrings_test));
                break;
        case ETH_SS_STATS:
-               memcpy(data, *e100_gstrings_stats, sizeof(e100_gstrings_stats));
+               memcpy(data, e100_gstrings_stats, sizeof(e100_gstrings_stats));
                break;
        }
 }
@@ -2809,7 +2809,7 @@ static const struct net_device_ops e100_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = e100_set_multicast_list,
        .ndo_set_mac_address    = e100_set_mac_address,
-       .ndo_do_ioctl           = e100_do_ioctl,
+       .ndo_eth_ioctl          = e100_do_ioctl,
        .ndo_tx_timeout         = e100_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = e100_netpoll,
index 3c51ee9..0a57172 100644 (file)
@@ -1739,7 +1739,9 @@ static int e1000_set_phys_id(struct net_device *netdev,
 }
 
 static int e1000_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
 
@@ -1755,7 +1757,9 @@ static int e1000_get_coalesce(struct net_device *netdev,
 }
 
 static int e1000_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
index c2a1091..bed4f04 100644 (file)
@@ -832,7 +832,7 @@ static const struct net_device_ops e1000_netdev_ops = {
        .ndo_set_mac_address    = e1000_set_mac,
        .ndo_tx_timeout         = e1000_tx_timeout,
        .ndo_change_mtu         = e1000_change_mtu,
-       .ndo_do_ioctl           = e1000_ioctl,
+       .ndo_eth_ioctl          = e1000_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_vlan_rx_add_vid    = e1000_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = e1000_vlan_rx_kill_vid,
index 06442e6..8515e00 100644 (file)
@@ -903,6 +903,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                mask |= BIT(18);
                break;
        default:
@@ -1569,6 +1570,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                fext_nvm11 = er32(FEXTNVM11);
                fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX;
                ew32(FEXTNVM11, fext_nvm11);
@@ -1991,7 +1993,9 @@ static int e1000_set_phys_id(struct net_device *netdev,
 }
 
 static int e1000_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
 
@@ -2004,7 +2008,9 @@ static int e1000_get_coalesce(struct net_device *netdev,
 }
 
 static int e1000_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
 
index db79c4e..bcf680e 100644 (file)
@@ -98,14 +98,22 @@ struct e1000_hw;
 #define E1000_DEV_ID_PCH_TGP_I219_V14          0x15FA
 #define E1000_DEV_ID_PCH_TGP_I219_LM15         0x15F4
 #define E1000_DEV_ID_PCH_TGP_I219_V15          0x15F5
+#define E1000_DEV_ID_PCH_RPL_I219_LM23         0x0DC5
+#define E1000_DEV_ID_PCH_RPL_I219_V23          0x0DC6
 #define E1000_DEV_ID_PCH_ADP_I219_LM16         0x1A1E
 #define E1000_DEV_ID_PCH_ADP_I219_V16          0x1A1F
 #define E1000_DEV_ID_PCH_ADP_I219_LM17         0x1A1C
 #define E1000_DEV_ID_PCH_ADP_I219_V17          0x1A1D
+#define E1000_DEV_ID_PCH_RPL_I219_LM22         0x0DC7
+#define E1000_DEV_ID_PCH_RPL_I219_V22          0x0DC8
 #define E1000_DEV_ID_PCH_MTP_I219_LM18         0x550A
 #define E1000_DEV_ID_PCH_MTP_I219_V18          0x550B
 #define E1000_DEV_ID_PCH_MTP_I219_LM19         0x550C
 #define E1000_DEV_ID_PCH_MTP_I219_V19          0x550D
+#define E1000_DEV_ID_PCH_LNP_I219_LM20         0x550E
+#define E1000_DEV_ID_PCH_LNP_I219_V20          0x550F
+#define E1000_DEV_ID_PCH_LNP_I219_LM21         0x5510
+#define E1000_DEV_ID_PCH_LNP_I219_V21          0x5511
 
 #define E1000_REVISION_4       4
 
@@ -132,6 +140,7 @@ enum e1000_mac_type {
        e1000_pch_tgp,
        e1000_pch_adp,
        e1000_pch_mtp,
+       e1000_pch_lnp,
 };
 
 enum e1000_media_type {
index a80336c..60c582a 100644 (file)
@@ -321,6 +321,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                if (e1000_phy_is_accessible_pchlan(hw))
                        break;
 
@@ -466,6 +467,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
                case e1000_pch_tgp:
                case e1000_pch_adp:
                case e1000_pch_mtp:
+               case e1000_pch_lnp:
                        /* In case the PHY needs to be in mdio slow mode,
                         * set slow mode and try to get the PHY id again.
                         */
@@ -711,6 +713,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
        case e1000_pchlan:
                /* check management mode */
                mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
@@ -1278,9 +1281,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
                        usleep_range(10000, 11000);
                }
                if (firmware_bug)
-                       e_warn("ULP_CONFIG_DONE took %dmsec.  This is a firmware bug\n", i * 10);
+                       e_warn("ULP_CONFIG_DONE took %d msec. This is a firmware bug\n",
+                              i * 10);
                else
-                       e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10);
+                       e_dbg("ULP_CONFIG_DONE cleared after %d msec\n",
+                             i * 10);
 
                if (force) {
                        mac_reg = er32(H2ME);
@@ -1675,6 +1680,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                rc = e1000_init_phy_params_pchlan(hw);
                break;
        default:
@@ -2130,6 +2136,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
                break;
        default:
@@ -3174,6 +3181,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                bank1_offset = nvm->flash_bank_size;
                act_offset = E1000_ICH_NVM_SIG_WORD;
 
@@ -4113,6 +4121,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                word = NVM_COMPAT;
                valid_csum_mask = NVM_COMPAT_VALID_CSUM;
                break;
index e757896..d6a092e 100644 (file)
 #define E1000_FWSM_WLOCK_MAC_MASK      0x0380
 #define E1000_FWSM_WLOCK_MAC_SHIFT     7
 #define E1000_FWSM_ULP_CFG_DONE                0x00000400      /* Low power cfg done */
+#define E1000_EXFWSM_DPG_EXIT_DONE     0x00000001
 
 /* Shared Receive Address Registers */
 #define E1000_SHRAL_PCH_LPT(_i)                (0x05408 + ((_i) * 8))
 #define E1000_SHRAH_PCH_LPT(_i)                (0x0540C + ((_i) * 8))
 
 #define E1000_H2ME             0x05B50 /* Host to ME */
+#define E1000_H2ME_START_DPG   0x00000001      /* indicate the ME of DPG */
+#define E1000_H2ME_EXIT_DPG    0x00000002      /* indicate the ME exit DPG */
 #define E1000_H2ME_ULP         0x00000800      /* ULP Indication Bit */
 #define E1000_H2ME_ENFORCE_SETTINGS    0x00001000      /* Enforce Settings */
 
index 757a54c..900b3ab 100644 (file)
@@ -3550,6 +3550,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
                        /* Stable 24MHz frequency */
                        incperiod = INCPERIOD_24MHZ;
@@ -4068,6 +4069,7 @@ void e1000e_reset(struct e1000_adapter *adapter)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                fc->refresh_time = 0xFFFF;
                fc->pause_time = 0xFFFF;
 
@@ -6343,42 +6345,110 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
        u32 mac_data;
        u16 phy_data;
 
-       /* Disable the periodic inband message,
-        * don't request PCIe clock in K1 page770_17[10:9] = 10b
-        */
-       e1e_rphy(hw, HV_PM_CTRL, &phy_data);
-       phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
-       phy_data |= BIT(10);
-       e1e_wphy(hw, HV_PM_CTRL, phy_data);
+       if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+               /* Request ME configure the device for S0ix */
+               mac_data = er32(H2ME);
+               mac_data |= E1000_H2ME_START_DPG;
+               mac_data &= ~E1000_H2ME_EXIT_DPG;
+               ew32(H2ME, mac_data);
+       } else {
+               /* Request driver configure the device to S0ix */
+               /* Disable the periodic inband message,
+                * don't request PCIe clock in K1 page770_17[10:9] = 10b
+                */
+               e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+               phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
+               phy_data |= BIT(10);
+               e1e_wphy(hw, HV_PM_CTRL, phy_data);
 
-       /* Make sure we don't exit K1 every time a new packet arrives
-        * 772_29[5] = 1 CS_Mode_Stay_In_K1
-        */
-       e1e_rphy(hw, I217_CGFREG, &phy_data);
-       phy_data |= BIT(5);
-       e1e_wphy(hw, I217_CGFREG, phy_data);
+               /* Make sure we don't exit K1 every time a new packet arrives
+                * 772_29[5] = 1 CS_Mode_Stay_In_K1
+                */
+               e1e_rphy(hw, I217_CGFREG, &phy_data);
+               phy_data |= BIT(5);
+               e1e_wphy(hw, I217_CGFREG, phy_data);
 
-       /* Change the MAC/PHY interface to SMBus
-        * Force the SMBus in PHY page769_23[0] = 1
-        * Force the SMBus in MAC CTRL_EXT[11] = 1
-        */
-       e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
-       phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
-       e1e_wphy(hw, CV_SMB_CTRL, phy_data);
-       mac_data = er32(CTRL_EXT);
-       mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
-       ew32(CTRL_EXT, mac_data);
+               /* Change the MAC/PHY interface to SMBus
+                * Force the SMBus in PHY page769_23[0] = 1
+                * Force the SMBus in MAC CTRL_EXT[11] = 1
+                */
+               e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+               phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
+               e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+               mac_data = er32(CTRL_EXT);
+               mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
+               ew32(CTRL_EXT, mac_data);
+
+               /* DFT control: PHY bit: page769_20[0] = 1
+                * page769_20[7] - PHY PLL stop
+                * page769_20[8] - PHY go to the electrical idle
+                * page769_20[9] - PHY serdes disable
+                * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
+                */
+               e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
+               phy_data |= BIT(0);
+               phy_data |= BIT(7);
+               phy_data |= BIT(8);
+               phy_data |= BIT(9);
+               e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+
+               mac_data = er32(EXTCNF_CTRL);
+               mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+               ew32(EXTCNF_CTRL, mac_data);
+
+               /* Enable the Dynamic Power Gating in the MAC */
+               mac_data = er32(FEXTNVM7);
+               mac_data |= BIT(22);
+               ew32(FEXTNVM7, mac_data);
+
+               /* Disable disconnected cable conditioning for Power Gating */
+               mac_data = er32(DPGFR);
+               mac_data |= BIT(2);
+               ew32(DPGFR, mac_data);
+
+               /* Don't wake from dynamic Power Gating with clock request */
+               mac_data = er32(FEXTNVM12);
+               mac_data |= BIT(12);
+               ew32(FEXTNVM12, mac_data);
+
+               /* Ungate PGCB clock */
+               mac_data = er32(FEXTNVM9);
+               mac_data &= ~BIT(28);
+               ew32(FEXTNVM9, mac_data);
+
+               /* Enable K1 off to enable mPHY Power Gating */
+               mac_data = er32(FEXTNVM6);
+               mac_data |= BIT(31);
+               ew32(FEXTNVM6, mac_data);
+
+               /* Enable mPHY power gating for any link and speed */
+               mac_data = er32(FEXTNVM8);
+               mac_data |= BIT(9);
+               ew32(FEXTNVM8, mac_data);
+
+               /* Enable the Dynamic Clock Gating in the DMA and MAC */
+               mac_data = er32(CTRL_EXT);
+               mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+               ew32(CTRL_EXT, mac_data);
+
+               /* No MAC DPG gating SLP_S0 in modern standby
+                * Switch the logic of the lanphypc to use PMC counter
+                */
+               mac_data = er32(FEXTNVM5);
+               mac_data |= BIT(7);
+               ew32(FEXTNVM5, mac_data);
+       }
 
-       /* DFT control: PHY bit: page769_20[0] = 1
-        * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
-        */
-       e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
-       phy_data |= BIT(0);
-       e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+       /* Disable the time synchronization clock */
+       mac_data = er32(FEXTNVM7);
+       mac_data |= BIT(31);
+       mac_data &= ~BIT(0);
+       ew32(FEXTNVM7, mac_data);
 
-       mac_data = er32(EXTCNF_CTRL);
-       mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
-       ew32(EXTCNF_CTRL, mac_data);
+       /* Dynamic Power Gating Enable */
+       mac_data = er32(CTRL_EXT);
+       mac_data |= BIT(3);
+       ew32(CTRL_EXT, mac_data);
 
        /* Check MAC Tx/Rx packet buffer pointers.
         * Reset MAC Tx/Rx packet buffer pointers to suppress any
@@ -6414,148 +6484,130 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
        mac_data = er32(RDFPC);
        if (mac_data)
                ew32(RDFPC, 0);
-
-       /* Enable the Dynamic Power Gating in the MAC */
-       mac_data = er32(FEXTNVM7);
-       mac_data |= BIT(22);
-       ew32(FEXTNVM7, mac_data);
-
-       /* Disable the time synchronization clock */
-       mac_data = er32(FEXTNVM7);
-       mac_data |= BIT(31);
-       mac_data &= ~BIT(0);
-       ew32(FEXTNVM7, mac_data);
-
-       /* Dynamic Power Gating Enable */
-       mac_data = er32(CTRL_EXT);
-       mac_data |= BIT(3);
-       ew32(CTRL_EXT, mac_data);
-
-       /* Disable disconnected cable conditioning for Power Gating */
-       mac_data = er32(DPGFR);
-       mac_data |= BIT(2);
-       ew32(DPGFR, mac_data);
-
-       /* Don't wake from dynamic Power Gating with clock request */
-       mac_data = er32(FEXTNVM12);
-       mac_data |= BIT(12);
-       ew32(FEXTNVM12, mac_data);
-
-       /* Ungate PGCB clock */
-       mac_data = er32(FEXTNVM9);
-       mac_data &= ~BIT(28);
-       ew32(FEXTNVM9, mac_data);
-
-       /* Enable K1 off to enable mPHY Power Gating */
-       mac_data = er32(FEXTNVM6);
-       mac_data |= BIT(31);
-       ew32(FEXTNVM6, mac_data);
-
-       /* Enable mPHY power gating for any link and speed */
-       mac_data = er32(FEXTNVM8);
-       mac_data |= BIT(9);
-       ew32(FEXTNVM8, mac_data);
-
-       /* Enable the Dynamic Clock Gating in the DMA and MAC */
-       mac_data = er32(CTRL_EXT);
-       mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
-       ew32(CTRL_EXT, mac_data);
-
-       /* No MAC DPG gating SLP_S0 in modern standby
-        * Switch the logic of the lanphypc to use PMC counter
-        */
-       mac_data = er32(FEXTNVM5);
-       mac_data |= BIT(7);
-       ew32(FEXTNVM5, mac_data);
 }
 
 static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
+       bool firmware_bug = false;
        u32 mac_data;
        u16 phy_data;
+       u32 i = 0;
+
+       if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+               /* Request ME unconfigure the device from S0ix */
+               mac_data = er32(H2ME);
+               mac_data &= ~E1000_H2ME_START_DPG;
+               mac_data |= E1000_H2ME_EXIT_DPG;
+               ew32(H2ME, mac_data);
+
+               /* Poll up to 2.5 seconds for ME to unconfigure DPG.
+                * If this takes more than 1 second, show a warning indicating a
+                * firmware bug
+                */
+               while (!(er32(EXFWSM) & E1000_EXFWSM_DPG_EXIT_DONE)) {
+                       if (i > 100 && !firmware_bug)
+                               firmware_bug = true;
 
-       /* Disable the Dynamic Power Gating in the MAC */
-       mac_data = er32(FEXTNVM7);
-       mac_data &= 0xFFBFFFFF;
-       ew32(FEXTNVM7, mac_data);
+                       if (i++ == 250) {
+                               e_dbg("Timeout (firmware bug): %d msec\n",
+                                     i * 10);
+                               break;
+                       }
 
-       /* Enable the time synchronization clock */
-       mac_data = er32(FEXTNVM7);
-       mac_data |= BIT(0);
-       ew32(FEXTNVM7, mac_data);
+                       usleep_range(10000, 11000);
+               }
+               if (firmware_bug)
+                       e_warn("DPG_EXIT_DONE took %d msec. This is a firmware bug\n",
+                              i * 10);
+               else
+                       e_dbg("DPG_EXIT_DONE cleared after %d msec\n", i * 10);
+       } else {
+               /* Request driver unconfigure the device from S0ix */
+
+               /* Disable the Dynamic Power Gating in the MAC */
+               mac_data = er32(FEXTNVM7);
+               mac_data &= 0xFFBFFFFF;
+               ew32(FEXTNVM7, mac_data);
+
+               /* Disable mPHY power gating for any link and speed */
+               mac_data = er32(FEXTNVM8);
+               mac_data &= ~BIT(9);
+               ew32(FEXTNVM8, mac_data);
+
+               /* Disable K1 off */
+               mac_data = er32(FEXTNVM6);
+               mac_data &= ~BIT(31);
+               ew32(FEXTNVM6, mac_data);
+
+               /* Disable Ungate PGCB clock */
+               mac_data = er32(FEXTNVM9);
+               mac_data |= BIT(28);
+               ew32(FEXTNVM9, mac_data);
+
+               /* Cancel not waking from dynamic
+                * Power Gating with clock request
+                */
+               mac_data = er32(FEXTNVM12);
+               mac_data &= ~BIT(12);
+               ew32(FEXTNVM12, mac_data);
 
-       /* Disable mPHY power gating for any link and speed */
-       mac_data = er32(FEXTNVM8);
-       mac_data &= ~BIT(9);
-       ew32(FEXTNVM8, mac_data);
+               /* Cancel disable disconnected cable conditioning
+                * for Power Gating
+                */
+               mac_data = er32(DPGFR);
+               mac_data &= ~BIT(2);
+               ew32(DPGFR, mac_data);
 
-       /* Disable K1 off */
-       mac_data = er32(FEXTNVM6);
-       mac_data &= ~BIT(31);
-       ew32(FEXTNVM6, mac_data);
+               /* Disable the Dynamic Clock Gating in the DMA and MAC */
+               mac_data = er32(CTRL_EXT);
+               mac_data &= 0xFFF7FFFF;
+               ew32(CTRL_EXT, mac_data);
 
-       /* Disable Ungate PGCB clock */
-       mac_data = er32(FEXTNVM9);
-       mac_data |= BIT(28);
-       ew32(FEXTNVM9, mac_data);
+               /* Revert the lanphypc logic to use the internal Gbe counter
+                * and not the PMC counter
+                */
+               mac_data = er32(FEXTNVM5);
+               mac_data &= 0xFFFFFF7F;
+               ew32(FEXTNVM5, mac_data);
 
-       /* Cancel not waking from dynamic
-        * Power Gating with clock request
-        */
-       mac_data = er32(FEXTNVM12);
-       mac_data &= ~BIT(12);
-       ew32(FEXTNVM12, mac_data);
+               /* Enable the periodic inband message,
+                * Request PCIe clock in K1 page770_17[10:9] =01b
+                */
+               e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+               phy_data &= 0xFBFF;
+               phy_data |= HV_PM_CTRL_K1_CLK_REQ;
+               e1e_wphy(hw, HV_PM_CTRL, phy_data);
 
-       /* Cancel disable disconnected cable conditioning
-        * for Power Gating
-        */
-       mac_data = er32(DPGFR);
-       mac_data &= ~BIT(2);
-       ew32(DPGFR, mac_data);
+               /* Return back configuration
+                * 772_29[5] = 0 CS_Mode_Stay_In_K1
+                */
+               e1e_rphy(hw, I217_CGFREG, &phy_data);
+               phy_data &= 0xFFDF;
+               e1e_wphy(hw, I217_CGFREG, phy_data);
+
+               /* Change the MAC/PHY interface to Kumeran
+                * Unforce the SMBus in PHY page769_23[0] = 0
+                * Unforce the SMBus in MAC CTRL_EXT[11] = 0
+                */
+               e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+               phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
+               e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+               mac_data = er32(CTRL_EXT);
+               mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+               ew32(CTRL_EXT, mac_data);
+       }
 
        /* Disable Dynamic Power Gating */
        mac_data = er32(CTRL_EXT);
        mac_data &= 0xFFFFFFF7;
        ew32(CTRL_EXT, mac_data);
 
-       /* Disable the Dynamic Clock Gating in the DMA and MAC */
-       mac_data = er32(CTRL_EXT);
-       mac_data &= 0xFFF7FFFF;
-       ew32(CTRL_EXT, mac_data);
-
-       /* Revert the lanphypc logic to use the internal Gbe counter
-        * and not the PMC counter
-        */
-       mac_data = er32(FEXTNVM5);
-       mac_data &= 0xFFFFFF7F;
-       ew32(FEXTNVM5, mac_data);
-
-       /* Enable the periodic inband message,
-        * Request PCIe clock in K1 page770_17[10:9] =01b
-        */
-       e1e_rphy(hw, HV_PM_CTRL, &phy_data);
-       phy_data &= 0xFBFF;
-       phy_data |= HV_PM_CTRL_K1_CLK_REQ;
-       e1e_wphy(hw, HV_PM_CTRL, phy_data);
-
-       /* Return back configuration
-        * 772_29[5] = 0 CS_Mode_Stay_In_K1
-        */
-       e1e_rphy(hw, I217_CGFREG, &phy_data);
-       phy_data &= 0xFFDF;
-       e1e_wphy(hw, I217_CGFREG, phy_data);
-
-       /* Change the MAC/PHY interface to Kumeran
-        * Unforce the SMBus in PHY page769_23[0] = 0
-        * Unforce the SMBus in MAC CTRL_EXT[11] = 0
-        */
-       e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
-       phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
-       e1e_wphy(hw, CV_SMB_CTRL, phy_data);
-       mac_data = er32(CTRL_EXT);
-       mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
-       ew32(CTRL_EXT, mac_data);
+       /* Enable the time synchronization clock */
+       mac_data = er32(FEXTNVM7);
+       mac_data &= ~BIT(31);
+       mac_data |= BIT(0);
+       ew32(FEXTNVM7, mac_data);
 }
 
 static int e1000e_pm_freeze(struct device *dev)
@@ -7302,7 +7354,7 @@ static const struct net_device_ops e1000e_netdev_ops = {
        .ndo_set_rx_mode        = e1000e_set_rx_mode,
        .ndo_set_mac_address    = e1000_set_mac,
        .ndo_change_mtu         = e1000_change_mtu,
-       .ndo_do_ioctl           = e1000_ioctl,
+       .ndo_eth_ioctl          = e1000_ioctl,
        .ndo_tx_timeout         = e1000_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
 
@@ -7677,7 +7729,7 @@ err_dma:
  * @pdev: PCI device information struct
  *
  * e1000_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.  The could be caused by a
+ * that it should release a PCI device.  This could be caused by a
  * Hot-Plug event, or because the driver is going to be removed from
  * memory.
  **/
@@ -7850,14 +7902,22 @@ static const struct pci_device_id e1000_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_cnp },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_cnp },
 
        { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */
 };
index 9e79d67..eb5c014 100644 (file)
@@ -298,6 +298,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter)
        case e1000_pch_tgp:
        case e1000_pch_adp:
        case e1000_pch_mtp:
+       case e1000_pch_lnp:
                if ((hw->mac.type < e1000_pch_lpt) ||
                    (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
                        adapter->ptp_clock_info.max_adj = 24000000 - 1;
index 8165ba2..6c0cd8c 100644 (file)
 #define E1000_FACTPS   0x05B30 /* Function Active and Power State to MNG */
 #define E1000_SWSM     0x05B50 /* SW Semaphore */
 #define E1000_FWSM     0x05B54 /* FW Semaphore */
+#define E1000_EXFWSM   0x05B58 /* Extended FW Semaphore */
 /* Driver-only SW semaphore (not used by BOOT agents) */
 #define E1000_SWSM2    0x05B58
 #define E1000_FFLT_DBG 0x05F04 /* Debug Register */
index 66776ba..0d37f01 100644 (file)
@@ -632,7 +632,9 @@ clear_reset:
 }
 
 static int fm10k_get_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
 
@@ -646,7 +648,9 @@ static int fm10k_get_coalesce(struct net_device *dev,
 }
 
 static int fm10k_set_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct fm10k_intfc *interface = netdev_priv(dev);
        u16 tx_itr, rx_itr;
index b9417dc..39fb3d5 100644 (file)
@@ -428,6 +428,8 @@ struct i40e_channel {
        struct i40e_vsi *parent_vsi;
 };
 
+struct i40e_ptp_pins_settings;
+
 static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
 {
        return !!ch->fwd;
@@ -644,12 +646,83 @@ struct i40e_pf {
        struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */
        struct i40e_dcbx_config tmp_cfg;
 
+/* GPIO defines used by PTP */
+#define I40E_SDP3_2                    18
+#define I40E_SDP3_3                    19
+#define I40E_GPIO_4                    20
+#define I40E_LED2_0                    26
+#define I40E_LED2_1                    27
+#define I40E_LED3_0                    28
+#define I40E_LED3_1                    29
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_HI \
+       (1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT)
+#define I40E_GLGEN_GPIO_SET_DRV_SDP_DATA \
+       (1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_0 \
+       (0 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_1 \
+       (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_RESERVED   BIT(2)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z \
+       (1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_DIR_OUT \
+       (1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TRI_DRV_HI \
+       (1 << I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_HI_RST \
+       (1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_0 \
+       (3 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_1 \
+       (4 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN \
+       (0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT \
+       (1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0 \
+       (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+        I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+        I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0 \
+       (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+        I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+        I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1 \
+       (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+        I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+        I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+        I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1 \
+       (I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+        I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+        I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+        I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_LED_INIT \
+       (I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z | \
+        I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+        I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | \
+        I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+        I40E_GLGEN_GPIO_CTL_OUT_DEFAULT | \
+        I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN)
+#define I40E_PRTTSYN_AUX_1_INSTNT \
+       (1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE \
+       (1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_CLK_MOD (3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD \
+       (I40E_PRTTSYN_AUX_0_OUT_ENABLE | I40E_PRTTSYN_AUX_0_OUT_CLK_MOD)
+#define I40E_PTP_HALF_SECOND           500000000LL /* nano seconds */
+#define I40E_PTP_2_SEC_DELAY           2
+
        struct ptp_clock *ptp_clock;
        struct ptp_clock_info ptp_caps;
        struct sk_buff *ptp_tx_skb;
        unsigned long ptp_tx_start;
        struct hwtstamp_config tstamp_config;
        struct timespec64 ptp_prev_hw_time;
+       struct work_struct ptp_pps_work;
+       struct work_struct ptp_extts0_work;
+       struct work_struct ptp_extts1_work;
        ktime_t ptp_reset_start;
        struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
        u32 ptp_adj_mult;
@@ -657,10 +730,14 @@ struct i40e_pf {
        u32 tx_hwtstamp_skipped;
        u32 rx_hwtstamp_cleared;
        u32 latch_event_flags;
+       u64 ptp_pps_start;
+       u32 pps_delay;
        spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */
+       struct ptp_pin_desc ptp_pin[3];
        unsigned long latch_events[4];
        bool ptp_tx;
        bool ptp_rx;
+       struct i40e_ptp_pins_settings *ptp_pins;
        u16 rss_table_size; /* HW RSS table size */
        u32 max_bw;
        u32 min_bw;
@@ -1169,6 +1246,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf);
 void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
 void i40e_ptp_init(struct i40e_pf *pf);
 void i40e_ptp_stop(struct i40e_pf *pf);
+int i40e_ptp_alloc_pins(struct i40e_pf *pf);
 int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
 i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf);
index 2c9e4ee..513ba69 100644 (file)
@@ -2812,13 +2812,17 @@ static int __i40e_get_coalesce(struct net_device *netdev,
  * i40e_get_coalesce - get a netdev's coalesce settings
  * @netdev: the netdev to check
  * @ec: ethtool coalesce data structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Gets the coalesce settings for a particular netdev. Note that if user has
  * modified per-queue settings, this only guarantees to represent queue 0. See
  * __i40e_get_coalesce for more details.
  **/
 static int i40e_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        return __i40e_get_coalesce(netdev, ec, -1);
 }
@@ -2986,11 +2990,15 @@ static int __i40e_set_coalesce(struct net_device *netdev,
  * i40e_set_coalesce - set coalesce settings for every queue on the netdev
  * @netdev: the netdev to change
  * @ec: ethtool coalesce settings
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * This will set each queue to the same coalesce settings.
  **/
 static int i40e_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        return __i40e_set_coalesce(netdev, ec, -1);
 }
index 1d1f527..2f20980 100644 (file)
@@ -4079,10 +4079,13 @@ static irqreturn_t i40e_intr(int irq, void *data)
        if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
                u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
 
-               if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) {
-                       icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+               if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK)
+                       schedule_work(&pf->ptp_extts0_work);
+
+               if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
                        i40e_ptp_tx_hwtstamp(pf);
-               }
+
+               icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
        }
 
        /* If a critical error is pending we have no choice but to reset the
@@ -4635,7 +4638,7 @@ void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
                err = i40e_control_wait_rx_q(pf, pf_q, false);
                if (err)
                        dev_info(&pf->pdev->dev,
-                                "VSI seid %d Rx ring %d dissable timeout\n",
+                                "VSI seid %d Rx ring %d disable timeout\n",
                                 vsi->seid, pf_q);
        }
 
@@ -13265,7 +13268,7 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = i40e_set_mac,
        .ndo_change_mtu         = i40e_change_mtu,
-       .ndo_do_ioctl           = i40e_ioctl,
+       .ndo_eth_ioctl          = i40e_ioctl,
        .ndo_tx_timeout         = i40e_tx_timeout,
        .ndo_vlan_rx_add_vid    = i40e_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = i40e_vlan_rx_kill_vid,
@@ -15180,6 +15183,22 @@ err_switch_setup:
        return err;
 }
 
+/**
+ * i40e_set_subsystem_device_id - set subsystem device id
+ * @hw: pointer to the hardware info
+ *
+ * Set PCI subsystem device id either from a pci_dev structure or
+ * a specific FW register.
+ **/
+static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
+{
+       struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev;
+
+       hw->subsystem_device_id = pdev->subsystem_device ?
+               pdev->subsystem_device :
+               (ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
+}
+
 /**
  * i40e_probe - Device initialization routine
  * @pdev: PCI device information struct
@@ -15275,7 +15294,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        hw->device_id = pdev->device;
        pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
        hw->subsystem_vendor_id = pdev->subsystem_vendor;
-       hw->subsystem_device_id = pdev->subsystem_device;
+       i40e_set_subsystem_device_id(hw);
        hw->bus.device = PCI_SLOT(pdev->devfn);
        hw->bus.func = PCI_FUNC(pdev->devfn);
        hw->bus.bus_id = pdev->bus->number;
@@ -15455,6 +15474,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (is_valid_ether_addr(hw->mac.port_addr))
                pf->hw_features |= I40E_HW_PORT_ID_VALID;
 
+       i40e_ptp_alloc_pins(pf);
        pci_set_drvdata(pdev, pf);
        pci_save_state(pdev);
 
index 7b971b2..09b1d5a 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "i40e.h"
 #include <linux/ptp_classify.h>
+#include <linux/posix-clock.h>
 
 /* The XL710 timesync is very much like Intel's 82599 design when it comes to
  * the fundamental clock design. However, the clock operations are much simpler
 #define I40E_PTP_10GB_INCVAL_MULT      2
 #define I40E_PTP_5GB_INCVAL_MULT       2
 #define I40E_PTP_1GB_INCVAL_MULT       20
+#define I40E_ISGN                      0x80000000
 
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V1  BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
 #define I40E_PRTTSYN_CTL1_TSYNTYPE_V2  (2 << \
                                        I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_SUBDEV_ID_25G_PTP_PIN     0xB
+#define to_dev(obj) container_of(obj, struct device, kobj)
+
+enum i40e_ptp_pin {
+       SDP3_2 = 0,
+       SDP3_3,
+       GPIO_4
+};
+
+enum i40e_can_set_pins_t {
+       CANT_DO_PINS = -1,
+       CAN_SET_PINS,
+       CAN_DO_PINS
+};
+
+static struct ptp_pin_desc sdp_desc[] = {
+       /* name     idx      func      chan */
+       {"SDP3_2", SDP3_2, PTP_PF_NONE, 0},
+       {"SDP3_3", SDP3_3, PTP_PF_NONE, 1},
+       {"GPIO_4", GPIO_4, PTP_PF_NONE, 1},
+};
+
+enum i40e_ptp_gpio_pin_state {
+       end = -2,
+       invalid,
+       off,
+       in_A,
+       in_B,
+       out_A,
+       out_B,
+};
+
+static const char * const i40e_ptp_gpio_pin_state2str[] = {
+       "off", "in_A", "in_B", "out_A", "out_B"
+};
+
+enum i40e_ptp_led_pin_state {
+       led_end = -2,
+       low = 0,
+       high,
+};
+
+struct i40e_ptp_pins_settings {
+       enum i40e_ptp_gpio_pin_state sdp3_2;
+       enum i40e_ptp_gpio_pin_state sdp3_3;
+       enum i40e_ptp_gpio_pin_state gpio_4;
+       enum i40e_ptp_led_pin_state led2_0;
+       enum i40e_ptp_led_pin_state led2_1;
+       enum i40e_ptp_led_pin_state led3_0;
+       enum i40e_ptp_led_pin_state led3_1;
+};
+
+static const struct i40e_ptp_pins_settings
+       i40e_ptp_pin_led_allowed_states[] = {
+       {off,   off,    off,            high,   high,   high,   high},
+       {off,   in_A,   off,            high,   high,   high,   low},
+       {off,   out_A,  off,            high,   low,    high,   high},
+       {off,   in_B,   off,            high,   high,   high,   low},
+       {off,   out_B,  off,            high,   low,    high,   high},
+       {in_A,  off,    off,            high,   high,   high,   low},
+       {in_A,  in_B,   off,            high,   high,   high,   low},
+       {in_A,  out_B,  off,            high,   low,    high,   high},
+       {out_A, off,    off,            high,   low,    high,   high},
+       {out_A, in_B,   off,            high,   low,    high,   high},
+       {in_B,  off,    off,            high,   high,   high,   low},
+       {in_B,  in_A,   off,            high,   high,   high,   low},
+       {in_B,  out_A,  off,            high,   low,    high,   high},
+       {out_B, off,    off,            high,   low,    high,   high},
+       {out_B, in_A,   off,            high,   low,    high,   high},
+       {off,   off,    in_A,           high,   high,   low,    high},
+       {off,   out_A,  in_A,           high,   low,    low,    high},
+       {off,   in_B,   in_A,           high,   high,   low,    low},
+       {off,   out_B,  in_A,           high,   low,    low,    high},
+       {out_A, off,    in_A,           high,   low,    low,    high},
+       {out_A, in_B,   in_A,           high,   low,    low,    high},
+       {in_B,  off,    in_A,           high,   high,   low,    low},
+       {in_B,  out_A,  in_A,           high,   low,    low,    high},
+       {out_B, off,    in_A,           high,   low,    low,    high},
+       {off,   off,    out_A,          low,    high,   high,   high},
+       {off,   in_A,   out_A,          low,    high,   high,   low},
+       {off,   in_B,   out_A,          low,    high,   high,   low},
+       {off,   out_B,  out_A,          low,    low,    high,   high},
+       {in_A,  off,    out_A,          low,    high,   high,   low},
+       {in_A,  in_B,   out_A,          low,    high,   high,   low},
+       {in_A,  out_B,  out_A,          low,    low,    high,   high},
+       {in_B,  off,    out_A,          low,    high,   high,   low},
+       {in_B,  in_A,   out_A,          low,    high,   high,   low},
+       {out_B, off,    out_A,          low,    low,    high,   high},
+       {out_B, in_A,   out_A,          low,    low,    high,   high},
+       {off,   off,    in_B,           high,   high,   low,    high},
+       {off,   in_A,   in_B,           high,   high,   low,    low},
+       {off,   out_A,  in_B,           high,   low,    low,    high},
+       {off,   out_B,  in_B,           high,   low,    low,    high},
+       {in_A,  off,    in_B,           high,   high,   low,    low},
+       {in_A,  out_B,  in_B,           high,   low,    low,    high},
+       {out_A, off,    in_B,           high,   low,    low,    high},
+       {out_B, off,    in_B,           high,   low,    low,    high},
+       {out_B, in_A,   in_B,           high,   low,    low,    high},
+       {off,   off,    out_B,          low,    high,   high,   high},
+       {off,   in_A,   out_B,          low,    high,   high,   low},
+       {off,   out_A,  out_B,          low,    low,    high,   high},
+       {off,   in_B,   out_B,          low,    high,   high,   low},
+       {in_A,  off,    out_B,          low,    high,   high,   low},
+       {in_A,  in_B,   out_B,          low,    high,   high,   low},
+       {out_A, off,    out_B,          low,    low,    high,   high},
+       {out_A, in_B,   out_B,          low,    low,    high,   high},
+       {in_B,  off,    out_B,          low,    high,   high,   low},
+       {in_B,  in_A,   out_B,          low,    high,   high,   low},
+       {in_B,  out_A,  out_B,          low,    low,    high,   high},
+       {end,   end,    end,    led_end, led_end, led_end, led_end}
+};
+
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+                            struct i40e_ptp_pins_settings *pins);
+
+/**
+ * i40e_ptp_extts0_work - workqueue task function
+ * @work: workqueue task structure
+ *
+ * Service for PTP external clock event
+ **/
+static void i40e_ptp_extts0_work(struct work_struct *work)
+{
+       struct i40e_pf *pf = container_of(work, struct i40e_pf,
+                                         ptp_extts0_work);
+       struct i40e_hw *hw = &pf->hw;
+       struct ptp_clock_event event;
+       u32 hi, lo;
+
+       /* Event time is captured by one of the two matched registers
+        *      PRTTSYN_EVNT_L: 32 LSB of sampled time event
+        *      PRTTSYN_EVNT_H: 32 MSB of sampled time event
+        * Event is defined in PRTTSYN_EVNT_0 register
+        */
+       lo = rd32(hw, I40E_PRTTSYN_EVNT_L(0));
+       hi = rd32(hw, I40E_PRTTSYN_EVNT_H(0));
+
+       event.timestamp = (((u64)hi) << 32) | lo;
+
+       event.type = PTP_CLOCK_EXTTS;
+       event.index = hw->pf_id;
+
+       /* fire event */
+       ptp_clock_event(pf->ptp_clock, &event);
+}
+
+/**
+ * i40e_is_ptp_pin_dev - check if device supports PTP pins
+ * @hw: pointer to the hardware structure
+ *
+ * Return true if device supports PTP pins, false otherwise.
+ **/
+static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
+{
+       return hw->device_id == I40E_DEV_ID_25G_SFP28 &&
+              hw->subsystem_device_id == I40E_SUBDEV_ID_25G_PTP_PIN;
+}
+
+/**
+ * i40e_can_set_pins - check possibility of manipulating the pins
+ * @pf: board private structure
+ *
+ * Check if all conditions are satisfied to manipulate PTP pins.
+ * Return CAN_SET_PINS if pins can be set on a specific PF or
+ * return CAN_DO_PINS if pins can be manipulated within a NIC or
+ * return CANT_DO_PINS otherwise.
+ **/
+static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
+{
+       if (!i40e_is_ptp_pin_dev(&pf->hw)) {
+               dev_warn(&pf->pdev->dev,
+                        "PTP external clock not supported.\n");
+               return CANT_DO_PINS;
+       }
+
+       if (!pf->ptp_pins) {
+               dev_warn(&pf->pdev->dev,
+                        "PTP PIN manipulation not allowed.\n");
+               return CANT_DO_PINS;
+       }
+
+       if (pf->hw.pf_id) {
+               dev_warn(&pf->pdev->dev,
+                        "PTP PINs should be accessed via PF0.\n");
+               return CAN_DO_PINS;
+       }
+
+       return CAN_SET_PINS;
+}
+
+/**
+ * i40_ptp_reset_timing_events - Reset PTP timing events
+ * @pf: Board private structure
+ *
+ * This function resets timing events for pf.
+ **/
+static void i40_ptp_reset_timing_events(struct i40e_pf *pf)
+{
+       u32 i;
+
+       spin_lock_bh(&pf->ptp_rx_lock);
+       for (i = 0; i <= I40E_PRTTSYN_RXTIME_L_MAX_INDEX; i++) {
+               /* reading and automatically clearing timing events registers */
+               rd32(&pf->hw, I40E_PRTTSYN_RXTIME_L(i));
+               rd32(&pf->hw, I40E_PRTTSYN_RXTIME_H(i));
+               pf->latch_events[i] = 0;
+       }
+       /* reading and automatically clearing timing events registers */
+       rd32(&pf->hw, I40E_PRTTSYN_TXTIME_L);
+       rd32(&pf->hw, I40E_PRTTSYN_TXTIME_H);
+
+       pf->tx_hwtstamp_timeouts = 0;
+       pf->tx_hwtstamp_skipped = 0;
+       pf->rx_hwtstamp_cleared = 0;
+       pf->latch_event_flags = 0;
+       spin_unlock_bh(&pf->ptp_rx_lock);
+}
+
+/**
+ * i40e_ptp_verify - check pins
+ * @ptp: ptp clock
+ * @pin: pin index
+ * @func: assigned function
+ * @chan: channel
+ *
+ * Check pins consistency.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+                          enum ptp_pin_function func, unsigned int chan)
+{
+       switch (func) {
+       case PTP_PF_NONE:
+       case PTP_PF_EXTTS:
+       case PTP_PF_PEROUT:
+               break;
+       case PTP_PF_PHYSYNC:
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
 
 /**
  * i40e_ptp_read - Read the PHC time from the device
@@ -136,6 +379,37 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
        return 0;
 }
 
+/**
+ * i40e_ptp_set_1pps_signal_hw - configure 1PPS PTP signal for pins
+ * @pf: the PF private data structure
+ *
+ * Configure 1PPS signal used for PTP pins
+ **/
+static void i40e_ptp_set_1pps_signal_hw(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       struct timespec64 now;
+       u64 ns;
+
+       wr32(hw, I40E_PRTTSYN_AUX_0(1), 0);
+       wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+       wr32(hw, I40E_PRTTSYN_AUX_0(1), I40E_PRTTSYN_AUX_0_OUT_ENABLE);
+
+       i40e_ptp_read(pf, &now, NULL);
+       now.tv_sec += I40E_PTP_2_SEC_DELAY;
+       now.tv_nsec = 0;
+       ns = timespec64_to_ns(&now);
+
+       /* I40E_PRTTSYN_TGT_L(1) */
+       wr32(hw, I40E_PRTTSYN_TGT_L(1), ns & 0xFFFFFFFF);
+       /* I40E_PRTTSYN_TGT_H(1) */
+       wr32(hw, I40E_PRTTSYN_TGT_H(1), ns >> 32);
+       wr32(hw, I40E_PRTTSYN_CLKO(1), I40E_PTP_HALF_SECOND);
+       wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+       wr32(hw, I40E_PRTTSYN_AUX_0(1),
+            I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD);
+}
+
 /**
  * i40e_ptp_adjtime - Adjust the PHC time
  * @ptp: The PTP clock structure
@@ -146,14 +420,35 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
        struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-       struct timespec64 now, then;
+       struct i40e_hw *hw = &pf->hw;
 
-       then = ns_to_timespec64(delta);
        mutex_lock(&pf->tmreg_lock);
 
-       i40e_ptp_read(pf, &now, NULL);
-       now = timespec64_add(now, then);
-       i40e_ptp_write(pf, (const struct timespec64 *)&now);
+       if (delta > -999999900LL && delta < 999999900LL) {
+               int neg_adj = 0;
+               u32 timadj;
+               u64 tohw;
+
+               if (delta < 0) {
+                       neg_adj = 1;
+                       tohw = -delta;
+               } else {
+                       tohw = delta;
+               }
+
+               timadj = tohw & 0x3FFFFFFF;
+               if (neg_adj)
+                       timadj |= I40E_ISGN;
+               wr32(hw, I40E_PRTTSYN_ADJ, timadj);
+       } else {
+               struct timespec64 then, now;
+
+               then = ns_to_timespec64(delta);
+               i40e_ptp_read(pf, &now, NULL);
+               now = timespec64_add(now, then);
+               i40e_ptp_write(pf, (const struct timespec64 *)&now);
+               i40e_ptp_set_1pps_signal_hw(pf);
+       }
 
        mutex_unlock(&pf->tmreg_lock);
 
@@ -184,7 +479,7 @@ static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
 /**
  * i40e_ptp_settime - Set the time of the PHC
  * @ptp: The PTP clock structure
- * @ts: timespec structure that holds the new time value
+ * @ts: timespec64 structure that holds the new time value
  *
  * Set the device clock to the user input value. The conversion from timespec
  * to ns happens in the write function.
@@ -202,18 +497,145 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp,
 }
 
 /**
- * i40e_ptp_feature_enable - Enable/disable ancillary features of the PHC subsystem
+ * i40e_pps_configure - configure PPS events
+ * @ptp: ptp clock
+ * @rq: clock request
+ * @on: status
+ *
+ * Configure PPS events for external clock source.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_pps_configure(struct ptp_clock_info *ptp,
+                             struct ptp_clock_request *rq,
+                             int on)
+{
+       struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+       if (!!on)
+               i40e_ptp_set_1pps_signal_hw(pf);
+
+       return 0;
+}
+
+/**
+ * i40e_pin_state - determine PIN state
+ * @index: PIN index
+ * @func: function assigned to PIN
+ *
+ * Determine PIN state based on PIN index and function assigned.
+ * Return PIN state.
+ **/
+static enum i40e_ptp_gpio_pin_state i40e_pin_state(int index, int func)
+{
+       enum i40e_ptp_gpio_pin_state state = off;
+
+       if (index == 0 && func == PTP_PF_EXTTS)
+               state = in_A;
+       if (index == 1 && func == PTP_PF_EXTTS)
+               state = in_B;
+       if (index == 0 && func == PTP_PF_PEROUT)
+               state = out_A;
+       if (index == 1 && func == PTP_PF_PEROUT)
+               state = out_B;
+
+       return state;
+}
+
+/**
+ * i40e_ptp_enable_pin - enable PINs.
+ * @pf: private board structure
+ * @chan: channel
+ * @func: PIN function
+ * @on: state
+ *
+ * Enable PTP pins for external clock source.
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_ptp_enable_pin(struct i40e_pf *pf, unsigned int chan,
+                              enum ptp_pin_function func, int on)
+{
+       enum i40e_ptp_gpio_pin_state *pin = NULL;
+       struct i40e_ptp_pins_settings pins;
+       int pin_index;
+
+       /* Use PF0 to set pins. Return success for user space tools */
+       if (pf->hw.pf_id)
+               return 0;
+
+       /* Preserve previous state of pins that we don't touch */
+       pins.sdp3_2 = pf->ptp_pins->sdp3_2;
+       pins.sdp3_3 = pf->ptp_pins->sdp3_3;
+       pins.gpio_4 = pf->ptp_pins->gpio_4;
+
+       /* To turn on the pin - find the corresponding one based on
+        * the given index. To to turn the function off - find
+        * which pin had it assigned. Don't use ptp_find_pin here
+        * because it tries to lock the pincfg_mux which is locked by
+        * ptp_pin_store() that calls here.
+        */
+       if (on) {
+               pin_index = ptp_find_pin(pf->ptp_clock, func, chan);
+               if (pin_index < 0)
+                       return -EBUSY;
+
+               switch (pin_index) {
+               case SDP3_2:
+                       pin = &pins.sdp3_2;
+                       break;
+               case SDP3_3:
+                       pin = &pins.sdp3_3;
+                       break;
+               case GPIO_4:
+                       pin = &pins.gpio_4;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
+               *pin = i40e_pin_state(chan, func);
+       } else {
+               pins.sdp3_2 = off;
+               pins.sdp3_3 = off;
+               pins.gpio_4 = off;
+       }
+
+       return i40e_ptp_set_pins(pf, &pins) ? -EINVAL : 0;
+}
+
+/**
+ * i40e_ptp_feature_enable - Enable external clock pins
  * @ptp: The PTP clock structure
- * @rq: The requested feature to change
- * @on: Enable/disable flag
+ * @rq: The PTP clock request structure
+ * @on: To turn feature on/off
  *
- * The XL710 does not support any of the ancillary features of the PHC
- * subsystem, so this function may just return.
+ * Setting on/off PTP PPS feature for pin.
  **/
 static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
-                                  struct ptp_clock_request *rq, int on)
+                                  struct ptp_clock_request *rq,
+                                  int on)
 {
-       return -EOPNOTSUPP;
+       struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+       enum ptp_pin_function func;
+       unsigned int chan;
+
+       /* TODO: Implement flags handling for EXTTS and PEROUT */
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               func = PTP_PF_EXTTS;
+               chan = rq->extts.index;
+               break;
+       case PTP_CLK_REQ_PEROUT:
+               func = PTP_PF_PEROUT;
+               chan = rq->perout.index;
+               break;
+       case PTP_CLK_REQ_PPS:
+               return i40e_pps_configure(ptp, rq, on);
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return i40e_ptp_enable_pin(pf, chan, func, on);
 }
 
 /**
@@ -527,6 +949,229 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
                -EFAULT : 0;
 }
 
+/**
+ * i40e_ptp_free_pins - free memory used by PTP pins
+ * @pf: Board private structure
+ *
+ * Release memory allocated for PTP pins.
+ **/
+static void i40e_ptp_free_pins(struct i40e_pf *pf)
+{
+       if (i40e_is_ptp_pin_dev(&pf->hw)) {
+               kfree(pf->ptp_pins);
+               kfree(pf->ptp_caps.pin_config);
+               pf->ptp_pins = NULL;
+       }
+}
+
+/**
+ * i40e_ptp_set_pin_hw - Set HW GPIO pin
+ * @hw: pointer to the hardware structure
+ * @pin: pin index
+ * @state: pin state
+ *
+ * Set status of GPIO pin for external clock handling.
+ **/
+static void i40e_ptp_set_pin_hw(struct i40e_hw *hw,
+                               unsigned int pin,
+                               enum i40e_ptp_gpio_pin_state state)
+{
+       switch (state) {
+       case off:
+               wr32(hw, I40E_GLGEN_GPIO_CTL(pin), 0);
+               break;
+       case in_A:
+               wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+                    I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0);
+               break;
+       case in_B:
+               wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+                    I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0);
+               break;
+       case out_A:
+               wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+                    I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1);
+               break;
+       case out_B:
+               wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+                    I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1);
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * i40e_ptp_set_led_hw - Set HW GPIO led
+ * @hw: pointer to the hardware structure
+ * @led: led index
+ * @state: led state
+ *
+ * Set status of GPIO led for external clock handling.
+ **/
+static void i40e_ptp_set_led_hw(struct i40e_hw *hw,
+                               unsigned int led,
+                               enum i40e_ptp_led_pin_state state)
+{
+       switch (state) {
+       case low:
+               wr32(hw, I40E_GLGEN_GPIO_SET,
+                    I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | led);
+               break;
+       case high:
+               wr32(hw, I40E_GLGEN_GPIO_SET,
+                    I40E_GLGEN_GPIO_SET_DRV_SDP_DATA |
+                    I40E_GLGEN_GPIO_SET_SDP_DATA_HI | led);
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * i40e_ptp_init_leds_hw - init LEDs
+ * @hw: pointer to a hardware structure
+ *
+ * Set initial state of LEDs
+ **/
+static void i40e_ptp_init_leds_hw(struct i40e_hw *hw)
+{
+       wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_0),
+            I40E_GLGEN_GPIO_CTL_LED_INIT);
+       wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_1),
+            I40E_GLGEN_GPIO_CTL_LED_INIT);
+       wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_0),
+            I40E_GLGEN_GPIO_CTL_LED_INIT);
+       wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_1),
+            I40E_GLGEN_GPIO_CTL_LED_INIT);
+}
+
+/**
+ * i40e_ptp_set_pins_hw - Set HW GPIO pins
+ * @pf: Board private structure
+ *
+ * This function sets GPIO pins for PTP
+ **/
+static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
+{
+       const struct i40e_ptp_pins_settings *pins = pf->ptp_pins;
+       struct i40e_hw *hw = &pf->hw;
+
+       /* pin must be disabled before it may be used */
+       i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+       i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+       i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+
+       i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, pins->sdp3_2);
+       i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, pins->sdp3_3);
+       i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, pins->gpio_4);
+
+       i40e_ptp_set_led_hw(hw, I40E_LED2_0, pins->led2_0);
+       i40e_ptp_set_led_hw(hw, I40E_LED2_1, pins->led2_1);
+       i40e_ptp_set_led_hw(hw, I40E_LED3_0, pins->led3_0);
+       i40e_ptp_set_led_hw(hw, I40E_LED3_1, pins->led3_1);
+
+       dev_info(&pf->pdev->dev,
+                "PTP configuration set to: SDP3_2: %s,  SDP3_3: %s,  GPIO_4: %s.\n",
+                i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+                i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+                i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+}
+
+/**
+ * i40e_ptp_set_pins - set PTP pins in HW
+ * @pf: Board private structure
+ * @pins: PTP pins to be applied
+ *
+ * Validate and set PTP pins in HW for specific PF.
+ * Return 0 on success or negative value on error.
+ **/
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+                            struct i40e_ptp_pins_settings *pins)
+{
+       enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf);
+       int i = 0;
+
+       if (pin_caps == CANT_DO_PINS)
+               return -EOPNOTSUPP;
+       else if (pin_caps == CAN_DO_PINS)
+               return 0;
+
+       if (pins->sdp3_2 == invalid)
+               pins->sdp3_2 = pf->ptp_pins->sdp3_2;
+       if (pins->sdp3_3 == invalid)
+               pins->sdp3_3 = pf->ptp_pins->sdp3_3;
+       if (pins->gpio_4 == invalid)
+               pins->gpio_4 = pf->ptp_pins->gpio_4;
+       while (i40e_ptp_pin_led_allowed_states[i].sdp3_2 != end) {
+               if (pins->sdp3_2 == i40e_ptp_pin_led_allowed_states[i].sdp3_2 &&
+                   pins->sdp3_3 == i40e_ptp_pin_led_allowed_states[i].sdp3_3 &&
+                   pins->gpio_4 == i40e_ptp_pin_led_allowed_states[i].gpio_4) {
+                       pins->led2_0 =
+                               i40e_ptp_pin_led_allowed_states[i].led2_0;
+                       pins->led2_1 =
+                               i40e_ptp_pin_led_allowed_states[i].led2_1;
+                       pins->led3_0 =
+                               i40e_ptp_pin_led_allowed_states[i].led3_0;
+                       pins->led3_1 =
+                               i40e_ptp_pin_led_allowed_states[i].led3_1;
+                       break;
+               }
+               i++;
+       }
+       if (i40e_ptp_pin_led_allowed_states[i].sdp3_2 == end) {
+               dev_warn(&pf->pdev->dev,
+                        "Unsupported PTP pin configuration: SDP3_2: %s,  SDP3_3: %s,  GPIO_4: %s.\n",
+                        i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+                        i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+                        i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+
+               return -EPERM;
+       }
+       memcpy(pf->ptp_pins, pins, sizeof(*pins));
+       i40e_ptp_set_pins_hw(pf);
+       i40_ptp_reset_timing_events(pf);
+
+       return 0;
+}
+
+/**
+ * i40e_ptp_alloc_pins - allocate PTP pins structure
+ * @pf: Board private structure
+ *
+ * allocate PTP pins structure
+ **/
+int i40e_ptp_alloc_pins(struct i40e_pf *pf)
+{
+       if (!i40e_is_ptp_pin_dev(&pf->hw))
+               return 0;
+
+       pf->ptp_pins =
+               kzalloc(sizeof(struct i40e_ptp_pins_settings), GFP_KERNEL);
+
+       if (!pf->ptp_pins) {
+               dev_warn(&pf->pdev->dev, "Cannot allocate memory for PTP pins structure.\n");
+               return -I40E_ERR_NO_MEMORY;
+       }
+
+       pf->ptp_pins->sdp3_2 = off;
+       pf->ptp_pins->sdp3_3 = off;
+       pf->ptp_pins->gpio_4 = off;
+       pf->ptp_pins->led2_0 = high;
+       pf->ptp_pins->led2_1 = high;
+       pf->ptp_pins->led3_0 = high;
+       pf->ptp_pins->led3_1 = high;
+
+       /* Use PF0 to set pins in HW. Return success for user space tools */
+       if (pf->hw.pf_id)
+               return 0;
+
+       i40e_ptp_init_leds_hw(&pf->hw);
+       i40e_ptp_set_pins_hw(pf);
+
+       return 0;
+}
+
 /**
  * i40e_ptp_set_timestamp_mode - setup hardware for requested timestamp mode
  * @pf: Board private structure
@@ -545,6 +1190,21 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
        struct i40e_hw *hw = &pf->hw;
        u32 tsyntype, regval;
 
+       /* Selects external trigger to cause event */
+       regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+       /* Bit 17:16 is EVNTLVL, 01B rising edge */
+       regval &= 0;
+       regval |= (1 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT);
+       /* regval: 0001 0000 0000 0000 0000 */
+       wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+       /* Enabel interrupts */
+       regval = rd32(hw, I40E_PRTTSYN_CTL0);
+       regval |= 1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT;
+       wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+       INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work);
+
        /* Reserved for future extensions. */
        if (config->flags)
                return -EINVAL;
@@ -687,6 +1347,45 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
                -EFAULT : 0;
 }
 
+/**
+ * i40e_init_pin_config - initialize pins.
+ * @pf: private board structure
+ *
+ * Initialize pins for external clock source.
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_init_pin_config(struct i40e_pf *pf)
+{
+       int i;
+
+       pf->ptp_caps.n_pins = 3;
+       pf->ptp_caps.n_ext_ts = 2;
+       pf->ptp_caps.pps = 1;
+       pf->ptp_caps.n_per_out = 2;
+
+       pf->ptp_caps.pin_config = kcalloc(pf->ptp_caps.n_pins,
+                                         sizeof(*pf->ptp_caps.pin_config),
+                                         GFP_KERNEL);
+       if (!pf->ptp_caps.pin_config)
+               return -ENOMEM;
+
+       for (i = 0; i < pf->ptp_caps.n_pins; i++) {
+               snprintf(pf->ptp_caps.pin_config[i].name,
+                        sizeof(pf->ptp_caps.pin_config[i].name),
+                        "%s", sdp_desc[i].name);
+               pf->ptp_caps.pin_config[i].index = sdp_desc[i].index;
+               pf->ptp_caps.pin_config[i].func = PTP_PF_NONE;
+               pf->ptp_caps.pin_config[i].chan = sdp_desc[i].chan;
+       }
+
+       pf->ptp_caps.verify = i40e_ptp_verify;
+       pf->ptp_caps.enable = i40e_ptp_feature_enable;
+
+       pf->ptp_caps.pps = 1;
+
+       return 0;
+}
+
 /**
  * i40e_ptp_create_clock - Create PTP clock device for userspace
  * @pf: Board private structure
@@ -707,13 +1406,16 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf)
                sizeof(pf->ptp_caps.name) - 1);
        pf->ptp_caps.owner = THIS_MODULE;
        pf->ptp_caps.max_adj = 999999999;
-       pf->ptp_caps.n_ext_ts = 0;
-       pf->ptp_caps.pps = 0;
        pf->ptp_caps.adjfreq = i40e_ptp_adjfreq;
        pf->ptp_caps.adjtime = i40e_ptp_adjtime;
        pf->ptp_caps.gettimex64 = i40e_ptp_gettimex;
        pf->ptp_caps.settime64 = i40e_ptp_settime;
-       pf->ptp_caps.enable = i40e_ptp_feature_enable;
+       if (i40e_is_ptp_pin_dev(&pf->hw)) {
+               int err = i40e_init_pin_config(pf);
+
+               if (err)
+                       return err;
+       }
 
        /* Attempt to register the clock before enabling the hardware. */
        pf->ptp_clock = ptp_clock_register(&pf->ptp_caps, &pf->pdev->dev);
@@ -843,6 +1545,8 @@ void i40e_ptp_init(struct i40e_pf *pf)
                /* Restore the clock time based on last known value */
                i40e_ptp_restore_hw_time(pf);
        }
+
+       i40e_ptp_set_1pps_signal_hw(pf);
 }
 
 /**
@@ -854,6 +1558,9 @@ void i40e_ptp_init(struct i40e_pf *pf)
  **/
 void i40e_ptp_stop(struct i40e_pf *pf)
 {
+       struct i40e_hw *hw = &pf->hw;
+       u32 regval;
+
        pf->flags &= ~I40E_FLAG_PTP;
        pf->ptp_tx = false;
        pf->ptp_rx = false;
@@ -872,4 +1579,21 @@ void i40e_ptp_stop(struct i40e_pf *pf)
                dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__,
                         pf->vsi[pf->lan_vsi]->netdev->name);
        }
+
+       if (i40e_is_ptp_pin_dev(&pf->hw)) {
+               i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+               i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+               i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+       }
+
+       regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+       regval &= ~I40E_PRTTSYN_AUX_0_PTPFLAG_MASK;
+       wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+       /* Disable interrupts */
+       regval = rd32(hw, I40E_PRTTSYN_CTL0);
+       regval &= ~I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK;
+       wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+       i40e_ptp_free_pins(pf);
 }
index 36f7b27..8d0588a 100644 (file)
 #define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
 #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3
 #define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6
 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7
 #define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
 #define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11
 #define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12
 #define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT  19
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK   I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
+#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6
 #define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
 #define I40E_GLGEN_MSCA_MDIADD_SHIFT 0
 #define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12
 #define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */
 #define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */
+#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */
 #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30
 #define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT)
 #define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31
 #define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */
 #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1
 #define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT)
 #define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8
 #define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT)
 #define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31
 #define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */
 #define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
 #define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3
 #define I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */
+#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT)
 #define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4
 #define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT)
 #define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */
 #define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */
 #define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */
 #define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0
+#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT 17
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT)
+#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0
+#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */
 #define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */
 #define I40E_GL_MDET_RX_FUNCTION_SHIFT 0
 #define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT)
index eff0a30..472f56b 100644 (file)
@@ -1160,12 +1160,12 @@ static int i40e_quiesce_vf_pci(struct i40e_vf *vf)
 }
 
 /**
- * i40e_getnum_vf_vsi_vlan_filters
+ * __i40e_getnum_vf_vsi_vlan_filters
  * @vsi: pointer to the vsi
  *
  * called to get the number of VLANs offloaded on this VF
  **/
-static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+static int __i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
 {
        struct i40e_mac_filter *f;
        u16 num_vlans = 0, bkt;
@@ -1178,6 +1178,23 @@ static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
        return num_vlans;
 }
 
+/**
+ * i40e_getnum_vf_vsi_vlan_filters
+ * @vsi: pointer to the vsi
+ *
+ * wrapper for __i40e_getnum_vf_vsi_vlan_filters() with spinlock held
+ **/
+static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+{
+       int num_vlans;
+
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
+       num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+       return num_vlans;
+}
+
 /**
  * i40e_get_vlan_list_sync
  * @vsi: pointer to the VSI
@@ -1195,7 +1212,7 @@ static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans,
        int bkt;
 
        spin_lock_bh(&vsi->mac_filter_hash_lock);
-       *num_vlans = i40e_getnum_vf_vsi_vlan_filters(vsi);
+       *num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi);
        *vlan_list = kcalloc(*num_vlans, sizeof(**vlan_list), GFP_ATOMIC);
        if (!(*vlan_list))
                goto err;
index 90793b3..68c80f0 100644 (file)
@@ -186,12 +186,6 @@ enum iavf_state_t {
        __IAVF_RUNNING,         /* opened, working */
 };
 
-enum iavf_critical_section_t {
-       __IAVF_IN_CRITICAL_TASK,        /* cannot be interrupted */
-       __IAVF_IN_CLIENT_TASK,
-       __IAVF_IN_REMOVE_TASK,  /* device being removed */
-};
-
 #define IAVF_CLOUD_FIELD_OMAC          0x01
 #define IAVF_CLOUD_FIELD_IMAC          0x02
 #define IAVF_CLOUD_FIELD_IVLAN 0x04
@@ -236,6 +230,9 @@ struct iavf_adapter {
        struct iavf_q_vector *q_vectors;
        struct list_head vlan_filter_list;
        struct list_head mac_filter_list;
+       struct mutex crit_lock;
+       struct mutex client_lock;
+       struct mutex remove_lock;
        /* Lock to protect accesses to MAC and VLAN lists */
        spinlock_t mac_vlan_list_lock;
        char misc_vector_name[IFNAMSIZ + 9];
index af43fbd..5a359a0 100644 (file)
@@ -685,6 +685,8 @@ static int __iavf_get_coalesce(struct net_device *netdev,
  * iavf_get_coalesce - Get interrupt coalescing settings
  * @netdev: network interface device structure
  * @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Returns current coalescing settings. This is referred to elsewhere in the
  * driver as Interrupt Throttle Rate, as this is how the hardware describes
@@ -692,7 +694,9 @@ static int __iavf_get_coalesce(struct net_device *netdev,
  * only represents the settings of queue 0.
  **/
 static int iavf_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        return __iavf_get_coalesce(netdev, ec, -1);
 }
@@ -804,11 +808,15 @@ static int __iavf_set_coalesce(struct net_device *netdev,
  * iavf_set_coalesce - Set interrupt coalescing settings
  * @netdev: network interface device structure
  * @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Change current coalescing settings for every queue.
  **/
 static int iavf_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        return __iavf_set_coalesce(netdev, ec, -1);
 }
@@ -1352,8 +1360,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
        if (!fltr)
                return -ENOMEM;
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section)) {
+       while (!mutex_trylock(&adapter->crit_lock)) {
                if (--count == 0) {
                        kfree(fltr);
                        return -EINVAL;
@@ -1378,7 +1385,7 @@ ret:
        if (err && fltr)
                kfree(fltr);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
        return err;
 }
 
@@ -1563,8 +1570,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
                return -EINVAL;
        }
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section)) {
+       while (!mutex_trylock(&adapter->crit_lock)) {
                if (--count == 0) {
                        kfree(rss_new);
                        return -EINVAL;
@@ -1600,7 +1606,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
        if (!err)
                mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 
        if (!rss_new_add)
                kfree(rss_new);
index 606a01c..23762a7 100644 (file)
@@ -131,6 +131,27 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
        return 0;
 }
 
+/**
+ * iavf_lock_timeout - try to lock mutex but give up after timeout
+ * @lock: mutex that should be locked
+ * @msecs: timeout in msecs
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+{
+       unsigned int wait, delay = 10;
+
+       for (wait = 0; wait < msecs; wait += delay) {
+               if (mutex_trylock(lock))
+                       return 0;
+
+               msleep(delay);
+       }
+
+       return -1;
+}
+
 /**
  * iavf_schedule_reset - Set the flags and schedule a reset event
  * @adapter: board private structure
@@ -1916,7 +1937,7 @@ static void iavf_watchdog_task(struct work_struct *work)
        struct iavf_hw *hw = &adapter->hw;
        u32 reg_val;
 
-       if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+       if (!mutex_trylock(&adapter->crit_lock))
                goto restart_watchdog;
 
        if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
@@ -1934,8 +1955,7 @@ static void iavf_watchdog_task(struct work_struct *work)
                        adapter->state = __IAVF_STARTUP;
                        adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
                        queue_delayed_work(iavf_wq, &adapter->init_task, 10);
-                       clear_bit(__IAVF_IN_CRITICAL_TASK,
-                                 &adapter->crit_section);
+                       mutex_unlock(&adapter->crit_lock);
                        /* Don't reschedule the watchdog, since we've restarted
                         * the init task. When init_task contacts the PF and
                         * gets everything set up again, it'll restart the
@@ -1945,14 +1965,13 @@ static void iavf_watchdog_task(struct work_struct *work)
                }
                adapter->aq_required = 0;
                adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-               clear_bit(__IAVF_IN_CRITICAL_TASK,
-                         &adapter->crit_section);
+               mutex_unlock(&adapter->crit_lock);
                queue_delayed_work(iavf_wq,
                                   &adapter->watchdog_task,
                                   msecs_to_jiffies(10));
                goto watchdog_done;
        case __IAVF_RESETTING:
-               clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+               mutex_unlock(&adapter->crit_lock);
                queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
                return;
        case __IAVF_DOWN:
@@ -1975,7 +1994,7 @@ static void iavf_watchdog_task(struct work_struct *work)
                }
                break;
        case __IAVF_REMOVE:
-               clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+               mutex_unlock(&adapter->crit_lock);
                return;
        default:
                goto restart_watchdog;
@@ -1984,7 +2003,6 @@ static void iavf_watchdog_task(struct work_struct *work)
                /* check for hw reset */
        reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
        if (!reg_val) {
-               adapter->state = __IAVF_RESETTING;
                adapter->flags |= IAVF_FLAG_RESET_PENDING;
                adapter->aq_required = 0;
                adapter->current_op = VIRTCHNL_OP_UNKNOWN;
@@ -1998,7 +2016,7 @@ watchdog_done:
        if (adapter->state == __IAVF_RUNNING ||
            adapter->state == __IAVF_COMM_FAILED)
                iavf_detect_recover_hung(&adapter->vsi);
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 restart_watchdog:
        if (adapter->aq_required)
                queue_delayed_work(iavf_wq, &adapter->watchdog_task,
@@ -2062,7 +2080,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter)
        memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
        iavf_shutdown_adminq(&adapter->hw);
        adapter->netdev->flags &= ~IFF_UP;
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
        adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
        adapter->state = __IAVF_DOWN;
        wake_up(&adapter->down_waitqueue);
@@ -2095,11 +2113,14 @@ static void iavf_reset_task(struct work_struct *work)
        /* When device is being removed it doesn't make sense to run the reset
         * task, just return in such a case.
         */
-       if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+       if (mutex_is_locked(&adapter->remove_lock))
                return;
 
-       while (test_and_set_bit(__IAVF_IN_CLIENT_TASK,
-                               &adapter->crit_section))
+       if (iavf_lock_timeout(&adapter->crit_lock, 200)) {
+               schedule_work(&adapter->reset_task);
+               return;
+       }
+       while (!mutex_trylock(&adapter->client_lock))
                usleep_range(500, 1000);
        if (CLIENT_ENABLED(adapter)) {
                adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN |
@@ -2151,7 +2172,7 @@ static void iavf_reset_task(struct work_struct *work)
                dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
                        reg_val);
                iavf_disable_vf(adapter);
-               clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+               mutex_unlock(&adapter->client_lock);
                return; /* Do not attempt to reinit. It's dead, Jim. */
        }
 
@@ -2278,13 +2299,13 @@ continue_reset:
                adapter->state = __IAVF_DOWN;
                wake_up(&adapter->down_waitqueue);
        }
-       clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->client_lock);
+       mutex_unlock(&adapter->crit_lock);
 
        return;
 reset_err:
-       clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->client_lock);
+       mutex_unlock(&adapter->crit_lock);
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
        iavf_close(netdev);
 }
@@ -2312,6 +2333,8 @@ static void iavf_adminq_task(struct work_struct *work)
        if (!event.msg_buf)
                goto out;
 
+       if (iavf_lock_timeout(&adapter->crit_lock, 200))
+               goto freedom;
        do {
                ret = iavf_clean_arq_element(hw, &event, &pending);
                v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
@@ -2325,6 +2348,7 @@ static void iavf_adminq_task(struct work_struct *work)
                if (pending != 0)
                        memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
        } while (pending);
+       mutex_unlock(&adapter->crit_lock);
 
        if ((adapter->flags &
             (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
@@ -2391,7 +2415,7 @@ static void iavf_client_task(struct work_struct *work)
         * later.
         */
 
-       if (test_and_set_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section))
+       if (!mutex_trylock(&adapter->client_lock))
                return;
 
        if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) {
@@ -2414,7 +2438,7 @@ static void iavf_client_task(struct work_struct *work)
                adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN;
        }
 out:
-       clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->client_lock);
 }
 
 /**
@@ -3017,8 +3041,7 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter,
        if (!filter)
                return -ENOMEM;
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section)) {
+       while (!mutex_trylock(&adapter->crit_lock)) {
                if (--count == 0)
                        goto err;
                udelay(1);
@@ -3049,7 +3072,7 @@ err:
        if (err)
                kfree(filter);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
        return err;
 }
 
@@ -3196,8 +3219,7 @@ static int iavf_open(struct net_device *netdev)
                return -EIO;
        }
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section))
+       while (!mutex_trylock(&adapter->crit_lock))
                usleep_range(500, 1000);
 
        if (adapter->state != __IAVF_DOWN) {
@@ -3232,7 +3254,7 @@ static int iavf_open(struct net_device *netdev)
 
        iavf_irq_enable(adapter, true);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 
        return 0;
 
@@ -3244,7 +3266,7 @@ err_setup_rx:
 err_setup_tx:
        iavf_free_all_tx_resources(adapter);
 err_unlock:
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 
        return err;
 }
@@ -3268,8 +3290,7 @@ static int iavf_close(struct net_device *netdev)
        if (adapter->state <= __IAVF_DOWN_PENDING)
                return 0;
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section))
+       while (!mutex_trylock(&adapter->crit_lock))
                usleep_range(500, 1000);
 
        set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3280,7 +3301,7 @@ static int iavf_close(struct net_device *netdev)
        adapter->state = __IAVF_DOWN_PENDING;
        iavf_free_traffic_irqs(adapter);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 
        /* We explicitly don't free resources here because the hardware is
         * still active and can DMA into memory. Resources are cleared in
@@ -3629,6 +3650,10 @@ static void iavf_init_task(struct work_struct *work)
                                                    init_task.work);
        struct iavf_hw *hw = &adapter->hw;
 
+       if (iavf_lock_timeout(&adapter->crit_lock, 5000)) {
+               dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+               return;
+       }
        switch (adapter->state) {
        case __IAVF_STARTUP:
                if (iavf_startup(adapter) < 0)
@@ -3641,14 +3666,14 @@ static void iavf_init_task(struct work_struct *work)
        case __IAVF_INIT_GET_RESOURCES:
                if (iavf_init_get_resources(adapter) < 0)
                        goto init_failed;
-               return;
+               goto out;
        default:
                goto init_failed;
        }
 
        queue_delayed_work(iavf_wq, &adapter->init_task,
                           msecs_to_jiffies(30));
-       return;
+       goto out;
 init_failed:
        if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
                dev_err(&adapter->pdev->dev,
@@ -3657,9 +3682,11 @@ init_failed:
                iavf_shutdown_adminq(hw);
                adapter->state = __IAVF_STARTUP;
                queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5);
-               return;
+               goto out;
        }
        queue_delayed_work(iavf_wq, &adapter->init_task, HZ);
+out:
+       mutex_unlock(&adapter->crit_lock);
 }
 
 /**
@@ -3676,9 +3703,12 @@ static void iavf_shutdown(struct pci_dev *pdev)
        if (netif_running(netdev))
                iavf_close(netdev);
 
+       if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+               dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
        /* Prevent the watchdog from running. */
        adapter->state = __IAVF_REMOVE;
        adapter->aq_required = 0;
+       mutex_unlock(&adapter->crit_lock);
 
 #ifdef CONFIG_PM
        pci_save_state(pdev);
@@ -3772,6 +3802,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* set up the locks for the AQ, do this only once in probe
         * and destroy them only once in remove
         */
+       mutex_init(&adapter->crit_lock);
+       mutex_init(&adapter->client_lock);
+       mutex_init(&adapter->remove_lock);
        mutex_init(&hw->aq.asq_mutex);
        mutex_init(&hw->aq.arq_mutex);
 
@@ -3823,8 +3856,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
 
        netif_device_detach(netdev);
 
-       while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
-                               &adapter->crit_section))
+       while (!mutex_trylock(&adapter->crit_lock))
                usleep_range(500, 1000);
 
        if (netif_running(netdev)) {
@@ -3835,7 +3867,7 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
        iavf_free_misc_irq(adapter);
        iavf_reset_interrupt_capability(adapter);
 
-       clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+       mutex_unlock(&adapter->crit_lock);
 
        return 0;
 }
@@ -3897,7 +3929,7 @@ static void iavf_remove(struct pci_dev *pdev)
        struct iavf_hw *hw = &adapter->hw;
        int err;
        /* Indicate we are in remove and not to run reset_task */
-       set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section);
+       mutex_lock(&adapter->remove_lock);
        cancel_delayed_work_sync(&adapter->init_task);
        cancel_work_sync(&adapter->reset_task);
        cancel_delayed_work_sync(&adapter->client_task);
@@ -3912,10 +3944,6 @@ static void iavf_remove(struct pci_dev *pdev)
                                 err);
        }
 
-       /* Shut down all the garbage mashers on the detention level */
-       adapter->state = __IAVF_REMOVE;
-       adapter->aq_required = 0;
-       adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
        iavf_request_reset(adapter);
        msleep(50);
        /* If the FW isn't responding, kick it once, but only once. */
@@ -3923,6 +3951,13 @@ static void iavf_remove(struct pci_dev *pdev)
                iavf_request_reset(adapter);
                msleep(50);
        }
+       if (iavf_lock_timeout(&adapter->crit_lock, 5000))
+               dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__);
+
+       /* Shut down all the garbage mashers on the detention level */
+       adapter->state = __IAVF_REMOVE;
+       adapter->aq_required = 0;
+       adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
        iavf_free_all_tx_resources(adapter);
        iavf_free_all_rx_resources(adapter);
        iavf_misc_irq_disable(adapter);
@@ -3942,6 +3977,11 @@ static void iavf_remove(struct pci_dev *pdev)
        /* destroy the locks only once, here */
        mutex_destroy(&hw->aq.arq_mutex);
        mutex_destroy(&hw->aq.asq_mutex);
+       mutex_destroy(&adapter->client_lock);
+       mutex_unlock(&adapter->crit_lock);
+       mutex_destroy(&adapter->crit_lock);
+       mutex_unlock(&adapter->remove_lock);
+       mutex_destroy(&adapter->remove_lock);
 
        iounmap(hw->hw_addr);
        pci_release_regions(pdev);
index 7fe6e8e..14afce8 100644 (file)
@@ -477,7 +477,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
 {
        struct devlink *devlink;
 
-       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
        if (!devlink)
                return NULL;
 
@@ -504,7 +504,7 @@ int ice_devlink_register(struct ice_pf *pf)
        struct device *dev = ice_pf_to_dev(pf);
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err) {
                dev_err(dev, "devlink registration failed: %d\n", err);
                return err;
index d95a5da..c451cf4 100644 (file)
@@ -3568,8 +3568,10 @@ __ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
        return 0;
 }
 
-static int
-ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int ice_get_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        return __ice_get_coalesce(netdev, ec, -1);
 }
@@ -3787,8 +3789,10 @@ set_complete:
        return 0;
 }
 
-static int
-ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int ice_set_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        return __ice_set_coalesce(netdev, ec, -1);
 }
index a8bd512..0d6c143 100644 (file)
@@ -6575,12 +6575,12 @@ event_after:
 }
 
 /**
- * ice_do_ioctl - Access the hwtstamp interface
+ * ice_eth_ioctl - Access the hwtstamp interface
  * @netdev: network interface device structure
  * @ifr: interface request data
  * @cmd: ioctl command
  */
-static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 {
        struct ice_netdev_priv *np = netdev_priv(netdev);
        struct ice_pf *pf = np->vsi->back;
@@ -7246,7 +7246,7 @@ static const struct net_device_ops ice_netdev_ops = {
        .ndo_change_mtu = ice_change_mtu,
        .ndo_get_stats64 = ice_get_stats64,
        .ndo_set_tx_maxrate = ice_set_tx_maxrate,
-       .ndo_do_ioctl = ice_do_ioctl,
+       .ndo_eth_ioctl = ice_eth_ioctl,
        .ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
        .ndo_set_vf_mac = ice_set_vf_mac,
        .ndo_get_vf_config = ice_get_vf_cfg,
index e63ee3c..1277c5c 100644 (file)
@@ -492,6 +492,7 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
  **/
 static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
 {
+       int failed_cnt = 3;
        bool is_failed;
        int i;
 
@@ -502,9 +503,12 @@ static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
                                is_failed = true;
                                array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
                                wrfl();
-                               break;
                        }
                }
+               if (is_failed && --failed_cnt <= 0) {
+                       hw_dbg("Failed to update MTA_REGISTER, too many retries");
+                       break;
+               }
        } while (is_failed);
 }
 
index 636a1b1..fb10293 100644 (file)
@@ -2182,7 +2182,9 @@ static int igb_set_phys_id(struct net_device *netdev,
 }
 
 static int igb_set_coalesce(struct net_device *netdev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        int i;
@@ -2238,7 +2240,9 @@ static int igb_set_coalesce(struct net_device *netdev,
 }
 
 static int igb_get_coalesce(struct net_device *netdev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
 
@@ -2343,8 +2347,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
        switch (stringset) {
        case ETH_SS_TEST:
-               memcpy(data, *igb_gstrings_test,
-                       IGB_TEST_LEN*ETH_GSTRING_LEN);
+               memcpy(data, igb_gstrings_test, sizeof(igb_gstrings_test));
                break;
        case ETH_SS_STATS:
                for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++)
index 171a7a6..751de06 100644 (file)
@@ -2991,7 +2991,7 @@ static const struct net_device_ops igb_netdev_ops = {
        .ndo_set_rx_mode        = igb_set_rx_mode,
        .ndo_set_mac_address    = igb_set_mac,
        .ndo_change_mtu         = igb_change_mtu,
-       .ndo_do_ioctl           = igb_ioctl,
+       .ndo_eth_ioctl          = igb_ioctl,
        .ndo_tx_timeout         = igb_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
index f4835eb..06e5bd6 100644 (file)
@@ -314,7 +314,9 @@ static int igbvf_set_wol(struct net_device *netdev,
 }
 
 static int igbvf_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct igbvf_adapter *adapter = netdev_priv(netdev);
 
@@ -327,7 +329,9 @@ static int igbvf_get_coalesce(struct net_device *netdev,
 }
 
 static int igbvf_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct igbvf_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
index 1bbe986..d32e72d 100644 (file)
@@ -2657,7 +2657,7 @@ static const struct net_device_ops igbvf_netdev_ops = {
        .ndo_set_rx_mode        = igbvf_set_rx_mode,
        .ndo_set_mac_address    = igbvf_set_mac,
        .ndo_change_mtu         = igbvf_change_mtu,
-       .ndo_do_ioctl           = igbvf_ioctl,
+       .ndo_eth_ioctl          = igbvf_ioctl,
        .ndo_tx_timeout         = igbvf_tx_timeout,
        .ndo_vlan_rx_add_vid    = igbvf_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = igbvf_vlan_rx_kill_vid,
index 5901ed9..3e386c3 100644 (file)
@@ -33,6 +33,8 @@ void igc_ethtool_set_ops(struct net_device *);
 #define IGC_N_PEROUT   2
 #define IGC_N_SDP      4
 
+#define MAX_FLEX_FILTER                        32
+
 enum igc_mac_filter_type {
        IGC_MAC_FILTER_TYPE_DST = 0,
        IGC_MAC_FILTER_TYPE_SRC
@@ -96,6 +98,13 @@ struct igc_ring {
        u32 start_time;
        u32 end_time;
 
+       /* CBS parameters */
+       bool cbs_enable;                /* indicates if CBS is enabled */
+       s32 idleslope;                  /* idleSlope in kbps */
+       s32 sendslope;                  /* sendSlope in kbps */
+       s32 hicredit;                   /* hiCredit in bytes */
+       s32 locredit;                   /* loCredit in bytes */
+
        /* everything past this point are written often */
        u16 next_to_clean;
        u16 next_to_use;
@@ -225,6 +234,7 @@ struct igc_adapter {
        struct timecounter tc;
        struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
        ktime_t ptp_reset_start; /* Reset time in clock mono */
+       struct system_time_snapshot snapshot;
 
        char fw_version[32];
 
@@ -287,6 +297,10 @@ extern char igc_driver_name[];
 #define IGC_FLAG_VLAN_PROMISC          BIT(15)
 #define IGC_FLAG_RX_LEGACY             BIT(16)
 #define IGC_FLAG_TSN_QBV_ENABLED       BIT(17)
+#define IGC_FLAG_TSN_QAV_ENABLED       BIT(18)
+
+#define IGC_FLAG_TSN_ANY_ENABLED \
+       (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED)
 
 #define IGC_FLAG_RSS_FIELD_IPV4_UDP    BIT(6)
 #define IGC_FLAG_RSS_FIELD_IPV6_UDP    BIT(7)
@@ -476,18 +490,28 @@ struct igc_q_vector {
 };
 
 enum igc_filter_match_flags {
-       IGC_FILTER_FLAG_ETHER_TYPE =    0x1,
-       IGC_FILTER_FLAG_VLAN_TCI   =    0x2,
-       IGC_FILTER_FLAG_SRC_MAC_ADDR =  0x4,
-       IGC_FILTER_FLAG_DST_MAC_ADDR =  0x8,
+       IGC_FILTER_FLAG_ETHER_TYPE =    BIT(0),
+       IGC_FILTER_FLAG_VLAN_TCI   =    BIT(1),
+       IGC_FILTER_FLAG_SRC_MAC_ADDR =  BIT(2),
+       IGC_FILTER_FLAG_DST_MAC_ADDR =  BIT(3),
+       IGC_FILTER_FLAG_USER_DATA =     BIT(4),
+       IGC_FILTER_FLAG_VLAN_ETYPE =    BIT(5),
 };
 
 struct igc_nfc_filter {
        u8 match_flags;
        u16 etype;
+       __be16 vlan_etype;
        u16 vlan_tci;
        u8 src_addr[ETH_ALEN];
        u8 dst_addr[ETH_ALEN];
+       u8 user_data[8];
+       u8 user_mask[8];
+       u8 flex_index;
+       u8 rx_queue;
+       u8 prio;
+       u8 immediate_irq;
+       u8 drop;
 };
 
 struct igc_nfc_rule {
@@ -495,12 +519,24 @@ struct igc_nfc_rule {
        struct igc_nfc_filter filter;
        u32 location;
        u16 action;
+       bool flex;
 };
 
-/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority
- * based, and 8 ethertype based.
+/* IGC supports a total of 32 NFC rules: 16 MAC address based, 8 VLAN priority
+ * based, 8 ethertype based and 32 Flex filter based rules.
  */
-#define IGC_MAX_RXNFC_RULES            32
+#define IGC_MAX_RXNFC_RULES            64
+
+struct igc_flex_filter {
+       u8 index;
+       u8 data[128];
+       u8 mask[16];
+       u8 length;
+       u8 rx_queue;
+       u8 prio;
+       u8 immediate_irq;
+       u8 drop;
+};
 
 /* igc_desc_unused - calculate if we have unused descriptors */
 static inline u16 igc_desc_unused(const struct igc_ring *ring)
index d0700d4..84f142f 100644 (file)
@@ -187,15 +187,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw)
 
        igc_check_for_copper_link(hw);
 
-       /* Verify phy id and set remaining function pointers */
-       switch (phy->id) {
-       case I225_I_PHY_ID:
-               phy->type       = igc_phy_i225;
-               break;
-       default:
-               ret_val = -IGC_ERR_PHY;
-               goto out;
-       }
+       phy->type = igc_phy_i225;
 
 out:
        return ret_val;
index c3a5a55..a4bbee7 100644 (file)
 #define IGC_WUC_PME_EN 0x00000002 /* PME Enable */
 
 /* Wake Up Filter Control */
-#define IGC_WUFC_LNKC  0x00000001 /* Link Status Change Wakeup Enable */
-#define IGC_WUFC_MAG   0x00000002 /* Magic Packet Wakeup Enable */
-#define IGC_WUFC_EX    0x00000004 /* Directed Exact Wakeup Enable */
-#define IGC_WUFC_MC    0x00000008 /* Directed Multicast Wakeup Enable */
-#define IGC_WUFC_BC    0x00000010 /* Broadcast Wakeup Enable */
+#define IGC_WUFC_LNKC          0x00000001 /* Link Status Change Wakeup Enable */
+#define IGC_WUFC_MAG           0x00000002 /* Magic Packet Wakeup Enable */
+#define IGC_WUFC_EX            0x00000004 /* Directed Exact Wakeup Enable */
+#define IGC_WUFC_MC            0x00000008 /* Directed Multicast Wakeup Enable */
+#define IGC_WUFC_BC            0x00000010 /* Broadcast Wakeup Enable */
+#define IGC_WUFC_FLEX_HQ       BIT(14)    /* Flex Filters Host Queuing */
+#define IGC_WUFC_FLX0          BIT(16)    /* Flexible Filter 0 Enable */
+#define IGC_WUFC_FLX1          BIT(17)    /* Flexible Filter 1 Enable */
+#define IGC_WUFC_FLX2          BIT(18)    /* Flexible Filter 2 Enable */
+#define IGC_WUFC_FLX3          BIT(19)    /* Flexible Filter 3 Enable */
+#define IGC_WUFC_FLX4          BIT(20)    /* Flexible Filter 4 Enable */
+#define IGC_WUFC_FLX5          BIT(21)    /* Flexible Filter 5 Enable */
+#define IGC_WUFC_FLX6          BIT(22)    /* Flexible Filter 6 Enable */
+#define IGC_WUFC_FLX7          BIT(23)    /* Flexible Filter 7 Enable */
+
+#define IGC_WUFC_FILTER_MASK GENMASK(23, 14)
 
 #define IGC_CTRL_ADVD3WUC      0x00100000  /* D3 WUC */
 
 /* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
 #define IGC_WUPM_BYTES 128
 
+/* Wakeup Filter Control Extended */
+#define IGC_WUFC_EXT_FLX8      BIT(8)  /* Flexible Filter 8 Enable */
+#define IGC_WUFC_EXT_FLX9      BIT(9)  /* Flexible Filter 9 Enable */
+#define IGC_WUFC_EXT_FLX10     BIT(10) /* Flexible Filter 10 Enable */
+#define IGC_WUFC_EXT_FLX11     BIT(11) /* Flexible Filter 11 Enable */
+#define IGC_WUFC_EXT_FLX12     BIT(12) /* Flexible Filter 12 Enable */
+#define IGC_WUFC_EXT_FLX13     BIT(13) /* Flexible Filter 13 Enable */
+#define IGC_WUFC_EXT_FLX14     BIT(14) /* Flexible Filter 14 Enable */
+#define IGC_WUFC_EXT_FLX15     BIT(15) /* Flexible Filter 15 Enable */
+#define IGC_WUFC_EXT_FLX16     BIT(16) /* Flexible Filter 16 Enable */
+#define IGC_WUFC_EXT_FLX17     BIT(17) /* Flexible Filter 17 Enable */
+#define IGC_WUFC_EXT_FLX18     BIT(18) /* Flexible Filter 18 Enable */
+#define IGC_WUFC_EXT_FLX19     BIT(19) /* Flexible Filter 19 Enable */
+#define IGC_WUFC_EXT_FLX20     BIT(20) /* Flexible Filter 20 Enable */
+#define IGC_WUFC_EXT_FLX21     BIT(21) /* Flexible Filter 21 Enable */
+#define IGC_WUFC_EXT_FLX22     BIT(22) /* Flexible Filter 22 Enable */
+#define IGC_WUFC_EXT_FLX23     BIT(23) /* Flexible Filter 23 Enable */
+#define IGC_WUFC_EXT_FLX24     BIT(24) /* Flexible Filter 24 Enable */
+#define IGC_WUFC_EXT_FLX25     BIT(25) /* Flexible Filter 25 Enable */
+#define IGC_WUFC_EXT_FLX26     BIT(26) /* Flexible Filter 26 Enable */
+#define IGC_WUFC_EXT_FLX27     BIT(27) /* Flexible Filter 27 Enable */
+#define IGC_WUFC_EXT_FLX28     BIT(28) /* Flexible Filter 28 Enable */
+#define IGC_WUFC_EXT_FLX29     BIT(29) /* Flexible Filter 29 Enable */
+#define IGC_WUFC_EXT_FLX30     BIT(30) /* Flexible Filter 30 Enable */
+#define IGC_WUFC_EXT_FLX31     BIT(31) /* Flexible Filter 31 Enable */
+
+#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8)
+
+/* Physical Func Reset Done Indication */
+#define IGC_CTRL_EXT_LINK_MODE_MASK    0x00C00000
+
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define COPPER_LINK_UP_LIMIT           10
 #define PHY_AUTO_NEG_LIMIT             45
 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT  0x00000001
 #define IGC_TXQCTL_STRICT_CYCLE                0x00000002
 #define IGC_TXQCTL_STRICT_END          0x00000004
+#define IGC_TXQCTL_QAV_SEL_MASK                0x000000C0
+#define IGC_TXQCTL_QAV_SEL_CBS0                0x00000080
+#define IGC_TXQCTL_QAV_SEL_CBS1                0x000000C0
+
+#define IGC_TQAVCC_IDLESLOPE_MASK      0xFFFF
+#define IGC_TQAVCC_KEEP_CREDITS                BIT(30)
+
+#define IGC_MAX_SR_QUEUES              2
 
 /* Receive Checksum Control */
 #define IGC_RXCSUM_CRCOFL      0x00000800   /* CRC32 offload enable */
 #define IGC_RXCSUM_PCSD                0x00002000   /* packet checksum disabled */
 
+/* PCIe PTM Control */
+#define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
+#define IGC_PTM_CTRL_EN                BIT(30) /* Enable PTM */
+#define IGC_PTM_CTRL_TRIG      BIT(31) /* PTM Cycle trigger */
+#define IGC_PTM_CTRL_SHRT_CYC(usec)    (((usec) & 0x2f) << 2)
+#define IGC_PTM_CTRL_PTM_TO(usec)      (((usec) & 0xff) << 8)
+
+#define IGC_PTM_SHORT_CYC_DEFAULT      10  /* Default Short/interrupted cycle interval */
+#define IGC_PTM_CYC_TIME_DEFAULT       5   /* Default PTM cycle time */
+#define IGC_PTM_TIMEOUT_DEFAULT                255 /* Default timeout for PTM errors */
+
+/* PCIe Digital Delay */
+#define IGC_PCIE_DIG_DELAY_DEFAULT     0x01440000
+
+/* PCIe PHY Delay */
+#define IGC_PCIE_PHY_DELAY_DEFAULT     0x40900000
+
+#define IGC_TIMADJ_ADJUST_METH         0x40000000
+
+/* PCIe PTM Status */
+#define IGC_PTM_STAT_VALID             BIT(0) /* PTM Status */
+#define IGC_PTM_STAT_RET_ERR           BIT(1) /* Root port timeout */
+#define IGC_PTM_STAT_BAD_PTM_RES       BIT(2) /* PTM Response msg instead of PTM Response Data */
+#define IGC_PTM_STAT_T4M1_OVFL         BIT(3) /* T4 minus T1 overflow */
+#define IGC_PTM_STAT_ADJUST_1ST                BIT(4) /* 1588 timer adjusted during 1st PTM cycle */
+#define IGC_PTM_STAT_ADJUST_CYC                BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */
+
+/* PCIe PTM Cycle Control */
+#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec)      ((msec) & 0x3ff) /* PTM Cycle Time (msec) */
+#define IGC_PTM_CYCLE_CTRL_AUTO_CYC_EN         BIT(31) /* PTM Cycle Control */
+
 /* GPY211 - I225 defines */
 #define GPY_MMD_MASK           0xFFFF0000
 #define GPY_MMD_SHIFT          16
index fa41718..e0a76ac 100644 (file)
@@ -862,7 +862,9 @@ static void igc_ethtool_get_stats(struct net_device *netdev,
 }
 
 static int igc_ethtool_get_coalesce(struct net_device *netdev,
-                                   struct ethtool_coalesce *ec)
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -882,7 +884,9 @@ static int igc_ethtool_get_coalesce(struct net_device *netdev,
 }
 
 static int igc_ethtool_set_coalesce(struct net_device *netdev,
-                                   struct ethtool_coalesce *ec)
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct igc_adapter *adapter = netdev_priv(netdev);
        int i;
@@ -979,6 +983,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
                eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
        }
 
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) {
+               fsp->flow_type |= FLOW_EXT;
+               memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data));
+               memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data));
+       }
+
        mutex_unlock(&adapter->nfc_rule_lock);
        return 0;
 
@@ -1215,6 +1225,30 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
                ether_addr_copy(rule->filter.dst_addr,
                                fsp->h_u.ether_spec.h_dest);
        }
+
+       /* VLAN etype matching */
+       if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) {
+               rule->filter.vlan_etype = fsp->h_ext.vlan_etype;
+               rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE;
+       }
+
+       /* Check for user defined data */
+       if ((fsp->flow_type & FLOW_EXT) &&
+           (fsp->h_ext.data[0] || fsp->h_ext.data[1])) {
+               rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA;
+               memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data));
+               memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data));
+       }
+
+       /* When multiple filter options or user data or vlan etype is set, use a
+        * flex filter.
+        */
+       if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) ||
+           (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) ||
+           (rule->filter.match_flags & (rule->filter.match_flags - 1)))
+               rule->flex = true;
+       else
+               rule->flex = false;
 }
 
 /**
@@ -1244,11 +1278,6 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
                return -EINVAL;
        }
 
-       if (flags & (flags - 1)) {
-               netdev_dbg(dev, "Rule with multiple matches not supported\n");
-               return -EOPNOTSUPP;
-       }
-
        list_for_each_entry(tmp, &adapter->nfc_rule_list, list) {
                if (!memcmp(&rule->filter, &tmp->filter,
                            sizeof(rule->filter)) &&
@@ -1280,12 +1309,6 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
                return -EOPNOTSUPP;
        }
 
-       if ((fsp->flow_type & FLOW_EXT) &&
-           fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
-               netdev_dbg(netdev, "VLAN mask not supported\n");
-               return -EOPNOTSUPP;
-       }
-
        if (fsp->ring_cookie >= adapter->num_rx_queues) {
                netdev_dbg(netdev, "Invalid action\n");
                return -EINVAL;
index ed2d66b..b877efa 100644 (file)
@@ -12,6 +12,8 @@
 #include <net/pkt_sched.h>
 #include <linux/bpf_trace.h>
 #include <net/xdp_sock_drv.h>
+#include <linux/pci.h>
+
 #include <net/ipv6.h>
 
 #include "igc.h"
@@ -118,7 +120,7 @@ void igc_reset(struct igc_adapter *adapter)
        igc_ptp_reset(adapter);
 
        /* Re-enable TSN offloading, where applicable. */
-       igc_tsn_offload_apply(adapter);
+       igc_tsn_reset(adapter);
 
        igc_get_phy_info(hw);
 }
@@ -3078,11 +3080,320 @@ static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
                   etype);
 }
 
+static int igc_flex_filter_select(struct igc_adapter *adapter,
+                                 struct igc_flex_filter *input,
+                                 u32 *fhft)
+{
+       struct igc_hw *hw = &adapter->hw;
+       u8 fhft_index;
+       u32 fhftsl;
+
+       if (input->index >= MAX_FLEX_FILTER) {
+               dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
+               return -EINVAL;
+       }
+
+       /* Indirect table select register */
+       fhftsl = rd32(IGC_FHFTSL);
+       fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
+       switch (input->index) {
+       case 0 ... 7:
+               fhftsl |= 0x00;
+               break;
+       case 8 ... 15:
+               fhftsl |= 0x01;
+               break;
+       case 16 ... 23:
+               fhftsl |= 0x02;
+               break;
+       case 24 ... 31:
+               fhftsl |= 0x03;
+               break;
+       }
+       wr32(IGC_FHFTSL, fhftsl);
+
+       /* Normalize index down to host table register */
+       fhft_index = input->index % 8;
+
+       *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
+               IGC_FHFT_EXT(fhft_index - 4);
+
+       return 0;
+}
+
+static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
+                                   struct igc_flex_filter *input)
+{
+       struct device *dev = &adapter->pdev->dev;
+       struct igc_hw *hw = &adapter->hw;
+       u8 *data = input->data;
+       u8 *mask = input->mask;
+       u32 queuing;
+       u32 fhft;
+       u32 wufc;
+       int ret;
+       int i;
+
+       /* Length has to be aligned to 8. Otherwise the filter will fail. Bail
+        * out early to avoid surprises later.
+        */
+       if (input->length % 8 != 0) {
+               dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
+               return -EINVAL;
+       }
+
+       /* Select corresponding flex filter register and get base for host table. */
+       ret = igc_flex_filter_select(adapter, input, &fhft);
+       if (ret)
+               return ret;
+
+       /* When adding a filter globally disable flex filter feature. That is
+        * recommended within the datasheet.
+        */
+       wufc = rd32(IGC_WUFC);
+       wufc &= ~IGC_WUFC_FLEX_HQ;
+       wr32(IGC_WUFC, wufc);
+
+       /* Configure filter */
+       queuing = input->length & IGC_FHFT_LENGTH_MASK;
+       queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
+       queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
+
+       if (input->immediate_irq)
+               queuing |= IGC_FHFT_IMM_INT;
+
+       if (input->drop)
+               queuing |= IGC_FHFT_DROP;
+
+       wr32(fhft + 0xFC, queuing);
+
+       /* Write data (128 byte) and mask (128 bit) */
+       for (i = 0; i < 16; ++i) {
+               const size_t data_idx = i * 8;
+               const size_t row_idx = i * 16;
+               u32 dw0 =
+                       (data[data_idx + 0] << 0) |
+                       (data[data_idx + 1] << 8) |
+                       (data[data_idx + 2] << 16) |
+                       (data[data_idx + 3] << 24);
+               u32 dw1 =
+                       (data[data_idx + 4] << 0) |
+                       (data[data_idx + 5] << 8) |
+                       (data[data_idx + 6] << 16) |
+                       (data[data_idx + 7] << 24);
+               u32 tmp;
+
+               /* Write row: dw0, dw1 and mask */
+               wr32(fhft + row_idx, dw0);
+               wr32(fhft + row_idx + 4, dw1);
+
+               /* mask is only valid for MASK(7, 0) */
+               tmp = rd32(fhft + row_idx + 8);
+               tmp &= ~GENMASK(7, 0);
+               tmp |= mask[i];
+               wr32(fhft + row_idx + 8, tmp);
+       }
+
+       /* Enable filter. */
+       wufc |= IGC_WUFC_FLEX_HQ;
+       if (input->index > 8) {
+               /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */
+               u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+               wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
+
+               wr32(IGC_WUFC_EXT, wufc_ext);
+       } else {
+               wufc |= (IGC_WUFC_FLX0 << input->index);
+       }
+       wr32(IGC_WUFC, wufc);
+
+       dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
+               input->index);
+
+       return 0;
+}
+
+static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
+                                     const void *src, unsigned int offset,
+                                     size_t len, const void *mask)
+{
+       int i;
+
+       /* data */
+       memcpy(&flex->data[offset], src, len);
+
+       /* mask */
+       for (i = 0; i < len; ++i) {
+               const unsigned int idx = i + offset;
+               const u8 *ptr = mask;
+
+               if (mask) {
+                       if (ptr[i] & 0xff)
+                               flex->mask[idx / 8] |= BIT(idx % 8);
+
+                       continue;
+               }
+
+               flex->mask[idx / 8] |= BIT(idx % 8);
+       }
+}
+
+static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
+{
+       struct igc_hw *hw = &adapter->hw;
+       u32 wufc, wufc_ext;
+       int i;
+
+       wufc = rd32(IGC_WUFC);
+       wufc_ext = rd32(IGC_WUFC_EXT);
+
+       for (i = 0; i < MAX_FLEX_FILTER; i++) {
+               if (i < 8) {
+                       if (!(wufc & (IGC_WUFC_FLX0 << i)))
+                               return i;
+               } else {
+                       if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
+                               return i;
+               }
+       }
+
+       return -ENOSPC;
+}
+
+static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
+{
+       struct igc_hw *hw = &adapter->hw;
+       u32 wufc, wufc_ext;
+
+       wufc = rd32(IGC_WUFC);
+       wufc_ext = rd32(IGC_WUFC_EXT);
+
+       if (wufc & IGC_WUFC_FILTER_MASK)
+               return true;
+
+       if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
+               return true;
+
+       return false;
+}
+
+static int igc_add_flex_filter(struct igc_adapter *adapter,
+                              struct igc_nfc_rule *rule)
+{
+       struct igc_flex_filter flex = { };
+       struct igc_nfc_filter *filter = &rule->filter;
+       unsigned int eth_offset, user_offset;
+       int ret, index;
+       bool vlan;
+
+       index = igc_find_avail_flex_filter_slot(adapter);
+       if (index < 0)
+               return -ENOSPC;
+
+       /* Construct the flex filter:
+        *  -> dest_mac [6]
+        *  -> src_mac [6]
+        *  -> tpid [2]
+        *  -> vlan tci [2]
+        *  -> ether type [2]
+        *  -> user data [8]
+        *  -> = 26 bytes => 32 length
+        */
+       flex.index    = index;
+       flex.length   = 32;
+       flex.rx_queue = rule->action;
+
+       vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
+       eth_offset = vlan ? 16 : 12;
+       user_offset = vlan ? 18 : 14;
+
+       /* Add destination MAC  */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+               igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
+                                         ETH_ALEN, NULL);
+
+       /* Add source MAC */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+               igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
+                                         ETH_ALEN, NULL);
+
+       /* Add VLAN etype */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
+               igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
+                                         sizeof(filter->vlan_etype),
+                                         NULL);
+
+       /* Add VLAN TCI */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
+               igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
+                                         sizeof(filter->vlan_tci), NULL);
+
+       /* Add Ether type */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+               __be16 etype = cpu_to_be16(filter->etype);
+
+               igc_flex_filter_add_field(&flex, &etype, eth_offset,
+                                         sizeof(etype), NULL);
+       }
+
+       /* Add user data */
+       if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
+               igc_flex_filter_add_field(&flex, &filter->user_data,
+                                         user_offset,
+                                         sizeof(filter->user_data),
+                                         filter->user_mask);
+
+       /* Add it down to the hardware and enable it. */
+       ret = igc_write_flex_filter_ll(adapter, &flex);
+       if (ret)
+               return ret;
+
+       filter->flex_index = index;
+
+       return 0;
+}
+
+static void igc_del_flex_filter(struct igc_adapter *adapter,
+                               u16 reg_index)
+{
+       struct igc_hw *hw = &adapter->hw;
+       u32 wufc;
+
+       /* Just disable the filter. The filter table itself is kept
+        * intact. Another flex_filter_add() should override the "old" data
+        * then.
+        */
+       if (reg_index > 8) {
+               u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+               wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
+               wr32(IGC_WUFC_EXT, wufc_ext);
+       } else {
+               wufc = rd32(IGC_WUFC);
+
+               wufc &= ~(IGC_WUFC_FLX0 << reg_index);
+               wr32(IGC_WUFC, wufc);
+       }
+
+       if (igc_flex_filter_in_use(adapter))
+               return;
+
+       /* No filters are in use, we may disable flex filters */
+       wufc = rd32(IGC_WUFC);
+       wufc &= ~IGC_WUFC_FLEX_HQ;
+       wr32(IGC_WUFC, wufc);
+}
+
 static int igc_enable_nfc_rule(struct igc_adapter *adapter,
-                              const struct igc_nfc_rule *rule)
+                              struct igc_nfc_rule *rule)
 {
        int err;
 
+       if (rule->flex) {
+               return igc_add_flex_filter(adapter, rule);
+       }
+
        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
                err = igc_add_etype_filter(adapter, rule->filter.etype,
                                           rule->action);
@@ -3119,6 +3430,11 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter,
 static void igc_disable_nfc_rule(struct igc_adapter *adapter,
                                 const struct igc_nfc_rule *rule)
 {
+       if (rule->flex) {
+               igc_del_flex_filter(adapter, rule->filter.flex_index);
+               return;
+       }
+
        if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
                igc_del_etype_filter(adapter, rule->filter.etype);
 
@@ -4817,6 +5133,7 @@ static irqreturn_t igc_msix_ring(int irq, void *data)
  */
 static int igc_request_msix(struct igc_adapter *adapter)
 {
+       unsigned int num_q_vectors = adapter->num_q_vectors;
        int i = 0, err = 0, vector = 0, free_vector = 0;
        struct net_device *netdev = adapter->netdev;
 
@@ -4825,7 +5142,13 @@ static int igc_request_msix(struct igc_adapter *adapter)
        if (err)
                goto err_out;
 
-       for (i = 0; i < adapter->num_q_vectors; i++) {
+       if (num_q_vectors > MAX_Q_VECTORS) {
+               num_q_vectors = MAX_Q_VECTORS;
+               dev_warn(&adapter->pdev->dev,
+                        "The number of queue vectors (%d) is higher than max allowed (%d)\n",
+                        adapter->num_q_vectors, MAX_Q_VECTORS);
+       }
+       for (i = 0; i < num_q_vectors; i++) {
                struct igc_q_vector *q_vector = adapter->q_vector[i];
 
                vector++;
@@ -4904,20 +5227,12 @@ bool igc_has_link(struct igc_adapter *adapter)
         * false until the igc_check_for_link establishes link
         * for copper adapters ONLY
         */
-       switch (hw->phy.media_type) {
-       case igc_media_type_copper:
-               if (!hw->mac.get_link_status)
-                       return true;
-               hw->mac.ops.check_for_link(hw);
-               link_active = !hw->mac.get_link_status;
-               break;
-       default:
-       case igc_media_type_unknown:
-               break;
-       }
+       if (!hw->mac.get_link_status)
+               return true;
+       hw->mac.ops.check_for_link(hw);
+       link_active = !hw->mac.get_link_status;
 
-       if (hw->mac.type == igc_i225 &&
-           hw->phy.id == I225_I_PHY_ID) {
+       if (hw->mac.type == igc_i225) {
                if (!netif_carrier_ok(adapter->netdev)) {
                        adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
                } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
@@ -5005,7 +5320,9 @@ static void igc_watchdog_task(struct work_struct *work)
                                adapter->tx_timeout_factor = 14;
                                break;
                        case SPEED_100:
-                               /* maybe add some timeout factor ? */
+                       case SPEED_1000:
+                       case SPEED_2500:
+                               adapter->tx_timeout_factor = 7;
                                break;
                        }
 
@@ -5432,7 +5749,6 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
                                      bool enable)
 {
        struct igc_ring *ring;
-       int i;
 
        if (queue < 0 || queue >= adapter->num_tx_queues)
                return -EINVAL;
@@ -5440,17 +5756,6 @@ static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
        ring = adapter->tx_ring[queue];
        ring->launchtime_enable = enable;
 
-       if (adapter->base_time)
-               return 0;
-
-       adapter->cycle_time = NSEC_PER_SEC;
-
-       for (i = 0; i < adapter->num_tx_queues; i++) {
-               ring = adapter->tx_ring[i];
-               ring->start_time = 0;
-               ring->end_time = NSEC_PER_SEC;
-       }
-
        return 0;
 }
 
@@ -5523,16 +5828,31 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
        return igc_tsn_offload_apply(adapter);
 }
 
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+       int i;
+
+       adapter->base_time = 0;
+       adapter->cycle_time = NSEC_PER_SEC;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
+               ring->start_time = 0;
+               ring->end_time = NSEC_PER_SEC;
+       }
+
+       return 0;
+}
+
 static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                                 struct tc_taprio_qopt_offload *qopt)
 {
        u32 start_time = 0, end_time = 0;
        size_t n;
 
-       if (!qopt->enable) {
-               adapter->base_time = 0;
-               return 0;
-       }
+       if (!qopt->enable)
+               return igc_tsn_clear_schedule(adapter);
 
        if (adapter->base_time)
                return -EALREADY;
@@ -5584,6 +5904,74 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
        return igc_tsn_offload_apply(adapter);
 }
 
+static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
+                              bool enable, int idleslope, int sendslope,
+                              int hicredit, int locredit)
+{
+       bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
+       struct net_device *netdev = adapter->netdev;
+       struct igc_ring *ring;
+       int i;
+
+       /* i225 has two sets of credit-based shaper logic.
+        * Supporting it only on the top two priority queues
+        */
+       if (queue < 0 || queue > 1)
+               return -EINVAL;
+
+       ring = adapter->tx_ring[queue];
+
+       for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
+               if (adapter->tx_ring[i])
+                       cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
+
+       /* CBS should be enabled on the highest priority queue first in order
+        * for the CBS algorithm to operate as intended.
+        */
+       if (enable) {
+               if (queue == 1 && !cbs_status[0]) {
+                       netdev_err(netdev,
+                                  "Enabling CBS on queue1 before queue0\n");
+                       return -EINVAL;
+               }
+       } else {
+               if (queue == 0 && cbs_status[1]) {
+                       netdev_err(netdev,
+                                  "Disabling CBS on queue0 before queue1\n");
+                       return -EINVAL;
+               }
+       }
+
+       ring->cbs_enable = enable;
+       ring->idleslope = idleslope;
+       ring->sendslope = sendslope;
+       ring->hicredit = hicredit;
+       ring->locredit = locredit;
+
+       return 0;
+}
+
+static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
+                             struct tc_cbs_qopt_offload *qopt)
+{
+       struct igc_hw *hw = &adapter->hw;
+       int err;
+
+       if (hw->mac.type != igc_i225)
+               return -EOPNOTSUPP;
+
+       if (qopt->queue < 0 || qopt->queue > 1)
+               return -EINVAL;
+
+       err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
+                                 qopt->idleslope, qopt->sendslope,
+                                 qopt->hicredit, qopt->locredit);
+       if (err)
+               return err;
+
+       return igc_tsn_offload_apply(adapter);
+}
+
 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
                        void *type_data)
 {
@@ -5596,6 +5984,9 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
        case TC_SETUP_QDISC_ETF:
                return igc_tsn_enable_launchtime(adapter, type_data);
 
+       case TC_SETUP_QDISC_CBS:
+               return igc_tsn_enable_cbs(adapter, type_data);
+
        default:
                return -EOPNOTSUPP;
        }
@@ -5704,7 +6095,7 @@ static const struct net_device_ops igc_netdev_ops = {
        .ndo_fix_features       = igc_fix_features,
        .ndo_set_features       = igc_set_features,
        .ndo_features_check     = igc_features_check,
-       .ndo_do_ioctl           = igc_ioctl,
+       .ndo_eth_ioctl          = igc_ioctl,
        .ndo_setup_tc           = igc_setup_tc,
        .ndo_bpf                = igc_bpf,
        .ndo_xdp_xmit           = igc_xdp_xmit,
@@ -5865,6 +6256,10 @@ static int igc_probe(struct pci_dev *pdev,
 
        pci_enable_pcie_error_reporting(pdev);
 
+       err = pci_enable_ptm(pdev, NULL);
+       if (err < 0)
+               dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
+
        pci_set_master(pdev);
 
        err = -ENOMEM;
@@ -6018,6 +6413,8 @@ static int igc_probe(struct pci_dev *pdev,
 
        igc_ptp_init(adapter);
 
+       igc_tsn_clear_schedule(adapter);
+
        /* reset the hardware with the new settings */
        igc_reset(adapter);
 
index 83aeb5e..5cad31c 100644 (file)
@@ -249,8 +249,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
                        return ret_val;
        }
 
-       if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
-           hw->phy.id == I225_I_PHY_ID) {
+       if (phy->autoneg_mask & ADVERTISE_2500_FULL) {
                /* Read the MULTI GBT AN Control Register - reg 7.32 */
                ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK <<
                                            MMD_DEVADDR_SHIFT) |
@@ -390,8 +389,7 @@ static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
                ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
                                             mii_1000t_ctrl_reg);
 
-       if ((phy->autoneg_mask & ADVERTISE_2500_FULL) &&
-           hw->phy.id == I225_I_PHY_ID)
+       if (phy->autoneg_mask & ADVERTISE_2500_FULL)
                ret_val = phy->ops.write_reg(hw,
                                             (STANDARD_AN_REG_MASK <<
                                             MMD_DEVADDR_SHIFT) |
index 4ae19c6..0f02190 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/ptp_classify.h>
 #include <linux/clocksource.h>
 #include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
 
 #define INCVALUE_MASK          0x7fffffff
 #define ISGN                   0x80000000
@@ -16,6 +18,9 @@
 #define IGC_SYSTIM_OVERFLOW_PERIOD     (HZ * 60 * 9)
 #define IGC_PTP_TX_TIMEOUT             (HZ * 15)
 
+#define IGC_PTM_STAT_SLEEP             2
+#define IGC_PTM_STAT_TIMEOUT           100
+
 /* SYSTIM read access for I225 */
 void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts)
 {
@@ -752,6 +757,147 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
                -EFAULT : 0;
 }
 
+/* The two conditions below must be met for cross timestamping via
+ * PCIe PTM:
+ *
+ * 1. We have an way to convert the timestamps in the PTM messages
+ *    to something related to the system clocks (right now, only
+ *    X86 systems with support for the Always Running Timer allow that);
+ *
+ * 2. We have PTM enabled in the path from the device to the PCIe root port.
+ */
+static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
+{
+       return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false;
+}
+
+static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
+{
+#if IS_ENABLED(CONFIG_X86_TSC)
+       return convert_art_ns_to_tsc(tstamp);
+#else
+       return (struct system_counterval_t) { };
+#endif
+}
+
+static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
+{
+       struct net_device *netdev = adapter->netdev;
+
+       switch (ptm_stat) {
+       case IGC_PTM_STAT_RET_ERR:
+               netdev_err(netdev, "PTM Error: Root port timeout\n");
+               break;
+       case IGC_PTM_STAT_BAD_PTM_RES:
+               netdev_err(netdev, "PTM Error: Bad response, PTM Response Data expected\n");
+               break;
+       case IGC_PTM_STAT_T4M1_OVFL:
+               netdev_err(netdev, "PTM Error: T4 minus T1 overflow\n");
+               break;
+       case IGC_PTM_STAT_ADJUST_1ST:
+               netdev_err(netdev, "PTM Error: 1588 timer adjusted during first PTM cycle\n");
+               break;
+       case IGC_PTM_STAT_ADJUST_CYC:
+               netdev_err(netdev, "PTM Error: 1588 timer adjusted during non-first PTM cycle\n");
+               break;
+       default:
+               netdev_err(netdev, "PTM Error: Unknown error (%#x)\n", ptm_stat);
+               break;
+       }
+}
+
+static int igc_phc_get_syncdevicetime(ktime_t *device,
+                                     struct system_counterval_t *system,
+                                     void *ctx)
+{
+       u32 stat, t2_curr_h, t2_curr_l, ctrl;
+       struct igc_adapter *adapter = ctx;
+       struct igc_hw *hw = &adapter->hw;
+       int err, count = 100;
+       ktime_t t1, t2_curr;
+
+       /* Get a snapshot of system clocks to use as historic value. */
+       ktime_get_snapshot(&adapter->snapshot);
+
+       do {
+               /* Doing this in a loop because in the event of a
+                * badly timed (ha!) system clock adjustment, we may
+                * get PTM errors from the PCI root, but these errors
+                * are transitory. Repeating the process returns valid
+                * data eventually.
+                */
+
+               /* To "manually" start the PTM cycle we need to clear and
+                * then set again the TRIG bit.
+                */
+               ctrl = rd32(IGC_PTM_CTRL);
+               ctrl &= ~IGC_PTM_CTRL_TRIG;
+               wr32(IGC_PTM_CTRL, ctrl);
+               ctrl |= IGC_PTM_CTRL_TRIG;
+               wr32(IGC_PTM_CTRL, ctrl);
+
+               /* The cycle only starts "for real" when software notifies
+                * that it has read the registers, this is done by setting
+                * VALID bit.
+                */
+               wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
+               err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
+                                        stat, IGC_PTM_STAT_SLEEP,
+                                        IGC_PTM_STAT_TIMEOUT);
+               if (err < 0) {
+                       netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
+                       return err;
+               }
+
+               if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID)
+                       break;
+
+               if (stat & ~IGC_PTM_STAT_VALID) {
+                       /* An error occurred, log it. */
+                       igc_ptm_log_error(adapter, stat);
+                       /* The STAT register is write-1-to-clear (W1C),
+                        * so write the previous error status to clear it.
+                        */
+                       wr32(IGC_PTM_STAT, stat);
+                       continue;
+               }
+       } while (--count);
+
+       if (!count) {
+               netdev_err(adapter->netdev, "Exceeded number of tries for PTM cycle\n");
+               return -ETIMEDOUT;
+       }
+
+       t1 = ktime_set(rd32(IGC_PTM_T1_TIM0_H), rd32(IGC_PTM_T1_TIM0_L));
+
+       t2_curr_l = rd32(IGC_PTM_CURR_T2_L);
+       t2_curr_h = rd32(IGC_PTM_CURR_T2_H);
+
+       /* FIXME: When the register that tells the endianness of the
+        * PTM registers are implemented, check them here and add the
+        * appropriate conversion.
+        */
+       t2_curr_h = swab32(t2_curr_h);
+
+       t2_curr = ((s64)t2_curr_h << 32 | t2_curr_l);
+
+       *device = t1;
+       *system = igc_device_tstamp_to_system(t2_curr);
+
+       return 0;
+}
+
+static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
+                                 struct system_device_crosststamp *cts)
+{
+       struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
+                                                  ptp_caps);
+
+       return get_device_system_crosststamp(igc_phc_get_syncdevicetime,
+                                            adapter, &adapter->snapshot, cts);
+}
+
 /**
  * igc_ptp_init - Initialize PTP functionality
  * @adapter: Board private structure
@@ -788,6 +934,11 @@ void igc_ptp_init(struct igc_adapter *adapter)
                adapter->ptp_caps.n_per_out = IGC_N_PEROUT;
                adapter->ptp_caps.n_pins = IGC_N_SDP;
                adapter->ptp_caps.verify = igc_ptp_verify_pin;
+
+               if (!igc_is_crosststamp_supported(adapter))
+                       break;
+
+               adapter->ptp_caps.getcrosststamp = igc_ptp_getcrosststamp;
                break;
        default:
                adapter->ptp_clock = NULL;
@@ -879,7 +1030,9 @@ void igc_ptp_stop(struct igc_adapter *adapter)
 void igc_ptp_reset(struct igc_adapter *adapter)
 {
        struct igc_hw *hw = &adapter->hw;
+       u32 cycle_ctrl, ctrl;
        unsigned long flags;
+       u32 timadj;
 
        /* reset the tstamp_config */
        igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
@@ -888,12 +1041,38 @@ void igc_ptp_reset(struct igc_adapter *adapter)
 
        switch (adapter->hw.mac.type) {
        case igc_i225:
+               timadj = rd32(IGC_TIMADJ);
+               timadj |= IGC_TIMADJ_ADJUST_METH;
+               wr32(IGC_TIMADJ, timadj);
+
                wr32(IGC_TSAUXC, 0x0);
                wr32(IGC_TSSDP, 0x0);
                wr32(IGC_TSIM,
                     IGC_TSICR_INTERRUPTS |
                     (adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0));
                wr32(IGC_IMS, IGC_IMS_TS);
+
+               if (!igc_is_crosststamp_supported(adapter))
+                       break;
+
+               wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT);
+               wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT);
+
+               cycle_ctrl = IGC_PTM_CYCLE_CTRL_CYC_TIME(IGC_PTM_CYC_TIME_DEFAULT);
+
+               wr32(IGC_PTM_CYCLE_CTRL, cycle_ctrl);
+
+               ctrl = IGC_PTM_CTRL_EN |
+                       IGC_PTM_CTRL_START_NOW |
+                       IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) |
+                       IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) |
+                       IGC_PTM_CTRL_TRIG;
+
+               wr32(IGC_PTM_CTRL, ctrl);
+
+               /* Force the first cycle to run. */
+               wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
                break;
        default:
                /* No work to do. */
index 0f82990..e197a33 100644 (file)
@@ -67,6 +67,9 @@
 
 /* Filtering Registers */
 #define IGC_ETQF(_n)           (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+#define IGC_FHFT(_n)           (0x09000 + (256 * (_n))) /* Flexible Host Filter */
+#define IGC_FHFT_EXT(_n)       (0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */
+#define IGC_FHFTSL             0x05804 /* Flex Filter indirect table select */
 
 /* ETQF register bit definitions */
 #define IGC_ETQF_FILTER_ENABLE BIT(26)
 #define IGC_ETQF_QUEUE_MASK    0x00070000
 #define IGC_ETQF_ETYPE_MASK    0x0000FFFF
 
+/* FHFT register bit definitions */
+#define IGC_FHFT_LENGTH_MASK   GENMASK(7, 0)
+#define IGC_FHFT_QUEUE_SHIFT   8
+#define IGC_FHFT_QUEUE_MASK    GENMASK(10, 8)
+#define IGC_FHFT_PRIO_SHIFT    16
+#define IGC_FHFT_PRIO_MASK     GENMASK(18, 16)
+#define IGC_FHFT_IMM_INT       BIT(24)
+#define IGC_FHFT_DROP          BIT(25)
+
+/* FHFTSL register bit definitions */
+#define IGC_FHFTSL_FTSL_SHIFT  0
+#define IGC_FHFTSL_FTSL_MASK   GENMASK(1, 0)
+
 /* Redirection Table - RW Array */
 #define IGC_RETA(_i)           (0x05C00 + ((_i) * 4))
 /* RSS Random Key - RW Array */
 #define IGC_ENDQT(_n)          (0x3334 + 0x4 * (_n))
 #define IGC_DTXMXPKTSZ         0x355C
 
+#define IGC_TQAVCC(_n)         (0x3004 + ((_n) * 0x40))
+#define IGC_TQAVHC(_n)         (0x300C + ((_n) * 0x40))
+
 /* System Time Registers */
 #define IGC_SYSTIML    0x0B600  /* System time register Low - RO */
 #define IGC_SYSTIMH    0x0B604  /* System time register High - RO */
 #define IGC_TXSTMPL    0x0B618  /* Tx timestamp value Low - RO */
 #define IGC_TXSTMPH    0x0B61C  /* Tx timestamp value High - RO */
 
+#define IGC_TIMADJ     0x0B60C  /* Time Adjustment Offset Register */
+
+/* PCIe Registers */
+#define IGC_PTM_CTRL           0x12540  /* PTM Control */
+#define IGC_PTM_STAT           0x12544  /* PTM Status */
+#define IGC_PTM_CYCLE_CTRL     0x1254C  /* PTM Cycle Control */
+
+/* PTM Time registers */
+#define IGC_PTM_T1_TIM0_L      0x12558  /* T1 on Timer 0 Low */
+#define IGC_PTM_T1_TIM0_H      0x1255C  /* T1 on Timer 0 High */
+
+#define IGC_PTM_CURR_T2_L      0x1258C  /* Current T2 Low */
+#define IGC_PTM_CURR_T2_H      0x12590  /* Current T2 High */
+#define IGC_PTM_PREV_T2_L      0x12584  /* Previous T2 Low */
+#define IGC_PTM_PREV_T2_H      0x12588  /* Previous T2 High */
+#define IGC_PTM_PREV_T4M1      0x12578  /* T4 Minus T1 on previous PTM Cycle */
+#define IGC_PTM_CURR_T4M1      0x1257C  /* T4 Minus T1 on this PTM Cycle */
+#define IGC_PTM_PREV_T3M2      0x12580  /* T3 Minus T2 on previous PTM Cycle */
+#define IGC_PTM_TDELAY         0x12594  /* PTM PCIe Link Delay */
+
+#define IGC_PCIE_DIG_DELAY     0x12550  /* PCIe Digital Delay */
+#define IGC_PCIE_PHY_DELAY     0x12554  /* PCIe PHY Delay */
+
 /* Management registers */
 #define IGC_MANC       0x05820  /* Management Control - RW */
 
 #define IGC_WUFC       0x05808  /* Wakeup Filter Control - RW */
 #define IGC_WUS                0x05810  /* Wakeup Status - R/W1C */
 #define IGC_WUPL       0x05900  /* Wakeup Packet Length - RW */
+#define IGC_WUFC_EXT   0x0580C  /* Wakeup Filter Control Register Extended - RW */
 
 /* Wake Up packet memory */
 #define IGC_WUPM_REG(_i)       (0x05A00 + ((_i) * 4))
index 174103c..0fce22d 100644 (file)
@@ -18,8 +18,38 @@ static bool is_any_launchtime(struct igc_adapter *adapter)
        return false;
 }
 
+static bool is_cbs_enabled(struct igc_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
+               if (ring->cbs_enable)
+                       return true;
+       }
+
+       return false;
+}
+
+static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+{
+       unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
+
+       if (adapter->base_time)
+               new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+       if (is_any_launchtime(adapter))
+               new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+       if (is_cbs_enabled(adapter))
+               new_flags |= IGC_FLAG_TSN_QAV_ENABLED;
+
+       return new_flags;
+}
+
 /* Returns the TSN specific registers to their default values after
- * TSN offloading is disabled.
+ * the adapter is reset.
  */
 static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 {
@@ -27,11 +57,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
        u32 tqavctrl;
        int i;
 
-       if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED))
-               return 0;
-
-       adapter->cycle_time = 0;
-
        wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
        wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
 
@@ -41,18 +66,12 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
        wr32(IGC_TQAVCTRL, tqavctrl);
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
-               struct igc_ring *ring = adapter->tx_ring[i];
-
-               ring->start_time = 0;
-               ring->end_time = 0;
-               ring->launchtime_enable = false;
-
                wr32(IGC_TXQCTL(i), 0);
                wr32(IGC_STQT(i), 0);
                wr32(IGC_ENDQT(i), NSEC_PER_SEC);
        }
 
-       wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC);
+       wr32(IGC_QBVCYCLET_S, 0);
        wr32(IGC_QBVCYCLET, NSEC_PER_SEC);
 
        adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED;
@@ -68,9 +87,6 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
        ktime_t base_time, systim;
        int i;
 
-       if (adapter->flags & IGC_FLAG_TSN_QBV_ENABLED)
-               return 0;
-
        cycle = adapter->cycle_time;
        base_time = adapter->base_time;
 
@@ -88,6 +104,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *ring = adapter->tx_ring[i];
                u32 txqctl = 0;
+               u16 cbs_value;
+               u32 tqavcc;
 
                wr32(IGC_STQT(i), ring->start_time);
                wr32(IGC_ENDQT(i), ring->end_time);
@@ -105,6 +123,90 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
                if (ring->launchtime_enable)
                        txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
 
+               /* Skip configuring CBS for Q2 and Q3 */
+               if (i > 1)
+                       goto skip_cbs;
+
+               if (ring->cbs_enable) {
+                       if (i == 0)
+                               txqctl |= IGC_TXQCTL_QAV_SEL_CBS0;
+                       else
+                               txqctl |= IGC_TXQCTL_QAV_SEL_CBS1;
+
+                       /* According to i225 datasheet section 7.5.2.7, we
+                        * should set the 'idleSlope' field from TQAVCC
+                        * register following the equation:
+                        *
+                        * value = link-speed   0x7736 * BW * 0.2
+                        *         ---------- *  -----------------         (E1)
+                        *          100Mbps            2.5
+                        *
+                        * Note that 'link-speed' is in Mbps.
+                        *
+                        * 'BW' is the percentage bandwidth out of full
+                        * link speed which can be found with the
+                        * following equation. Note that idleSlope here
+                        * is the parameter from this function
+                        * which is in kbps.
+                        *
+                        *     BW =     idleSlope
+                        *          -----------------                      (E2)
+                        *          link-speed * 1000
+                        *
+                        * That said, we can come up with a generic
+                        * equation to calculate the value we should set
+                        * it TQAVCC register by replacing 'BW' in E1 by E2.
+                        * The resulting equation is:
+                        *
+                        * value = link-speed * 0x7736 * idleSlope * 0.2
+                        *         -------------------------------------   (E3)
+                        *             100 * 2.5 * link-speed * 1000
+                        *
+                        * 'link-speed' is present in both sides of the
+                        * fraction so it is canceled out. The final
+                        * equation is the following:
+                        *
+                        *     value = idleSlope * 61036
+                        *             -----------------                   (E4)
+                        *                  2500000
+                        *
+                        * NOTE: For i225, given the above, we can see
+                        *       that idleslope is represented in
+                        *       40.959433 kbps units by the value at
+                        *       the TQAVCC register (2.5Gbps / 61036),
+                        *       which reduces the granularity for
+                        *       idleslope increments.
+                        *
+                        * In i225 controller, the sendSlope and loCredit
+                        * parameters from CBS are not configurable
+                        * by software so we don't do any
+                        * 'controller configuration' in respect to
+                        * these parameters.
+                        */
+                       cbs_value = DIV_ROUND_UP_ULL(ring->idleslope
+                                                    * 61036ULL, 2500000);
+
+                       tqavcc = rd32(IGC_TQAVCC(i));
+                       tqavcc &= ~IGC_TQAVCC_IDLESLOPE_MASK;
+                       tqavcc |= cbs_value | IGC_TQAVCC_KEEP_CREDITS;
+                       wr32(IGC_TQAVCC(i), tqavcc);
+
+                       wr32(IGC_TQAVHC(i),
+                            0x80000000 + ring->hicredit * 0x7735);
+               } else {
+                       /* Disable any CBS for the queue */
+                       txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK);
+
+                       /* Set idleSlope to zero. */
+                       tqavcc = rd32(IGC_TQAVCC(i));
+                       tqavcc &= ~(IGC_TQAVCC_IDLESLOPE_MASK |
+                                   IGC_TQAVCC_KEEP_CREDITS);
+                       wr32(IGC_TQAVCC(i), tqavcc);
+
+                       /* Set hiCredit to zero. */
+                       wr32(IGC_TQAVHC(i), 0);
+               }
+skip_cbs:
                wr32(IGC_TXQCTL(i), txqctl);
        }
 
@@ -125,33 +227,41 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
        wr32(IGC_BASET_H, baset_h);
        wr32(IGC_BASET_L, baset_l);
 
-       adapter->flags |= IGC_FLAG_TSN_QBV_ENABLED;
-
        return 0;
 }
 
-int igc_tsn_offload_apply(struct igc_adapter *adapter)
+int igc_tsn_reset(struct igc_adapter *adapter)
 {
-       bool is_any_enabled = adapter->base_time || is_any_launchtime(adapter);
+       unsigned int new_flags;
+       int err = 0;
 
-       if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled)
-               return 0;
+       new_flags = igc_tsn_new_flags(adapter);
 
-       if (!is_any_enabled) {
-               int err = igc_tsn_disable_offload(adapter);
+       if (!(new_flags & IGC_FLAG_TSN_ANY_ENABLED))
+               return igc_tsn_disable_offload(adapter);
 
-               if (err < 0)
-                       return err;
+       err = igc_tsn_enable_offload(adapter);
+       if (err < 0)
+               return err;
 
-               /* The BASET registers aren't cleared when writing
-                * into them, force a reset if the interface is
-                * running.
-                */
-               if (netif_running(adapter->netdev))
-                       schedule_work(&adapter->reset_task);
+       adapter->flags = new_flags;
 
+       return err;
+}
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter)
+{
+       int err;
+
+       if (netif_running(adapter->netdev)) {
+               schedule_work(&adapter->reset_task);
                return 0;
        }
 
-       return igc_tsn_enable_offload(adapter);
+       err = igc_tsn_enable_offload(adapter);
+       if (err < 0)
+               return err;
+
+       adapter->flags = igc_tsn_new_flags(adapter);
+       return 0;
 }
index f76bc86..1512307 100644 (file)
@@ -5,5 +5,6 @@
 #define _IGC_TSN_H_
 
 int igc_tsn_offload_apply(struct igc_adapter *adapter);
+int igc_tsn_reset(struct igc_adapter *adapter);
 
 #endif /* _IGC_BASE_H */
index 4ceaca0..fc26e4d 100644 (file)
@@ -2358,7 +2358,9 @@ static int ixgbe_set_phys_id(struct net_device *netdev,
 }
 
 static int ixgbe_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -2412,7 +2414,9 @@ static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
 }
 
 static int ixgbe_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
        struct ixgbe_q_vector *q_vector;
index 14aea40..24e06ba 100644 (file)
@@ -10247,7 +10247,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_set_tx_maxrate     = ixgbe_tx_maxrate,
        .ndo_vlan_rx_add_vid    = ixgbe_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = ixgbe_vlan_rx_kill_vid,
-       .ndo_do_ioctl           = ixgbe_ioctl,
+       .ndo_eth_ioctl          = ixgbe_ioctl,
        .ndo_set_vf_mac         = ixgbe_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = ixgbe_ndo_set_vf_vlan,
        .ndo_set_vf_rate        = ixgbe_ndo_set_vf_bw,
index e49fb1c..8380f90 100644 (file)
@@ -787,7 +787,9 @@ static int ixgbevf_nway_reset(struct net_device *netdev)
 }
 
 static int ixgbevf_get_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 
@@ -811,7 +813,9 @@ static int ixgbevf_get_coalesce(struct net_device *netdev,
 }
 
 static int ixgbevf_set_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct ixgbevf_adapter *adapter = netdev_priv(netdev);
        struct ixgbevf_q_vector *q_vector;
index f1b9284..1bdc4f2 100644 (file)
@@ -734,17 +734,17 @@ jme_make_new_rx_buf(struct jme_adapter *jme, int i)
        if (unlikely(!skb))
                return -ENOMEM;
 
-       mapping = pci_map_page(jme->pdev, virt_to_page(skb->data),
+       mapping = dma_map_page(&jme->pdev->dev, virt_to_page(skb->data),
                               offset_in_page(skb->data), skb_tailroom(skb),
-                              PCI_DMA_FROMDEVICE);
-       if (unlikely(pci_dma_mapping_error(jme->pdev, mapping))) {
+                              DMA_FROM_DEVICE);
+       if (unlikely(dma_mapping_error(&jme->pdev->dev, mapping))) {
                dev_kfree_skb(skb);
                return -ENOMEM;
        }
 
        if (likely(rxbi->mapping))
-               pci_unmap_page(jme->pdev, rxbi->mapping,
-                              rxbi->len, PCI_DMA_FROMDEVICE);
+               dma_unmap_page(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+                              DMA_FROM_DEVICE);
 
        rxbi->skb = skb;
        rxbi->len = skb_tailroom(skb);
@@ -760,10 +760,8 @@ jme_free_rx_buf(struct jme_adapter *jme, int i)
        rxbi += i;
 
        if (rxbi->skb) {
-               pci_unmap_page(jme->pdev,
-                                rxbi->mapping,
-                                rxbi->len,
-                                PCI_DMA_FROMDEVICE);
+               dma_unmap_page(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+                              DMA_FROM_DEVICE);
                dev_kfree_skb(rxbi->skb);
                rxbi->skb = NULL;
                rxbi->mapping = 0;
@@ -1005,16 +1003,12 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx)
        rxbi += idx;
 
        skb = rxbi->skb;
-       pci_dma_sync_single_for_cpu(jme->pdev,
-                                       rxbi->mapping,
-                                       rxbi->len,
-                                       PCI_DMA_FROMDEVICE);
+       dma_sync_single_for_cpu(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+                               DMA_FROM_DEVICE);
 
        if (unlikely(jme_make_new_rx_buf(jme, idx))) {
-               pci_dma_sync_single_for_device(jme->pdev,
-                                               rxbi->mapping,
-                                               rxbi->len,
-                                               PCI_DMA_FROMDEVICE);
+               dma_sync_single_for_device(&jme->pdev->dev, rxbi->mapping,
+                                          rxbi->len, DMA_FROM_DEVICE);
 
                ++(NET_STAT(jme).rx_dropped);
        } else {
@@ -1453,10 +1447,9 @@ static void jme_tx_clean_tasklet(struct tasklet_struct *t)
                                ttxbi = txbi + ((i + j) & (mask));
                                txdesc[(i + j) & (mask)].dw[0] = 0;
 
-                               pci_unmap_page(jme->pdev,
-                                                ttxbi->mapping,
-                                                ttxbi->len,
-                                                PCI_DMA_TODEVICE);
+                               dma_unmap_page(&jme->pdev->dev,
+                                              ttxbi->mapping, ttxbi->len,
+                                              DMA_TO_DEVICE);
 
                                ttxbi->mapping = 0;
                                ttxbi->len = 0;
@@ -1966,19 +1959,13 @@ jme_fill_tx_map(struct pci_dev *pdev,
 {
        dma_addr_t dmaaddr;
 
-       dmaaddr = pci_map_page(pdev,
-                               page,
-                               page_offset,
-                               len,
-                               PCI_DMA_TODEVICE);
+       dmaaddr = dma_map_page(&pdev->dev, page, page_offset, len,
+                              DMA_TO_DEVICE);
 
-       if (unlikely(pci_dma_mapping_error(pdev, dmaaddr)))
+       if (unlikely(dma_mapping_error(&pdev->dev, dmaaddr)))
                return -EINVAL;
 
-       pci_dma_sync_single_for_device(pdev,
-                                      dmaaddr,
-                                      len,
-                                      PCI_DMA_TODEVICE);
+       dma_sync_single_for_device(&pdev->dev, dmaaddr, len, DMA_TO_DEVICE);
 
        txdesc->dw[0] = 0;
        txdesc->dw[1] = 0;
@@ -2003,10 +1990,8 @@ static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count)
 
        for (j = 0 ; j < count ; j++) {
                ctxbi = txbi + ((startidx + j + 2) & (mask));
-               pci_unmap_page(jme->pdev,
-                               ctxbi->mapping,
-                               ctxbi->len,
-                               PCI_DMA_TODEVICE);
+               dma_unmap_page(&jme->pdev->dev, ctxbi->mapping, ctxbi->len,
+                              DMA_TO_DEVICE);
 
                ctxbi->mapping = 0;
                ctxbi->len = 0;
@@ -2400,8 +2385,10 @@ jme_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
        mdio_memcpy(jme, p32, JME_PHY_REG_NR);
 }
 
-static int
-jme_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
+static int jme_get_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ecmd,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct jme_adapter *jme = netdev_priv(netdev);
 
@@ -2437,8 +2424,10 @@ jme_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
        return 0;
 }
 
-static int
-jme_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecmd)
+static int jme_set_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ecmd,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct jme_adapter *jme = netdev_priv(netdev);
        struct dynpcc_info *dpi = &(jme->dpi);
@@ -2859,18 +2848,15 @@ static int
 jme_pci_dma64(struct pci_dev *pdev)
 {
        if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
-           !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
-               if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
-                       return 1;
+           !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
+               return 1;
 
        if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 &&
-           !pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
-               if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)))
-                       return 1;
+           !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)))
+               return 1;
 
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
-               if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
-                       return 0;
+       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+               return 0;
 
        return -1;
 }
@@ -2901,7 +2887,7 @@ static const struct net_device_ops jme_netdev_ops = {
        .ndo_open               = jme_open,
        .ndo_stop               = jme_close,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = jme_ioctl,
+       .ndo_eth_ioctl          = jme_ioctl,
        .ndo_start_xmit         = jme_start_xmit,
        .ndo_set_mac_address    = jme_set_macaddr,
        .ndo_set_rx_mode        = jme_set_multi,
index b30a457..3e9f324 100644 (file)
@@ -1272,7 +1272,7 @@ static const struct net_device_ops korina_netdev_ops = {
        .ndo_start_xmit         = korina_send_packet,
        .ndo_set_rx_mode        = korina_multicast_list,
        .ndo_tx_timeout         = korina_tx_timeout,
-       .ndo_do_ioctl           = korina_ioctl,
+       .ndo_eth_ioctl          = korina_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 2d0c52f..62f8c52 100644 (file)
@@ -609,7 +609,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
        .ndo_stop = ltq_etop_stop,
        .ndo_start_xmit = ltq_etop_tx,
        .ndo_change_mtu = ltq_etop_change_mtu,
-       .ndo_do_ioctl = phy_do_ioctl,
+       .ndo_eth_ioctl = phy_do_ioctl,
        .ndo_set_mac_address = ltq_etop_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
        .ndo_set_rx_mode = ltq_etop_set_multicast_list,
diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig
new file mode 100644 (file)
index 0000000..63bf01d
--- /dev/null
@@ -0,0 +1,28 @@
+#
+# LiteX device configuration
+#
+
+config NET_VENDOR_LITEX
+       bool "LiteX devices"
+       default y
+       help
+         If you have a network (Ethernet) card belonging to this class, say Y.
+
+         Note that the answer to this question doesn't directly affect the
+         kernel: saying N will just cause the configurator to skip all
+         the questions about LiteX devices. If you say Y, you will be asked
+         for your specific card in the following questions.
+
+if NET_VENDOR_LITEX
+
+config LITEX_LITEETH
+       tristate "LiteX Ethernet support"
+       depends on OF_NET
+       help
+         If you wish to compile a kernel for hardware with a LiteX LiteEth
+         device then you should answer Y to this.
+
+         LiteX is a soft system-on-chip that targets FPGAs. LiteETH is a basic
+         network device that is commonly used in LiteX designs.
+
+endif # NET_VENDOR_LITEX
diff --git a/drivers/net/ethernet/litex/Makefile b/drivers/net/ethernet/litex/Makefile
new file mode 100644 (file)
index 0000000..9343b73
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Makefile for the LiteX network device drivers.
+#
+
+obj-$(CONFIG_LITEX_LITEETH) += litex_liteeth.o
diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
new file mode 100644 (file)
index 0000000..a9bdbf0
--- /dev/null
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LiteX Liteeth Ethernet
+ *
+ * Copyright 2017 Joel Stanley <joel@jms.id.au>
+ *
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/litex.h>
+#include <linux/module.h>
+#include <linux/of_net.h>
+#include <linux/platform_device.h>
+
+#define LITEETH_WRITER_SLOT       0x00
+#define LITEETH_WRITER_LENGTH     0x04
+#define LITEETH_WRITER_ERRORS     0x08
+#define LITEETH_WRITER_EV_STATUS  0x0C
+#define LITEETH_WRITER_EV_PENDING 0x10
+#define LITEETH_WRITER_EV_ENABLE  0x14
+#define LITEETH_READER_START      0x18
+#define LITEETH_READER_READY      0x1C
+#define LITEETH_READER_LEVEL      0x20
+#define LITEETH_READER_SLOT       0x24
+#define LITEETH_READER_LENGTH     0x28
+#define LITEETH_READER_EV_STATUS  0x2C
+#define LITEETH_READER_EV_PENDING 0x30
+#define LITEETH_READER_EV_ENABLE  0x34
+#define LITEETH_PREAMBLE_CRC      0x38
+#define LITEETH_PREAMBLE_ERRORS   0x3C
+#define LITEETH_CRC_ERRORS        0x40
+
+#define LITEETH_PHY_CRG_RESET     0x00
+#define LITEETH_MDIO_W            0x04
+#define LITEETH_MDIO_R            0x0C
+
+#define DRV_NAME       "liteeth"
+
+struct liteeth {
+       void __iomem *base;
+       struct net_device *netdev;
+       struct device *dev;
+       u32 slot_size;
+
+       /* Tx */
+       u32 tx_slot;
+       u32 num_tx_slots;
+       void __iomem *tx_base;
+
+       /* Rx */
+       u32 rx_slot;
+       u32 num_rx_slots;
+       void __iomem *rx_base;
+};
+
+static int liteeth_rx(struct net_device *netdev)
+{
+       struct liteeth *priv = netdev_priv(netdev);
+       struct sk_buff *skb;
+       unsigned char *data;
+       u8 rx_slot;
+       int len;
+
+       rx_slot = litex_read8(priv->base + LITEETH_WRITER_SLOT);
+       len = litex_read32(priv->base + LITEETH_WRITER_LENGTH);
+
+       if (len == 0 || len > 2048)
+               goto rx_drop;
+
+       skb = netdev_alloc_skb_ip_align(netdev, len);
+       if (!skb) {
+               netdev_err(netdev, "couldn't get memory\n");
+               goto rx_drop;
+       }
+
+       data = skb_put(skb, len);
+       memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len);
+       skb->protocol = eth_type_trans(skb, netdev);
+
+       netdev->stats.rx_packets++;
+       netdev->stats.rx_bytes += len;
+
+       return netif_rx(skb);
+
+rx_drop:
+       netdev->stats.rx_dropped++;
+       netdev->stats.rx_errors++;
+
+       return NET_RX_DROP;
+}
+
+static irqreturn_t liteeth_interrupt(int irq, void *dev_id)
+{
+       struct net_device *netdev = dev_id;
+       struct liteeth *priv = netdev_priv(netdev);
+       u8 reg;
+
+       reg = litex_read8(priv->base + LITEETH_READER_EV_PENDING);
+       if (reg) {
+               if (netif_queue_stopped(netdev))
+                       netif_wake_queue(netdev);
+               litex_write8(priv->base + LITEETH_READER_EV_PENDING, reg);
+       }
+
+       reg = litex_read8(priv->base + LITEETH_WRITER_EV_PENDING);
+       if (reg) {
+               liteeth_rx(netdev);
+               litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, reg);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static int liteeth_open(struct net_device *netdev)
+{
+       struct liteeth *priv = netdev_priv(netdev);
+       int err;
+
+       /* Clear pending events */
+       litex_write8(priv->base + LITEETH_WRITER_EV_PENDING, 1);
+       litex_write8(priv->base + LITEETH_READER_EV_PENDING, 1);
+
+       err = request_irq(netdev->irq, liteeth_interrupt, 0, netdev->name, netdev);
+       if (err) {
+               netdev_err(netdev, "failed to request irq %d\n", netdev->irq);
+               return err;
+       }
+
+       /* Enable IRQs */
+       litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 1);
+       litex_write8(priv->base + LITEETH_READER_EV_ENABLE, 1);
+
+       netif_carrier_on(netdev);
+       netif_start_queue(netdev);
+
+       return 0;
+}
+
+static int liteeth_stop(struct net_device *netdev)
+{
+       struct liteeth *priv = netdev_priv(netdev);
+
+       netif_stop_queue(netdev);
+       netif_carrier_off(netdev);
+
+       litex_write8(priv->base + LITEETH_WRITER_EV_ENABLE, 0);
+       litex_write8(priv->base + LITEETH_READER_EV_ENABLE, 0);
+
+       free_irq(netdev->irq, netdev);
+
+       return 0;
+}
+
+static int liteeth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct liteeth *priv = netdev_priv(netdev);
+       void __iomem *txbuffer;
+
+       if (!litex_read8(priv->base + LITEETH_READER_READY)) {
+               if (net_ratelimit())
+                       netdev_err(netdev, "LITEETH_READER_READY not ready\n");
+
+               netif_stop_queue(netdev);
+
+               return NETDEV_TX_BUSY;
+       }
+
+       /* Reject oversize packets */
+       if (unlikely(skb->len > priv->slot_size)) {
+               if (net_ratelimit())
+                       netdev_err(netdev, "tx packet too big\n");
+
+               dev_kfree_skb_any(skb);
+               netdev->stats.tx_dropped++;
+               netdev->stats.tx_errors++;
+
+               return NETDEV_TX_OK;
+       }
+
+       txbuffer = priv->tx_base + priv->tx_slot * priv->slot_size;
+       memcpy_toio(txbuffer, skb->data, skb->len);
+       litex_write8(priv->base + LITEETH_READER_SLOT, priv->tx_slot);
+       litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len);
+       litex_write8(priv->base + LITEETH_READER_START, 1);
+
+       netdev->stats.tx_bytes += skb->len;
+       netdev->stats.tx_packets++;
+
+       priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots;
+       dev_kfree_skb_any(skb);
+
+       return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops liteeth_netdev_ops = {
+       .ndo_open               = liteeth_open,
+       .ndo_stop               = liteeth_stop,
+       .ndo_start_xmit         = liteeth_start_xmit,
+};
+
+static void liteeth_setup_slots(struct liteeth *priv)
+{
+       struct device_node *np = priv->dev->of_node;
+       int err;
+
+       err = of_property_read_u32(np, "litex,rx-slots", &priv->num_rx_slots);
+       if (err) {
+               dev_dbg(priv->dev, "unable to get litex,rx-slots, using 2\n");
+               priv->num_rx_slots = 2;
+       }
+
+       err = of_property_read_u32(np, "litex,tx-slots", &priv->num_tx_slots);
+       if (err) {
+               dev_dbg(priv->dev, "unable to get litex,tx-slots, using 2\n");
+               priv->num_tx_slots = 2;
+       }
+
+       err = of_property_read_u32(np, "litex,slot-size", &priv->slot_size);
+       if (err) {
+               dev_dbg(priv->dev, "unable to get litex,slot-size, using 0x800\n");
+               priv->slot_size = 0x800;
+       }
+}
+
+static int liteeth_probe(struct platform_device *pdev)
+{
+       struct net_device *netdev;
+       void __iomem *buf_base;
+       struct liteeth *priv;
+       int irq, err;
+
+       netdev = devm_alloc_etherdev(&pdev->dev, sizeof(*priv));
+       if (!netdev)
+               return -ENOMEM;
+
+       SET_NETDEV_DEV(netdev, &pdev->dev);
+       platform_set_drvdata(pdev, netdev);
+
+       priv = netdev_priv(netdev);
+       priv->netdev = netdev;
+       priv->dev = &pdev->dev;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               dev_err(&pdev->dev, "Failed to get IRQ %d\n", irq);
+               return irq;
+       }
+       netdev->irq = irq;
+
+       priv->base = devm_platform_ioremap_resource_byname(pdev, "mac");
+       if (IS_ERR(priv->base))
+               return PTR_ERR(priv->base);
+
+       buf_base = devm_platform_ioremap_resource_byname(pdev, "buffer");
+       if (IS_ERR(buf_base))
+               return PTR_ERR(buf_base);
+
+       liteeth_setup_slots(priv);
+
+       /* Rx slots */
+       priv->rx_base = buf_base;
+       priv->rx_slot = 0;
+
+       /* Tx slots come after Rx slots */
+       priv->tx_base = buf_base + priv->num_rx_slots * priv->slot_size;
+       priv->tx_slot = 0;
+
+       err = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
+       if (err)
+               eth_hw_addr_random(netdev);
+
+       netdev->netdev_ops = &liteeth_netdev_ops;
+
+       err = register_netdev(netdev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to register netdev %d\n", err);
+               return err;
+       }
+
+       netdev_info(netdev, "irq %d slots: tx %d rx %d size %d\n",
+                   netdev->irq, priv->num_tx_slots, priv->num_rx_slots, priv->slot_size);
+
+       return 0;
+}
+
+static int liteeth_remove(struct platform_device *pdev)
+{
+       struct net_device *netdev = platform_get_drvdata(pdev);
+
+       unregister_netdev(netdev);
+       free_netdev(netdev);
+
+       return 0;
+}
+
+static const struct of_device_id liteeth_of_match[] = {
+       { .compatible = "litex,liteeth" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, liteeth_of_match);
+
+static struct platform_driver liteeth_driver = {
+       .probe = liteeth_probe,
+       .remove = liteeth_remove,
+       .driver = {
+               .name = DRV_NAME,
+               .of_match_table = liteeth_of_match,
+       },
+};
+module_platform_driver(liteeth_driver);
+
+MODULE_AUTHOR("Joel Stanley <joel@jms.id.au>");
+MODULE_LICENSE("GPL");
index d207bfc..28d5ad2 100644 (file)
@@ -1611,8 +1611,10 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev,
        strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
-static int
-mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int mv643xx_eth_get_coalesce(struct net_device *dev,
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
 
@@ -1622,8 +1624,10 @@ mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
        return 0;
 }
 
-static int
-mv643xx_eth_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int mv643xx_eth_set_coalesce(struct net_device *dev,
+                                   struct ethtool_coalesce *ec,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct mv643xx_eth_private *mp = netdev_priv(dev);
 
@@ -3060,7 +3064,7 @@ static const struct net_device_ops mv643xx_eth_netdev_ops = {
        .ndo_set_rx_mode        = mv643xx_eth_set_rx_mode,
        .ndo_set_mac_address    = mv643xx_eth_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = mv643xx_eth_ioctl,
+       .ndo_eth_ioctl          = mv643xx_eth_ioctl,
        .ndo_change_mtu         = mv643xx_eth_change_mtu,
        .ndo_set_features       = mv643xx_eth_set_features,
        .ndo_tx_timeout         = mv643xx_eth_tx_timeout,
index de32e5b..9d460a2 100644 (file)
@@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        if (!skb)
                return ERR_PTR(-ENOMEM);
 
-       skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
+       skb_mark_for_recycle(skb);
 
        skb_reserve(skb, xdp->data - xdp->data_hard_start);
        skb_put(skb, xdp->data_end - xdp->data);
@@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                skb_frag_page(frag), skb_frag_off(frag),
                                skb_frag_size(frag), PAGE_SIZE);
-               /* We don't need to reset pp_recycle here. It's already set, so
-                * just mark fragments for recycling.
-                */
-               page_pool_store_mem_info(skb_frag_page(frag), pool);
        }
 
        return skb;
@@ -2666,7 +2662,7 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev,
                return 0;
 
        if (skb_headlen(skb) < (skb_transport_offset(skb) + tcp_hdrlen(skb))) {
-               pr_info("*** Is this even  possible???!?!?\n");
+               pr_info("*** Is this even possible?\n");
                return 0;
        }
 
@@ -3832,12 +3828,20 @@ static void mvneta_validate(struct phylink_config *config,
        struct mvneta_port *pp = netdev_priv(ndev);
        __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
-       /* We only support QSGMII, SGMII, 802.3z and RGMII modes */
-       if (state->interface != PHY_INTERFACE_MODE_NA &&
-           state->interface != PHY_INTERFACE_MODE_QSGMII &&
-           state->interface != PHY_INTERFACE_MODE_SGMII &&
-           !phy_interface_mode_is_8023z(state->interface) &&
-           !phy_interface_mode_is_rgmii(state->interface)) {
+       /* We only support QSGMII, SGMII, 802.3z and RGMII modes.
+        * When in 802.3z mode, we must have AN enabled:
+        * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+        * When <PortType> = 1 (1000BASE-X) this field must be set to 1."
+        */
+       if (phy_interface_mode_is_8023z(state->interface)) {
+               if (!phylink_test(state->advertising, Autoneg)) {
+                       bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+                       return;
+               }
+       } else if (state->interface != PHY_INTERFACE_MODE_NA &&
+                  state->interface != PHY_INTERFACE_MODE_QSGMII &&
+                  state->interface != PHY_INTERFACE_MODE_SGMII &&
+                  !phy_interface_mode_is_rgmii(state->interface)) {
                bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
                return;
        }
@@ -4496,8 +4500,11 @@ static int mvneta_ethtool_nway_reset(struct net_device *dev)
 }
 
 /* Set interrupt coalescing for ethtools */
-static int mvneta_ethtool_set_coalesce(struct net_device *dev,
-                                      struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_set_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *c,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct mvneta_port *pp = netdev_priv(dev);
        int queue;
@@ -4520,8 +4527,11 @@ static int mvneta_ethtool_set_coalesce(struct net_device *dev,
 }
 
 /* get coalescing for ethtools */
-static int mvneta_ethtool_get_coalesce(struct net_device *dev,
-                                      struct ethtool_coalesce *c)
+static int
+mvneta_ethtool_get_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *c,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct mvneta_port *pp = netdev_priv(dev);
 
@@ -4986,7 +4996,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
        .ndo_change_mtu      = mvneta_change_mtu,
        .ndo_fix_features    = mvneta_fix_features,
        .ndo_get_stats64     = mvneta_get_stats64,
-       .ndo_do_ioctl        = mvneta_ioctl,
+       .ndo_eth_ioctl        = mvneta_ioctl,
        .ndo_bpf             = mvneta_xdp,
        .ndo_xdp_xmit        = mvneta_xdp_xmit,
        .ndo_setup_tc        = mvneta_setup_tc,
index 3229baf..d5c92e4 100644 (file)
@@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
                }
 
                if (pp)
-                       skb_mark_for_recycle(skb, page, pp);
+                       skb_mark_for_recycle(skb);
                else
                        dma_unmap_single_attrs(dev->dev.parent, dma_addr,
                                               bm_pool->buf_size, DMA_FROM_DEVICE,
@@ -5367,8 +5367,11 @@ static int mvpp2_ethtool_nway_reset(struct net_device *dev)
 }
 
 /* Set interrupt coalescing for ethtools */
-static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
-                                     struct ethtool_coalesce *c)
+static int
+mvpp2_ethtool_set_coalesce(struct net_device *dev,
+                          struct ethtool_coalesce *c,
+                          struct kernel_ethtool_coalesce *kernel_coal,
+                          struct netlink_ext_ack *extack)
 {
        struct mvpp2_port *port = netdev_priv(dev);
        int queue;
@@ -5400,8 +5403,11 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev,
 }
 
 /* get coalescing for ethtools */
-static int mvpp2_ethtool_get_coalesce(struct net_device *dev,
-                                     struct ethtool_coalesce *c)
+static int
+mvpp2_ethtool_get_coalesce(struct net_device *dev,
+                          struct ethtool_coalesce *c,
+                          struct kernel_ethtool_coalesce *kernel_coal,
+                          struct netlink_ext_ack *extack)
 {
        struct mvpp2_port *port = netdev_priv(dev);
 
@@ -5702,7 +5708,7 @@ static const struct net_device_ops mvpp2_netdev_ops = {
        .ndo_set_mac_address    = mvpp2_set_mac_address,
        .ndo_change_mtu         = mvpp2_change_mtu,
        .ndo_get_stats64        = mvpp2_get_stats64,
-       .ndo_do_ioctl           = mvpp2_ioctl,
+       .ndo_eth_ioctl          = mvpp2_ioctl,
        .ndo_vlan_rx_add_vid    = mvpp2_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = mvpp2_vlan_rx_kill_vid,
        .ndo_set_features       = mvpp2_set_features,
@@ -6269,6 +6275,15 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
                if (!mvpp2_port_supports_rgmii(port))
                        goto empty_set;
                break;
+       case PHY_INTERFACE_MODE_1000BASEX:
+       case PHY_INTERFACE_MODE_2500BASEX:
+               /* When in 802.3z mode, we must have AN enabled:
+                * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+                * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
+                */
+               if (!phylink_test(state->advertising, Autoneg))
+                       goto empty_set;
+               break;
        default:
                break;
        }
index 16caa02..3f982cc 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
-# Marvell OcteonTX2 drivers configuration
+# Marvell RVU Network drivers configuration
 #
 
 config OCTEONTX2_MBOX
@@ -12,6 +12,7 @@ config OCTEONTX2_AF
        select NET_DEVLINK
        depends on (64BIT && COMPILE_TEST) || ARM64
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This driver supports Marvell's OcteonTX2 Resource Virtualization
          Unit's admin function manager which manages all RVU HW resources
@@ -32,6 +33,7 @@ config OCTEONTX2_PF
        select OCTEONTX2_MBOX
        depends on (64BIT && COMPILE_TEST) || ARM64
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This driver supports Marvell's OcteonTX2 NIC physical function.
 
index cc8ac36..7f4a4ca 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Makefile for Marvell's OcteonTX2 RVU Admin Function driver
+# Makefile for Marvell's RVU Admin Function driver
 #
 
 ccflags-y += -I$(src)
@@ -10,4 +10,5 @@ obj-$(CONFIG_OCTEONTX2_AF) += rvu_af.o
 rvu_mbox-y := mbox.o rvu_trace.o
 rvu_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o \
                  rvu_reg.o rvu_npc.o rvu_debugfs.o ptp.o rvu_npc_fs.o \
-                 rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o
+                 rvu_cpt.o rvu_devlink.o rpm.o rvu_cn10k.o rvu_switch.o \
+                 rvu_sdp.o
index 544c96c..7f3d010 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Marvell OcteonTx2 CGX driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/acpi.h>
index 237ba2b..ab1e4ab 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 CGX driver
+/* Marvell OcteonTx2 CGX driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef CGX_H
index aa4e42f..f72ec0e 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 CGX driver
+/* Marvell OcteonTx2 CGX driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __CGX_FW_INTF_H__
index 47f5ed0..d9bea13 100644 (file)
@@ -1,11 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2018 Marvell.
  */
 
 #ifndef COMMON_H
@@ -64,8 +60,8 @@ static inline int qmem_alloc(struct device *dev, struct qmem **q,
 
        qmem->entry_sz = entry_sz;
        qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN;
-       qmem->base = dma_alloc_coherent(dev, qmem->alloc_sz,
-                                        &qmem->iova, GFP_KERNEL);
+       qmem->base = dma_alloc_attrs(dev, qmem->alloc_sz, &qmem->iova,
+                                    GFP_KERNEL, DMA_ATTR_FORCE_CONTIGUOUS);
        if (!qmem->base)
                return -ENOMEM;
 
@@ -84,9 +80,10 @@ static inline void qmem_free(struct device *dev, struct qmem *qmem)
                return;
 
        if (qmem->base)
-               dma_free_coherent(dev, qmem->alloc_sz,
-                                 qmem->base - qmem->align,
-                                 qmem->iova - qmem->align);
+               dma_free_attrs(dev, qmem->alloc_sz,
+                              qmem->base - qmem->align,
+                              qmem->iova - qmem->align,
+                              DMA_ATTR_FORCE_CONTIGUOUS);
        devm_kfree(dev, qmem);
 }
 
@@ -146,10 +143,7 @@ enum nix_scheduler {
 #define TXSCH_RR_QTM_MAX               ((1 << 24) - 1)
 #define TXSCH_TL1_DFLT_RR_QTM          TXSCH_RR_QTM_MAX
 #define TXSCH_TL1_DFLT_RR_PRIO         (0x1ull)
-#define MAX_SCHED_WEIGHT               0xFF
-#define DFLT_RR_WEIGHT                 71
-#define DFLT_RR_QTM    ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
-                        / MAX_SCHED_WEIGHT)
+#define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */
 
 /* Min/Max packet sizes, excluding FCS */
 #define        NIC_HW_MIN_FRS                  40
@@ -187,15 +181,16 @@ enum nix_scheduler {
 
 #define NIX_INTF_TYPE_CGX              0
 #define NIX_INTF_TYPE_LBK              1
+#define NIX_INTF_TYPE_SDP              2
 
 #define MAX_LMAC_PKIND                 12
 #define NIX_LINK_CGX_LMAC(a, b)                (0 + 4 * (a) + (b))
 #define NIX_LINK_LBK(a)                        (12 + (a))
 #define NIX_CHAN_CGX_LMAC_CHX(a, b, c) (0x800 + 0x100 * (a) + 0x10 * (b) + (c))
 #define NIX_CHAN_LBK_CHX(a, b)         (0 + 0x100 * (a) + (b))
-#define NIX_CHAN_SDP_CH_START          (0x700ull)
-
-#define SDP_CHANNELS                   256
+#define NIX_CHAN_SDP_CH_START          (0x700ull)
+#define NIX_CHAN_SDP_CHX(a)            (NIX_CHAN_SDP_CH_START + (a))
+#define NIX_CHAN_SDP_NUM_CHANS         256
 
 /* The mask is to extract lower 10-bits of channel number
  * which CPT will pass to X2P.
index a8b7b1c..c38306b 100644 (file)
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
  *
  * Copyright (C) 2020 Marvell.
+ *
  */
 
 #ifndef LMAC_COMMON_H
index 0a37ca9..2898931 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -412,5 +409,5 @@ const char *otx2_mbox_id2name(u16 id)
 }
 EXPORT_SYMBOL(otx2_mbox_id2name);
 
-MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_AUTHOR("Marvell.");
 MODULE_LICENSE("GPL v2");
index f5ec39d..1548777 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef MBOX_H
@@ -87,7 +84,7 @@ struct mbox_msghdr {
 #define OTX2_MBOX_REQ_SIG (0xdead)
 #define OTX2_MBOX_RSP_SIG (0xbeef)
        u16 sig;         /* Signature, for validating corrupted msgs */
-#define OTX2_MBOX_VERSION (0x0007)
+#define OTX2_MBOX_VERSION (0x0009)
        u16 ver;         /* Version of msg's structure for this ID */
        u16 next_msgoff; /* Offset of next msg within mailbox region */
        int rc;          /* Msg process'ed response code */
@@ -130,6 +127,7 @@ static inline struct mbox_msghdr *otx2_mbox_alloc_msg(struct otx2_mbox *mbox,
 M(READY,               0x001, ready, msg_req, ready_msg_rsp)           \
 M(ATTACH_RESOURCES,    0x002, attach_resources, rsrc_attach, msg_rsp)  \
 M(DETACH_RESOURCES,    0x003, detach_resources, rsrc_detach, msg_rsp)  \
+M(FREE_RSRC_CNT,       0x004, free_rsrc_cnt, msg_req, free_rsrcs_rsp)  \
 M(MSIX_OFFSET,         0x005, msix_offset, msg_req, msix_offset_rsp)   \
 M(VF_FLR,              0x006, vf_flr, msg_req, msg_rsp)                \
 M(PTP_OP,              0x007, ptp_op, ptp_req, ptp_rsp)                \
@@ -191,6 +189,9 @@ M(CPT_RD_WR_REGISTER,       0xA02, cpt_rd_wr_register,  cpt_rd_wr_reg_msg,  \
 M(CPT_STATS,            0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp)      \
 M(CPT_RXC_TIME_CFG,     0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req,  \
                               msg_rsp)                                 \
+/* SDP mbox IDs (range 0x1000 - 0x11FF) */                             \
+M(SET_SDP_CHAN_INFO, 0x1000, set_sdp_chan_info, sdp_chan_info_msg, msg_rsp) \
+M(GET_SDP_CHAN_INFO, 0x1001, get_sdp_chan_info, msg_req, sdp_get_chan_info_msg) \
 /* NPC mbox IDs (range 0x6000 - 0x7FFF) */                             \
 M(NPC_MCAM_ALLOC_ENTRY,        0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\
                                npc_mcam_alloc_entry_rsp)               \
@@ -243,7 +244,8 @@ M(NIX_HWCTX_DISABLE,        0x8003, nix_hwctx_disable,                      \
 M(NIX_TXSCH_ALLOC,     0x8004, nix_txsch_alloc,                        \
                                 nix_txsch_alloc_req, nix_txsch_alloc_rsp)   \
 M(NIX_TXSCH_FREE,      0x8005, nix_txsch_free, nix_txsch_free_req, msg_rsp) \
-M(NIX_TXSCHQ_CFG,      0x8006, nix_txschq_cfg, nix_txschq_config, msg_rsp)  \
+M(NIX_TXSCHQ_CFG,      0x8006, nix_txschq_cfg, nix_txschq_config,      \
+                               nix_txschq_config)                      \
 M(NIX_STATS_RST,       0x8007, nix_stats_rst, msg_req, msg_rsp)        \
 M(NIX_VTAG_CFG,                0x8008, nix_vtag_cfg, nix_vtag_config,          \
                                 nix_vtag_config_rsp)                   \
@@ -268,13 +270,15 @@ M(NIX_BP_ENABLE,  0x8016, nix_bp_enable, nix_bp_cfg_req,  \
                                nix_bp_cfg_rsp) \
 M(NIX_BP_DISABLE,      0x8017, nix_bp_disable, nix_bp_cfg_req, msg_rsp) \
 M(NIX_GET_MAC_ADDR, 0x8018, nix_get_mac_addr, msg_req, nix_get_mac_addr_rsp) \
-M(NIX_CN10K_AQ_ENQ,    0x8019, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \
+M(NIX_CN10K_AQ_ENQ,    0x801b, nix_cn10k_aq_enq, nix_cn10k_aq_enq_req, \
                                nix_cn10k_aq_enq_rsp)                   \
 M(NIX_GET_HW_INFO,     0x801c, nix_get_hw_info, msg_req, nix_hw_info)  \
 M(NIX_BANDPROF_ALLOC,  0x801d, nix_bandprof_alloc, nix_bandprof_alloc_req, \
                                nix_bandprof_alloc_rsp)                     \
 M(NIX_BANDPROF_FREE,   0x801e, nix_bandprof_free, nix_bandprof_free_req,   \
-                               msg_rsp)
+                               msg_rsp)                                    \
+M(NIX_BANDPROF_GET_HWINFO, 0x801f, nix_bandprof_get_hwinfo, msg_req,           \
+                               nix_bandprof_get_hwinfo_rsp)
 
 /* Messages initiated by AF (range 0xC00 - 0xDFF) */
 #define MBOX_UP_CGX_MESSAGES                                           \
@@ -363,6 +367,25 @@ struct rsrc_detach {
        u8 cptlfs:1;
 };
 
+/* Number of resources available to the caller.
+ * In reply to MBOX_MSG_FREE_RSRC_CNT.
+ */
+struct free_rsrcs_rsp {
+       struct mbox_msghdr hdr;
+       u16 schq[NIX_TXSCH_LVL_CNT];
+       u16  sso;
+       u16  tim;
+       u16  ssow;
+       u16  cpt;
+       u8   npa;
+       u8   nix;
+       u16  schq_nix1[NIX_TXSCH_LVL_CNT];
+       u8   nix1;
+       u8   cpt1;
+       u8   ree0;
+       u8   ree1;
+};
+
 #define MSIX_VECTOR_INVALID    0xFFFF
 #define MAX_RVU_BLKLF_CNT      256
 
@@ -370,16 +393,20 @@ struct msix_offset_rsp {
        struct mbox_msghdr hdr;
        u16  npa_msixoff;
        u16  nix_msixoff;
-       u  sso;
-       u  ssow;
-       u  timlfs;
-       u  cptlfs;
+       u16  sso;
+       u16  ssow;
+       u16  timlfs;
+       u16  cptlfs;
        u16  sso_msixoff[MAX_RVU_BLKLF_CNT];
        u16  ssow_msixoff[MAX_RVU_BLKLF_CNT];
        u16  timlf_msixoff[MAX_RVU_BLKLF_CNT];
        u16  cptlf_msixoff[MAX_RVU_BLKLF_CNT];
-       u8   cpt1_lfs;
+       u16  cpt1_lfs;
+       u16  ree0_lfs;
+       u16  ree1_lfs;
        u16  cpt1_lf_msixoff[MAX_RVU_BLKLF_CNT];
+       u16  ree0_lf_msixoff[MAX_RVU_BLKLF_CNT];
+       u16  ree1_lf_msixoff[MAX_RVU_BLKLF_CNT];
 };
 
 struct get_hw_cap_rsp {
@@ -594,6 +621,7 @@ struct npa_lf_alloc_rsp {
        u32 stack_pg_ptrs;  /* No of ptrs per stack page */
        u32 stack_pg_bytes; /* Size of stack page */
        u16 qints; /* NPA_AF_CONST::QINTS */
+       u8 cache_lines; /*BATCH ALLOC DMA */
 };
 
 /* NPA AQ enqueue msg */
@@ -698,6 +726,9 @@ struct nix_lf_alloc_req {
        u16 sso_func;
        u64 rx_cfg;   /* See NIX_AF_LF(0..127)_RX_CFG */
        u64 way_mask;
+#define NIX_LF_RSS_TAG_LSB_AS_ADDER BIT_ULL(0)
+#define NIX_LF_LBK_BLK_SEL         BIT_ULL(1)
+       u64 flags;
 };
 
 struct nix_lf_alloc_rsp {
@@ -717,6 +748,7 @@ struct nix_lf_alloc_rsp {
        u8      cgx_links;  /* No. of CGX links present in HW */
        u8      lbk_links;  /* No. of LBK links present in HW */
        u8      sdp_links;  /* No. of SDP links present in HW */
+       u8      tx_link;    /* Transmit channel link number */
 };
 
 struct nix_lf_free_req {
@@ -835,6 +867,7 @@ struct nix_txsch_free_req {
 struct nix_txschq_config {
        struct mbox_msghdr hdr;
        u8 lvl; /* SMQ/MDQ/TL4/TL3/TL2/TL1 */
+       u8 read;
 #define TXSCHQ_IDX_SHIFT       16
 #define TXSCHQ_IDX_MASK                (BIT_ULL(10) - 1)
 #define TXSCHQ_IDX(reg, shift) (((reg) >> (shift)) & TXSCHQ_IDX_MASK)
@@ -842,6 +875,8 @@ struct nix_txschq_config {
 #define MAX_REGS_PER_MBOX_MSG  20
        u64 reg[MAX_REGS_PER_MBOX_MSG];
        u64 regval[MAX_REGS_PER_MBOX_MSG];
+       /* All 0's => overwrite with new value */
+       u64 regval_mask[MAX_REGS_PER_MBOX_MSG];
 };
 
 struct nix_vtag_config {
@@ -1032,8 +1067,12 @@ struct nix_bp_cfg_rsp {
 
 struct nix_hw_info {
        struct mbox_msghdr hdr;
+       u16 rsvs16;
        u16 max_mtu;
        u16 min_mtu;
+       u32 rpm_dwrr_mtu;
+       u32 sdp_dwrr_mtu;
+       u64 rsvd[16]; /* Add reserved fields for future expansion */
 };
 
 struct nix_bandprof_alloc_req {
@@ -1061,6 +1100,12 @@ struct nix_bandprof_free_req {
        u16 prof_idx[BAND_PROF_NUM_LAYERS][MAX_BANDPROF_PER_PFFUNC];
 };
 
+struct nix_bandprof_get_hwinfo_rsp {
+       struct mbox_msghdr hdr;
+       u16 prof_count[BAND_PROF_NUM_LAYERS];
+       u32 policer_timeunit;
+};
+
 /* NPC mbox message structs */
 
 #define NPC_MCAM_ENTRY_INVALID 0xFFFF
@@ -1074,6 +1119,13 @@ enum npc_af_status {
        NPC_MCAM_ALLOC_DENIED   = -702,
        NPC_MCAM_ALLOC_FAILED   = -703,
        NPC_MCAM_PERM_DENIED    = -704,
+       NPC_FLOW_INTF_INVALID   = -707,
+       NPC_FLOW_CHAN_INVALID   = -708,
+       NPC_FLOW_NO_NIXLF       = -709,
+       NPC_FLOW_NOT_SUPPORTED  = -710,
+       NPC_FLOW_VF_PERM_DENIED = -711,
+       NPC_FLOW_VF_NOT_INIT    = -712,
+       NPC_FLOW_VF_OVERLAP     = -713,
 };
 
 struct npc_mcam_alloc_entry_req {
@@ -1328,6 +1380,10 @@ struct set_vf_perm  {
 
 struct lmtst_tbl_setup_req {
        struct mbox_msghdr hdr;
+       u64 dis_sched_early_comp :1;
+       u64 sch_ena              :1;
+       u64 dis_line_pref        :1;
+       u64 ssow_pf_func         :13;
        u16 base_pcifunc;
        u8  use_local_lmt_region;
        u64 lmt_iova;
@@ -1422,4 +1478,34 @@ struct cpt_rxc_time_cfg_req {
        u16 active_limit;
 };
 
+struct sdp_node_info {
+       /* Node to which this PF belons to */
+       u8 node_id;
+       u8 max_vfs;
+       u8 num_pf_rings;
+       u8 pf_srn;
+#define SDP_MAX_VFS    128
+       u8 vf_rings[SDP_MAX_VFS];
+};
+
+struct sdp_chan_info_msg {
+       struct mbox_msghdr hdr;
+       struct sdp_node_info info;
+};
+
+struct sdp_get_chan_info_msg {
+       struct mbox_msghdr hdr;
+       u16 chan_base;
+       u16 num_chan;
+};
+
+/* CGX mailbox error codes
+ * Range 1101 - 1200.
+ */
+enum cgx_af_status {
+       LMAC_AF_ERR_INVALID_PARAM       = -1101,
+       LMAC_AF_ERR_PF_NOT_MAPPED       = -1102,
+       LMAC_AF_ERR_PERM_DENIED         = -1103,
+};
+
 #endif /* MBOX_H */
index 243cf80..3a819b2 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef NPC_H
@@ -172,6 +169,8 @@ enum key_fields {
        NPC_DMAC,
        NPC_SMAC,
        NPC_ETYPE,
+       NPC_VLAN_ETYPE_CTAG, /* 0x8100 */
+       NPC_VLAN_ETYPE_STAG, /* 0x88A8 */
        NPC_OUTER_VID,
        NPC_TOS,
        NPC_SIP_IPV4,
index fee655c..588822a 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef NPC_PROFILE_H
index 1ee3785..9b8e59f 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Marvell PTP driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
+ *
  */
 
 #include <linux/bitfield.h>
 #define PCI_SUBSYS_DEVID_OCTX2_98xx_PTP                0xB100
 #define PCI_SUBSYS_DEVID_OCTX2_96XX_PTP                0xB200
 #define PCI_SUBSYS_DEVID_OCTX2_95XX_PTP                0xB300
-#define PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP                0xB400
+#define PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP       0xB400
 #define PCI_SUBSYS_DEVID_OCTX2_95MM_PTP                0xB500
-#define PCI_SUBSYS_DEVID_CN10K_A_PTP           0xB900
-#define PCI_SUBSYS_DEVID_CNF10K_A_PTP          0xBA00
-#define PCI_SUBSYS_DEVID_CNF10K_B_PTP          0xBC00
+#define PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP       0xB600
 #define PCI_DEVID_OCTEONTX2_RST                        0xA085
+#define PCI_DEVID_CN10K_PTP                    0xA09E
 
 #define PCI_PTP_BAR_NO                         0
 #define PCI_RST_BAR_NO                         0
@@ -39,6 +39,9 @@
 #define RST_MUL_BITS                           GENMASK_ULL(38, 33)
 #define CLOCK_BASE_RATE                                50000000ULL
 
+static struct ptp *first_ptp_block;
+static const struct pci_device_id ptp_id_table[];
+
 static u64 get_clock_rate(void)
 {
        u64 cfg, ret = CLOCK_BASE_RATE * 16;
@@ -74,23 +77,14 @@ error:
 
 struct ptp *ptp_get(void)
 {
-       struct pci_dev *pdev;
-       struct ptp *ptp;
+       struct ptp *ptp = first_ptp_block;
 
-       /* If the PTP pci device is found on the system and ptp
-        * driver is bound to it then the PTP pci device is returned
-        * to the caller(rvu driver).
-        */
-       pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
-                             PCI_DEVID_OCTEONTX2_PTP, NULL);
-       if (!pdev)
+       /* Check PTP block is present in hardware */
+       if (!pci_dev_present(ptp_id_table))
                return ERR_PTR(-ENODEV);
-
-       ptp = pci_get_drvdata(pdev);
+       /* Check driver is bound to PTP block */
        if (!ptp)
                ptp = ERR_PTR(-EPROBE_DEFER);
-       if (IS_ERR(ptp))
-               pci_dev_put(pdev);
 
        return ptp;
 }
@@ -190,6 +184,8 @@ static int ptp_probe(struct pci_dev *pdev,
        writeq(clock_comp, ptp->reg_base + PTP_CLOCK_COMP);
 
        pci_set_drvdata(pdev, ptp);
+       if (!first_ptp_block)
+               first_ptp_block = ptp;
 
        return 0;
 
@@ -204,6 +200,9 @@ error:
         * `dev->driver_data`.
         */
        pci_set_drvdata(pdev, ERR_PTR(err));
+       if (!first_ptp_block)
+               first_ptp_block = ERR_PTR(err);
+
        return 0;
 }
 
@@ -233,19 +232,14 @@ static const struct pci_device_id ptp_id_table[] = {
                         PCI_SUBSYS_DEVID_OCTX2_95XX_PTP) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
                         PCI_VENDOR_ID_CAVIUM,
-                        PCI_SUBSYS_DEVID_OCTX2_LOKI_PTP) },
+                        PCI_SUBSYS_DEVID_OCTX2_95XXN_PTP) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
                         PCI_VENDOR_ID_CAVIUM,
                         PCI_SUBSYS_DEVID_OCTX2_95MM_PTP) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
                         PCI_VENDOR_ID_CAVIUM,
-                        PCI_SUBSYS_DEVID_CN10K_A_PTP) },
-       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
-                        PCI_VENDOR_ID_CAVIUM,
-                        PCI_SUBSYS_DEVID_CNF10K_A_PTP) },
-       { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_PTP,
-                        PCI_VENDOR_ID_CAVIUM,
-                        PCI_SUBSYS_DEVID_CNF10K_B_PTP) },
+                        PCI_SUBSYS_DEVID_OCTX2_95XXO_PTP) },
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_PTP) },
        { 0, }
 };
 
index 878bc39..76d404b 100644 (file)
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Marvell PTP driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
+ *
  */
 
 #ifndef PTP_H
index a91ccdc..07b0eaf 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*  Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
  *
  * Copyright (C) 2020 Marvell.
  *
index d32e74b..f0b0694 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RPM driver
+/* Marvell CN10K RPM driver
  *
  * Copyright (C) 2020 Marvell.
  *
index 5fe277e..ce647e0 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -70,18 +67,21 @@ static void rvu_setup_hw_capabilities(struct rvu *rvu)
        hw->cap.nix_shaping = true;
        hw->cap.nix_tx_link_bp = true;
        hw->cap.nix_rx_multicast = true;
+       hw->cap.nix_shaper_toggle_wait = false;
        hw->rvu = rvu;
 
-       if (is_rvu_96xx_B0(rvu)) {
+       if (is_rvu_pre_96xx_C0(rvu)) {
                hw->cap.nix_fixed_txschq_mapping = true;
                hw->cap.nix_txsch_per_cgx_lmac = 4;
                hw->cap.nix_txsch_per_lbk_lmac = 132;
                hw->cap.nix_txsch_per_sdp_lmac = 76;
                hw->cap.nix_shaping = false;
                hw->cap.nix_tx_link_bp = false;
-               if (is_rvu_96xx_A0(rvu))
+               if (is_rvu_96xx_A0(rvu) || is_rvu_95xx_A0(rvu))
                        hw->cap.nix_rx_multicast = false;
        }
+       if (!is_rvu_pre_96xx_C0(rvu))
+               hw->cap.nix_shaper_toggle_wait = true;
 
        if (!is_rvu_otx2(rvu))
                hw->cap.per_pf_mbox_regs = true;
@@ -498,12 +498,15 @@ int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf)
 static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
 {
        struct rvu_block *block = &rvu->hw->block[blkaddr];
+       int err;
 
        if (!block->implemented)
                return;
 
        rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
-       rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+       err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+       if (err)
+               dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
 }
 
 static void rvu_reset_all_blocks(struct rvu *rvu)
@@ -924,16 +927,26 @@ static int rvu_setup_hw_resources(struct rvu *rvu)
        block->lfreset_reg = NPA_AF_LF_RST;
        sprintf(block->name, "NPA");
        err = rvu_alloc_bitmap(&block->lf);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate NPA LF bitmap\n", __func__);
                return err;
+       }
 
 nix:
        err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX0);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate NIX0 LFs bitmap\n", __func__);
                return err;
+       }
+
        err = rvu_setup_nix_hw_resource(rvu, BLKADDR_NIX1);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate NIX1 LFs bitmap\n", __func__);
                return err;
+       }
 
        /* Init SSO group's bitmap */
        block = &hw->block[BLKADDR_SSO];
@@ -953,8 +966,11 @@ nix:
        block->lfreset_reg = SSO_AF_LF_HWGRP_RST;
        sprintf(block->name, "SSO GROUP");
        err = rvu_alloc_bitmap(&block->lf);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate SSO LF bitmap\n", __func__);
                return err;
+       }
 
 ssow:
        /* Init SSO workslot's bitmap */
@@ -974,8 +990,11 @@ ssow:
        block->lfreset_reg = SSOW_AF_LF_HWS_RST;
        sprintf(block->name, "SSOWS");
        err = rvu_alloc_bitmap(&block->lf);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate SSOW LF bitmap\n", __func__);
                return err;
+       }
 
 tim:
        /* Init TIM LF's bitmap */
@@ -996,35 +1015,53 @@ tim:
        block->lfreset_reg = TIM_AF_LF_RST;
        sprintf(block->name, "TIM");
        err = rvu_alloc_bitmap(&block->lf);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate TIM LF bitmap\n", __func__);
                return err;
+       }
 
 cpt:
        err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT0);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate CPT0 LF bitmap\n", __func__);
                return err;
+       }
        err = rvu_setup_cpt_hw_resource(rvu, BLKADDR_CPT1);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate CPT1 LF bitmap\n", __func__);
                return err;
+       }
 
        /* Allocate memory for PFVF data */
        rvu->pf = devm_kcalloc(rvu->dev, hw->total_pfs,
                               sizeof(struct rvu_pfvf), GFP_KERNEL);
-       if (!rvu->pf)
+       if (!rvu->pf) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate memory for PF's rvu_pfvf struct\n", __func__);
                return -ENOMEM;
+       }
 
        rvu->hwvf = devm_kcalloc(rvu->dev, hw->total_vfs,
                                 sizeof(struct rvu_pfvf), GFP_KERNEL);
-       if (!rvu->hwvf)
+       if (!rvu->hwvf) {
+               dev_err(rvu->dev,
+                       "%s: Failed to allocate memory for VF's rvu_pfvf struct\n", __func__);
                return -ENOMEM;
+       }
 
        mutex_init(&rvu->rsrc_lock);
 
        rvu_fwdata_init(rvu);
 
        err = rvu_setup_msix_resources(rvu);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev,
+                       "%s: Failed to setup MSIX resources\n", __func__);
                return err;
+       }
 
        for (blkid = 0; blkid < BLK_COUNT; blkid++) {
                block = &hw->block[blkid];
@@ -1050,25 +1087,39 @@ cpt:
                goto msix_err;
 
        err = rvu_npc_init(rvu);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev, "%s: Failed to initialize npc\n", __func__);
                goto npc_err;
+       }
 
        err = rvu_cgx_init(rvu);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev, "%s: Failed to initialize cgx\n", __func__);
                goto cgx_err;
+       }
 
        /* Assign MACs for CGX mapped functions */
        rvu_setup_pfvf_macaddress(rvu);
 
        err = rvu_npa_init(rvu);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev, "%s: Failed to initialize npa\n", __func__);
                goto npa_err;
+       }
 
        rvu_get_lbk_bufsize(rvu);
 
        err = rvu_nix_init(rvu);
-       if (err)
+       if (err) {
+               dev_err(rvu->dev, "%s: Failed to initialize nix\n", __func__);
                goto nix_err;
+       }
+
+       err = rvu_sdp_init(rvu);
+       if (err) {
+               dev_err(rvu->dev, "%s: Failed to initialize sdp\n", __func__);
+               goto nix_err;
+       }
 
        rvu_program_channels(rvu);
 
@@ -1322,9 +1373,10 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
        int blkaddr = BLKADDR_NIX0, vf;
        struct rvu_pfvf *pf;
 
+       pf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
+
        /* All CGX mapped PFs are set with assigned NIX block during init */
        if (is_pf_cgxmapped(rvu, rvu_get_pf(pcifunc))) {
-               pf = rvu_get_pfvf(rvu, pcifunc & ~RVU_PFVF_FUNC_MASK);
                blkaddr = pf->nix_blkaddr;
        } else if (is_afvf(pcifunc)) {
                vf = pcifunc - 1;
@@ -1337,6 +1389,10 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc)
                        blkaddr = BLKADDR_NIX0;
        }
 
+       /* if SDP1 then the blkaddr is NIX1 */
+       if (is_sdp_pfvf(pcifunc) && pf->sdp_info->node_id == 1)
+               blkaddr = BLKADDR_NIX1;
+
        switch (blkaddr) {
        case BLKADDR_NIX1:
                pfvf->nix_blkaddr = BLKADDR_NIX1;
@@ -1737,6 +1793,99 @@ int rvu_mbox_handler_msix_offset(struct rvu *rvu, struct msg_req *req,
        return 0;
 }
 
+int rvu_mbox_handler_free_rsrc_cnt(struct rvu *rvu, struct msg_req *req,
+                                  struct free_rsrcs_rsp *rsp)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       struct rvu_block *block;
+       struct nix_txsch *txsch;
+       struct nix_hw *nix_hw;
+
+       mutex_lock(&rvu->rsrc_lock);
+
+       block = &hw->block[BLKADDR_NPA];
+       rsp->npa = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_NIX0];
+       rsp->nix = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_NIX1];
+       rsp->nix1 = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_SSO];
+       rsp->sso = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_SSOW];
+       rsp->ssow = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_TIM];
+       rsp->tim = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_CPT0];
+       rsp->cpt = rvu_rsrc_free_count(&block->lf);
+
+       block = &hw->block[BLKADDR_CPT1];
+       rsp->cpt1 = rvu_rsrc_free_count(&block->lf);
+
+       if (rvu->hw->cap.nix_fixed_txschq_mapping) {
+               rsp->schq[NIX_TXSCH_LVL_SMQ] = 1;
+               rsp->schq[NIX_TXSCH_LVL_TL4] = 1;
+               rsp->schq[NIX_TXSCH_LVL_TL3] = 1;
+               rsp->schq[NIX_TXSCH_LVL_TL2] = 1;
+               /* NIX1 */
+               if (!is_block_implemented(rvu->hw, BLKADDR_NIX1))
+                       goto out;
+               rsp->schq_nix1[NIX_TXSCH_LVL_SMQ] = 1;
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL4] = 1;
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL3] = 1;
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL2] = 1;
+       } else {
+               nix_hw = get_nix_hw(hw, BLKADDR_NIX0);
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+               rsp->schq[NIX_TXSCH_LVL_SMQ] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL4];
+               rsp->schq[NIX_TXSCH_LVL_TL4] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL3];
+               rsp->schq[NIX_TXSCH_LVL_TL3] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+               rsp->schq[NIX_TXSCH_LVL_TL2] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               if (!is_block_implemented(rvu->hw, BLKADDR_NIX1))
+                       goto out;
+
+               nix_hw = get_nix_hw(hw, BLKADDR_NIX1);
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+               rsp->schq_nix1[NIX_TXSCH_LVL_SMQ] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL4];
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL4] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL3];
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL3] =
+                               rvu_rsrc_free_count(&txsch->schq);
+
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+               rsp->schq_nix1[NIX_TXSCH_LVL_TL2] =
+                               rvu_rsrc_free_count(&txsch->schq);
+       }
+
+       rsp->schq_nix1[NIX_TXSCH_LVL_TL1] = 1;
+out:
+       rsp->schq[NIX_TXSCH_LVL_TL1] = 1;
+       mutex_unlock(&rvu->rsrc_lock);
+
+       return 0;
+}
+
 int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req,
                            struct msg_rsp *rsp)
 {
@@ -2402,11 +2551,12 @@ static void rvu_afvf_queue_flr_work(struct rvu *rvu, int start_vf, int numvfs)
        for (vf = 0; vf < numvfs; vf++) {
                if (!(intr & BIT_ULL(vf)))
                        continue;
-               dev = vf + start_vf + rvu->hw->total_pfs;
-               queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
                /* Clear and disable the interrupt */
                rvupf_write64(rvu, RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf));
                rvupf_write64(rvu, RVU_PF_VFFLR_INT_ENA_W1CX(reg), BIT_ULL(vf));
+
+               dev = vf + start_vf + rvu->hw->total_pfs;
+               queue_work(rvu->flr_wq, &rvu->flr_wrk[dev].work);
        }
 }
 
@@ -2422,14 +2572,14 @@ static irqreturn_t rvu_flr_intr_handler(int irq, void *rvu_irq)
 
        for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
                if (intr & (1ULL << pf)) {
-                       /* PF is already dead do only AF related operations */
-                       queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
                        /* clear interrupt */
                        rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT,
                                    BIT_ULL(pf));
                        /* Disable the interrupt */
                        rvu_write64(rvu, BLKADDR_RVUM, RVU_AF_PFFLR_INT_ENA_W1C,
                                    BIT_ULL(pf));
+                       /* PF is already dead do only AF related operations */
+                       queue_work(rvu->flr_wq, &rvu->flr_wrk[pf].work);
                }
        }
 
@@ -2984,27 +3134,37 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        err = rvu_mbox_init(rvu, &rvu->afpf_wq_info, TYPE_AFPF,
                            rvu->hw->total_pfs, rvu_afpf_mbox_handler,
                            rvu_afpf_mbox_up_handler);
-       if (err)
+       if (err) {
+               dev_err(dev, "%s: Failed to initialize mbox\n", __func__);
                goto err_hwsetup;
+       }
 
        err = rvu_flr_init(rvu);
-       if (err)
+       if (err) {
+               dev_err(dev, "%s: Failed to initialize flr\n", __func__);
                goto err_mbox;
+       }
 
        err = rvu_register_interrupts(rvu);
-       if (err)
+       if (err) {
+               dev_err(dev, "%s: Failed to register interrupts\n", __func__);
                goto err_flr;
+       }
 
        err = rvu_register_dl(rvu);
-       if (err)
+       if (err) {
+               dev_err(dev, "%s: Failed to register devlink\n", __func__);
                goto err_irq;
+       }
 
        rvu_setup_rvum_blk_revid(rvu);
 
        /* Enable AF's VFs (if any) */
        err = rvu_enable_sriov(rvu);
-       if (err)
+       if (err) {
+               dev_err(dev, "%s: Failed to enable sriov\n", __func__);
                goto err_dl;
+       }
 
        /* Initialize debugfs */
        rvu_dbg_init(rvu);
index 91503fb..d38e5c9 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef RVU_H
@@ -243,8 +240,11 @@ struct rvu_pfvf {
        u8      nix_blkaddr; /* BLKADDR_NIX0/1 assigned to this PF */
        u8      nix_rx_intf; /* NIX0_RX/NIX1_RX interface to NPC */
        u8      nix_tx_intf; /* NIX0_TX/NIX1_TX interface to NPC */
+       u8      lbkid;       /* NIX0/1 lbk link ID */
        u64     lmt_base_addr; /* Preseving the pcifunc's lmtst base addr*/
+       u64     lmt_map_ent_w1; /* Preseving the word1 of lmtst map table entry*/
        unsigned long flags;
+       struct  sdp_node_info *sdp_info;
 };
 
 enum rvu_pfvf_flags {
@@ -314,6 +314,7 @@ struct nix_hw {
        struct nix_lso lso;
        struct nix_txvlan txvlan;
        struct nix_ipolicer *ipolicer;
+       u64    *tx_credits;
 };
 
 /* RVU block's capabilities or functionality,
@@ -327,8 +328,10 @@ struct hw_cap {
        u16     nix_txsch_per_sdp_lmac; /* Max Q's transmitting to SDP LMAC */
        bool    nix_fixed_txschq_mapping; /* Schq mapping fixed or flexible */
        bool    nix_shaping;             /* Is shaping and coloring supported */
+       bool    nix_shaper_toggle_wait; /* Shaping toggle needs poll/wait */
        bool    nix_tx_link_bp;          /* Can link backpressure TL queues ? */
        bool    nix_rx_multicast;        /* Rx packet replication support */
+       bool    nix_common_dwrr_mtu;     /* Common DWRR MTU for quantum config */
        bool    per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
        bool    programmable_chans; /* Channels programmable ? */
        bool    ipolicer;
@@ -355,6 +358,7 @@ struct rvu_hwinfo {
        u16     npc_counters;      /* No of match stats counters */
        u32     lbk_bufsize;       /* FIFO size supported by LBK */
        bool    npc_ext_set;       /* Extended register set */
+       u64     npc_stat_ena;      /* Match stats enable bit */
 
        struct hw_cap    cap;
        struct rvu_block block[BLK_COUNT]; /* Block info */
@@ -514,20 +518,34 @@ static inline u64 rvupf_read64(struct rvu *rvu, u64 offset)
 }
 
 /* Silicon revisions */
+static inline bool is_rvu_pre_96xx_C0(struct rvu *rvu)
+{
+       struct pci_dev *pdev = rvu->pdev;
+       /* 96XX A0/B0, 95XX A0/A1/B0 chips */
+       return ((pdev->revision == 0x00) || (pdev->revision == 0x01) ||
+               (pdev->revision == 0x10) || (pdev->revision == 0x11) ||
+               (pdev->revision == 0x14));
+}
+
 static inline bool is_rvu_96xx_A0(struct rvu *rvu)
 {
        struct pci_dev *pdev = rvu->pdev;
 
-       return (pdev->revision == 0x00) &&
-               (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+       return (pdev->revision == 0x00);
 }
 
 static inline bool is_rvu_96xx_B0(struct rvu *rvu)
 {
        struct pci_dev *pdev = rvu->pdev;
 
-       return ((pdev->revision == 0x00) || (pdev->revision == 0x01)) &&
-               (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX);
+       return (pdev->revision == 0x00) || (pdev->revision == 0x01);
+}
+
+static inline bool is_rvu_95xx_A0(struct rvu *rvu)
+{
+       struct pci_dev *pdev = rvu->pdev;
+
+       return (pdev->revision == 0x10) || (pdev->revision == 0x11);
 }
 
 /* REVID for PCIe devices.
@@ -536,9 +554,10 @@ static inline bool is_rvu_96xx_B0(struct rvu *rvu)
  */
 #define PCI_REVISION_ID_96XX           0x00
 #define PCI_REVISION_ID_95XX           0x10
-#define PCI_REVISION_ID_LOKI           0x20
+#define PCI_REVISION_ID_95XXN          0x20
 #define PCI_REVISION_ID_98XX           0x30
 #define PCI_REVISION_ID_95XXMM         0x40
+#define PCI_REVISION_ID_95XXO          0xE0
 
 static inline bool is_rvu_otx2(struct rvu *rvu)
 {
@@ -547,8 +566,8 @@ static inline bool is_rvu_otx2(struct rvu *rvu)
        u8 midr = pdev->revision & 0xF0;
 
        return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
-               midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX ||
-               midr == PCI_REVISION_ID_95XXMM);
+               midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX ||
+               midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO);
 }
 
 static inline u16 rvu_nix_chan_cgx(struct rvu *rvu, u8 cgxid,
@@ -578,6 +597,16 @@ static inline u16 rvu_nix_chan_lbk(struct rvu *rvu, u8 lbkid,
        return rvu->hw->lbk_chan_base + lbkid * lbk_chans + chan;
 }
 
+static inline u16 rvu_nix_chan_sdp(struct rvu *rvu, u8 chan)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+
+       if (!hw->cap.programmable_chans)
+               return NIX_CHAN_SDP_CHX(chan);
+
+       return hw->sdp_chan_base + chan;
+}
+
 static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan)
 {
        return rvu->hw->cpt_chan_base + chan;
@@ -640,10 +669,17 @@ int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue,
                 int qsize, int inst_size, int res_size);
 void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq);
 
+/* SDP APIs */
+int rvu_sdp_init(struct rvu *rvu);
+bool is_sdp_pfvf(u16 pcifunc);
+bool is_sdp_pf(u16 pcifunc);
+bool is_sdp_vf(u16 pcifunc);
+
 /* CGX APIs */
 static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf)
 {
-       return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs);
+       return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs) &&
+               !is_sdp_pf(pf << RVU_PFVF_PF_SHIFT);
 }
 
 static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id)
@@ -706,6 +742,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
                        struct nix_cn10k_aq_enq_rsp *aq_rsp,
                        u16 pcifunc, u8 ctype, u32 qidx);
 int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu);
+u32 convert_bytes_to_dwrr_mtu(u32 bytes);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
@@ -745,7 +783,6 @@ bool is_npc_intf_tx(u8 intf);
 bool is_npc_intf_rx(u8 intf);
 bool is_npc_interface_valid(struct rvu *rvu, u8 intf);
 int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena);
-int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel);
 int npc_flow_steering_init(struct rvu *rvu, int blkaddr);
 const char *npc_get_field_name(u8 hdr);
 int npc_get_bank(struct npc_mcam *mcam, int index);
index fe99ac4..81e8ea9 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/types.h>
@@ -448,7 +445,7 @@ int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start)
        u8 cgx_id, lmac_id;
 
        if (!is_cgx_config_permitted(rvu, pcifunc))
-               return -EPERM;
+               return LMAC_AF_ERR_PERM_DENIED;
 
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
 
@@ -507,7 +504,7 @@ static int rvu_lmac_get_stats(struct rvu *rvu, struct msg_req *req,
        void *cgxd;
 
        if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
-               return -ENODEV;
+               return LMAC_AF_ERR_PERM_DENIED;
 
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
        cgxd = rvu_cgx_pdata(cgx_idx, rvu);
@@ -561,7 +558,7 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu,
        void *cgxd;
 
        if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
-               return -EPERM;
+               return LMAC_AF_ERR_PERM_DENIED;
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac);
 
        cgxd = rvu_cgx_pdata(cgx_idx, rvu);
@@ -888,7 +885,7 @@ int rvu_mbox_handler_cgx_get_phy_fec_stats(struct rvu *rvu, struct msg_req *req,
        u8 cgx_id, lmac_id;
 
        if (!is_pf_cgxmapped(rvu, pf))
-               return -EPERM;
+               return LMAC_AF_ERR_PF_NOT_MAPPED;
 
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
        return cgx_get_phy_fec_stats(rvu_cgx_pdata(cgx_id, rvu), lmac_id);
@@ -1046,7 +1043,7 @@ int rvu_mbox_handler_cgx_mac_addr_reset(struct rvu *rvu, struct msg_req *req,
        u8 cgx_id, lmac_id;
 
        if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
-               return -EPERM;
+               return LMAC_AF_ERR_PERM_DENIED;
 
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
        return cgx_lmac_addr_reset(cgx_id, lmac_id);
@@ -1060,7 +1057,7 @@ int rvu_mbox_handler_cgx_mac_addr_update(struct rvu *rvu,
        u8 cgx_id, lmac_id;
 
        if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc))
-               return -EPERM;
+               return LMAC_AF_ERR_PERM_DENIED;
 
        rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
        return cgx_lmac_addr_update(cgx_id, lmac_id, req->mac_addr, req->index);
index 8d48b64..46a41cf 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/*  Marvell RPM CN10K driver
+/* Marvell RPM CN10K driver
  *
  * Copyright (C) 2020 Marvell.
  */
@@ -49,6 +49,7 @@ static int lmtst_map_table_ops(struct rvu *rvu, u32 index, u64 *val,
        return 0;
 }
 
+#define LMT_MAP_TBL_W1_OFF  8
 static u32 rvu_get_lmtst_tbl_index(struct rvu *rvu, u16 pcifunc)
 {
        return ((rvu_get_pf(pcifunc) * rvu->hw->total_vfs) +
@@ -82,10 +83,10 @@ static int rvu_get_lmtaddr(struct rvu *rvu, u16 pcifunc,
                dev_err(rvu->dev, "%s LMTLINE iova transulation failed err:%llx\n", __func__, val);
                return -EIO;
        }
-       /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT1[60:21]
+       /* PA[51:12] = RVU_AF_SMMU_TLN_FLIT0[57:18]
         * PA[11:0] = IOVA[11:0]
         */
-       pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT1) >> 21;
+       pa = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_SMMU_TLN_FLIT0) >> 18;
        pa &= GENMASK_ULL(39, 0);
        *lmt_addr = (pa << 12) | (iova  & 0xFFF);
 
@@ -131,9 +132,11 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
                                     struct lmtst_tbl_setup_req *req,
                                     struct msg_rsp *rsp)
 {
-       u64 lmt_addr, val;
-       u32 pri_tbl_idx;
+       struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+       u32 pri_tbl_idx, tbl_idx;
+       u64 lmt_addr;
        int err = 0;
+       u64 val;
 
        /* Check if PF_FUNC wants to use it's own local memory as LMTLINE
         * region, if so, convert that IOVA to physical address and
@@ -170,7 +173,7 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
                        dev_err(rvu->dev,
                                "Failed to read LMT map table: index 0x%x err %d\n",
                                pri_tbl_idx, err);
-                       return err;
+                       goto error;
                }
 
                /* Update the base lmt addr of secondary with primary's base
@@ -181,7 +184,53 @@ int rvu_mbox_handler_lmtst_tbl_setup(struct rvu *rvu,
                        return err;
        }
 
-       return 0;
+       /* This mailbox can also be used to update word1 of APR_LMT_MAP_ENTRY_S
+        * like enabling scheduled LMTST, disable LMTLINE prefetch, disable
+        * early completion for ordered LMTST.
+        */
+       if (req->sch_ena || req->dis_sched_early_comp || req->dis_line_pref) {
+               tbl_idx = rvu_get_lmtst_tbl_index(rvu, req->hdr.pcifunc);
+               err = lmtst_map_table_ops(rvu, tbl_idx + LMT_MAP_TBL_W1_OFF,
+                                         &val, LMT_TBL_OP_READ);
+               if (err) {
+                       dev_err(rvu->dev,
+                               "Failed to read LMT map table: index 0x%x err %d\n",
+                               tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+                       goto error;
+               }
+
+               /* Storing lmt map table entry word1 default value as this needs
+                * to be reverted in FLR. Also making sure this default value
+                * doesn't get overwritten on multiple calls to this mailbox.
+                */
+               if (!pfvf->lmt_map_ent_w1)
+                       pfvf->lmt_map_ent_w1 = val;
+
+               /* Disable early completion for Ordered LMTSTs. */
+               if (req->dis_sched_early_comp)
+                       val |= (req->dis_sched_early_comp <<
+                               APR_LMT_MAP_ENT_DIS_SCH_CMP_SHIFT);
+               /* Enable scheduled LMTST */
+               if (req->sch_ena)
+                       val |= (req->sch_ena << APR_LMT_MAP_ENT_SCH_ENA_SHIFT) |
+                               req->ssow_pf_func;
+               /* Disables LMTLINE prefetch before receiving store data. */
+               if (req->dis_line_pref)
+                       val |= (req->dis_line_pref <<
+                               APR_LMT_MAP_ENT_DIS_LINE_PREF_SHIFT);
+
+               err = lmtst_map_table_ops(rvu, tbl_idx + LMT_MAP_TBL_W1_OFF,
+                                         &val, LMT_TBL_OP_WRITE);
+               if (err) {
+                       dev_err(rvu->dev,
+                               "Failed to update LMT map table: index 0x%x err %d\n",
+                               tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+                       goto error;
+               }
+       }
+
+error:
+       return err;
 }
 
 /* Resetting the lmtst map table to original base addresses */
@@ -194,27 +243,45 @@ void rvu_reset_lmt_map_tbl(struct rvu *rvu, u16 pcifunc)
        if (is_rvu_otx2(rvu))
                return;
 
-       if (pfvf->lmt_base_addr) {
+       if (pfvf->lmt_base_addr || pfvf->lmt_map_ent_w1) {
                /* This corresponds to lmt map table index */
                tbl_idx = rvu_get_lmtst_tbl_index(rvu, pcifunc);
                /* Reverting back original lmt base addr for respective
                 * pcifunc.
                 */
-               err = lmtst_map_table_ops(rvu, tbl_idx, &pfvf->lmt_base_addr,
-                                         LMT_TBL_OP_WRITE);
-               if (err)
-                       dev_err(rvu->dev,
-                               "Failed to update LMT map table: index 0x%x err %d\n",
-                               tbl_idx, err);
-               pfvf->lmt_base_addr = 0;
+               if (pfvf->lmt_base_addr) {
+                       err = lmtst_map_table_ops(rvu, tbl_idx,
+                                                 &pfvf->lmt_base_addr,
+                                                 LMT_TBL_OP_WRITE);
+                       if (err)
+                               dev_err(rvu->dev,
+                                       "Failed to update LMT map table: index 0x%x err %d\n",
+                                       tbl_idx, err);
+                       pfvf->lmt_base_addr = 0;
+               }
+               /* Reverting back to orginal word1 val of lmtst map table entry
+                * which underwent changes.
+                */
+               if (pfvf->lmt_map_ent_w1) {
+                       err = lmtst_map_table_ops(rvu,
+                                                 tbl_idx + LMT_MAP_TBL_W1_OFF,
+                                                 &pfvf->lmt_map_ent_w1,
+                                                 LMT_TBL_OP_WRITE);
+                       if (err)
+                               dev_err(rvu->dev,
+                                       "Failed to update LMT map table: index 0x%x err %d\n",
+                                       tbl_idx + LMT_MAP_TBL_W1_OFF, err);
+                       pfvf->lmt_map_ent_w1 = 0;
+               }
        }
 }
 
 int rvu_set_channels_base(struct rvu *rvu)
 {
+       u16 nr_lbk_chans, nr_sdp_chans, nr_cgx_chans, nr_cpt_chans;
+       u16 sdp_chan_base, cgx_chan_base, cpt_chan_base;
        struct rvu_hwinfo *hw = rvu->hw;
-       u16 cpt_chan_base;
-       u64 nix_const;
+       u64 nix_const, nix_const1;
        int blkaddr;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
@@ -222,6 +289,7 @@ int rvu_set_channels_base(struct rvu *rvu)
                return blkaddr;
 
        nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+       nix_const1 = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
 
        hw->cgx = (nix_const >> 12) & 0xFULL;
        hw->lmac_per_cgx = (nix_const >> 8) & 0xFULL;
@@ -244,14 +312,24 @@ int rvu_set_channels_base(struct rvu *rvu)
         * channels such that all channel numbers are contiguous
         * leaving no holes. This way the new CPT channels can be
         * accomodated. The order of channel numbers assigned is
-        * LBK, SDP, CGX and CPT.
+        * LBK, SDP, CGX and CPT. Also the base channel number
+        * of a block must be multiple of number of channels
+        * of the block.
         */
-       hw->sdp_chan_base = hw->lbk_chan_base + hw->lbk_links *
-                               ((nix_const >> 16) & 0xFFULL);
-       hw->cgx_chan_base = hw->sdp_chan_base + hw->sdp_links * SDP_CHANNELS;
+       nr_lbk_chans = (nix_const >> 16) & 0xFFULL;
+       nr_sdp_chans = nix_const1 & 0xFFFULL;
+       nr_cgx_chans = nix_const & 0xFFULL;
+       nr_cpt_chans = (nix_const >> 32) & 0xFFFULL;
+
+       sdp_chan_base = hw->lbk_chan_base + hw->lbk_links * nr_lbk_chans;
+       /* Round up base channel to multiple of number of channels */
+       hw->sdp_chan_base = ALIGN(sdp_chan_base, nr_sdp_chans);
+
+       cgx_chan_base = hw->sdp_chan_base + hw->sdp_links * nr_sdp_chans;
+       hw->cgx_chan_base = ALIGN(cgx_chan_base, nr_cgx_chans);
 
-       cpt_chan_base = hw->cgx_chan_base + hw->cgx_links *
-                               (nix_const & 0xFFULL);
+       cpt_chan_base = hw->cgx_chan_base + hw->cgx_links * nr_cgx_chans;
+       hw->cpt_chan_base = ALIGN(cpt_chan_base, nr_cpt_chans);
 
        /* Out of 4096 channels start CPT from 2048 so
         * that MSB for CPT channels is always set
@@ -355,6 +433,7 @@ err_put:
 
 static void __rvu_nix_set_channels(struct rvu *rvu, int blkaddr)
 {
+       u64 nix_const1 = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
        u64 nix_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
        u16 cgx_chans, lbk_chans, sdp_chans, cpt_chans;
        struct rvu_hwinfo *hw = rvu->hw;
@@ -364,7 +443,7 @@ static void __rvu_nix_set_channels(struct rvu *rvu, int blkaddr)
 
        cgx_chans = nix_const & 0xFFULL;
        lbk_chans = (nix_const >> 16) & 0xFFULL;
-       sdp_chans = SDP_CHANNELS;
+       sdp_chans = nix_const1 & 0xFFFULL;
        cpt_chans = (nix_const >> 32) & 0xFFFULL;
 
        start = hw->cgx_chan_base;
index 89253f7..1f90a74 100644 (file)
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2020 Marvell. */
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
 
 #include <linux/bitfield.h>
 #include <linux/pci.h>
index 9b2dfbf..9338765 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2019 Marvell International Ltd.
+ * Copyright (C) 2019 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifdef CONFIG_DEBUG_FS
index 2688186..274d3ab 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Devlink
+/* Marvell RVU Admin Function Devlink
  *
  * Copyright (C) 2020 Marvell.
  *
@@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
        rvu_nix_health_reporters_destroy(rvu_dl);
 }
 
+/* Devlink Params APIs */
+static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id,
+                                      union devlink_param_value val,
+                                      struct netlink_ext_ack *extack)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       int dwrr_mtu = val.vu32;
+       struct nix_txsch *txsch;
+       struct nix_hw *nix_hw;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Setting DWRR_MTU is not supported on this silicon");
+               return -EOPNOTSUPP;
+       }
+
+       if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) &&
+           (dwrr_mtu != 9728 && dwrr_mtu != 10240)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Invalid, supported MTUs are 0,2,4,8.16,32,64....4K,8K,32K,64K and 9728, 10240");
+               return -EINVAL;
+       }
+
+       nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0);
+       if (!nix_hw)
+               return -ENODEV;
+
+       txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+       if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Changing DWRR MTU is not supported when there are active NIXLFs");
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Make sure none of the PF/VF interfaces are initialized and retry");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32);
+       rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu);
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               return -EOPNOTSUPP;
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       return 0;
+}
+
+enum rvu_af_dl_param_id {
+       RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+};
+
+static const struct devlink_param rvu_af_dl_params[] = {
+       DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+                            "dwrr_mtu", DEVLINK_PARAM_TYPE_U32,
+                            BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                            rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
+                            rvu_af_dl_dwrr_mtu_validate),
+};
+
+/* Devlink switch mode */
 static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
        struct rvu_devlink *rvu_dl = devlink_priv(devlink);
@@ -1420,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu)
        struct devlink *dl;
        int err;
 
-       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
+       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
+                          rvu->dev);
        if (!dl) {
                dev_warn(rvu->dev, "devlink_alloc failed\n");
                return -ENOMEM;
        }
 
-       err = devlink_register(dl, rvu->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(rvu->dev, "devlink register failed with error %d\n", err);
                devlink_free(dl);
@@ -1438,7 +1522,30 @@ int rvu_register_dl(struct rvu *rvu)
        rvu_dl->rvu = rvu;
        rvu->rvu_dl = rvu_dl;
 
-       return rvu_health_reporters_create(rvu);
+       err = rvu_health_reporters_create(rvu);
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink health reporter creation failed with error %d\n", err);
+               goto err_dl_health;
+       }
+
+       err = devlink_params_register(dl, rvu_af_dl_params,
+                                     ARRAY_SIZE(rvu_af_dl_params));
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink params register failed with error %d", err);
+               goto err_dl_health;
+       }
+
+       devlink_params_publish(dl);
+
+       return 0;
+
+err_dl_health:
+       rvu_health_reporters_destroy(rvu);
+       devlink_unregister(dl);
+       devlink_free(dl);
+       return err;
 }
 
 void rvu_unregister_dl(struct rvu *rvu)
@@ -1449,6 +1556,8 @@ void rvu_unregister_dl(struct rvu *rvu)
        if (!dl)
                return;
 
+       devlink_params_unregister(dl, rvu_af_dl_params,
+                                 ARRAY_SIZE(rvu_af_dl_params));
        rvu_health_reporters_destroy(rvu);
        devlink_unregister(dl);
        devlink_free(dl);
index 471e57d..51efe88 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Devlink
+/* Marvell RVU Admin Function Devlink
  *
  * Copyright (C) 2020 Marvell.
  *
index 4bfbbdf..9ef4e94 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -25,7 +22,7 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
                               int type, bool add);
 static int nix_setup_ipolicers(struct rvu *rvu,
                               struct nix_hw *nix_hw, int blkaddr);
-static void nix_ipolicer_freemem(struct nix_hw *nix_hw);
+static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw);
 static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
                               struct nix_hw *nix_hw, u16 pcifunc);
 static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
@@ -192,6 +189,47 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
        return NULL;
 }
 
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu)
+{
+       dwrr_mtu &= 0x1FULL;
+
+       /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       switch (dwrr_mtu) {
+       case 4:
+               return 9728;
+       case 5:
+               return 10240;
+       default:
+               return BIT_ULL(dwrr_mtu);
+       }
+
+       return 0;
+}
+
+u32 convert_bytes_to_dwrr_mtu(u32 bytes)
+{
+       /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       if (bytes > BIT_ULL(16))
+               return 0;
+
+       switch (bytes) {
+       case 9728:
+               return 4;
+       case 10240:
+               return 5;
+       default:
+               return ilog2(bytes);
+       }
+
+       return 0;
+}
+
 static void nix_rx_sync(struct rvu *rvu, int blkaddr)
 {
        int err;
@@ -249,16 +287,22 @@ static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
        return true;
 }
 
-static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
+static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf,
+                             struct nix_lf_alloc_rsp *rsp, bool loop)
 {
-       struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+       struct rvu_pfvf *parent_pf, *pfvf = rvu_get_pfvf(rvu, pcifunc);
+       u16 req_chan_base, req_chan_end, req_chan_cnt;
+       struct rvu_hwinfo *hw = rvu->hw;
+       struct sdp_node_info *sdp_info;
+       int pkind, pf, vf, lbkid, vfid;
        struct mac_ops *mac_ops;
-       int pkind, pf, vf, lbkid;
        u8 cgx_id, lmac_id;
+       bool from_vf;
        int err;
 
        pf = rvu_get_pf(pcifunc);
-       if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
+       if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK &&
+           type != NIX_INTF_TYPE_SDP)
                return 0;
 
        switch (type) {
@@ -276,10 +320,13 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
                pfvf->tx_chan_base = pfvf->rx_chan_base;
                pfvf->rx_chan_cnt = 1;
                pfvf->tx_chan_cnt = 1;
+               rsp->tx_link = cgx_id * hw->lmac_per_cgx + lmac_id;
+
                cgx_set_pkind(rvu_cgx_pdata(cgx_id, rvu), lmac_id, pkind);
                rvu_npc_set_pkind(rvu, pkind, pfvf);
 
                mac_ops = get_mac_ops(rvu_cgx_pdata(cgx_id, rvu));
+
                /* By default we enable pause frames */
                if ((pcifunc & RVU_PFVF_FUNC_MASK) == 0)
                        mac_ops->mac_enadis_pause_frm(rvu_cgx_pdata(cgx_id,
@@ -299,6 +346,25 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
                if (rvu->hw->lbk_links > 1)
                        lbkid = vf & 0x1 ? 0 : 1;
 
+               /* By default NIX0 is configured to send packet on lbk link 1
+                * (which corresponds to LBK1), same packet will receive on
+                * NIX1 over lbk link 0. If NIX1 sends packet on lbk link 0
+                * (which corresponds to LBK2) packet will receive on NIX0 lbk
+                * link 1.
+                * But if lbk links for NIX0 and NIX1 are negated, i.e NIX0
+                * transmits and receives on lbk link 0, whick corresponds
+                * to LBK1 block, back to back connectivity between NIX and
+                * LBK can be achieved (which is similar to 96xx)
+                *
+                *                      RX              TX
+                * NIX0 lbk link        1 (LBK2)        1 (LBK1)
+                * NIX0 lbk link        0 (LBK0)        0 (LBK0)
+                * NIX1 lbk link        0 (LBK1)        0 (LBK2)
+                * NIX1 lbk link        1 (LBK3)        1 (LBK3)
+                */
+               if (loop)
+                       lbkid = !lbkid;
+
                /* Note that AF's VFs work in pairs and talk over consecutive
                 * loopback channels.Therefore if odd number of AF VFs are
                 * enabled then the last VF remains with no pair.
@@ -309,7 +375,48 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
                                        rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
                pfvf->rx_chan_cnt = 1;
                pfvf->tx_chan_cnt = 1;
+               rsp->tx_link = hw->cgx_links + lbkid;
+               pfvf->lbkid = lbkid;
                rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
+               rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
+                                             pfvf->rx_chan_base,
+                                             pfvf->rx_chan_cnt);
+
+               break;
+       case NIX_INTF_TYPE_SDP:
+               from_vf = !!(pcifunc & RVU_PFVF_FUNC_MASK);
+               parent_pf = &rvu->pf[rvu_get_pf(pcifunc)];
+               sdp_info = parent_pf->sdp_info;
+               if (!sdp_info) {
+                       dev_err(rvu->dev, "Invalid sdp_info pointer\n");
+                       return -EINVAL;
+               }
+               if (from_vf) {
+                       req_chan_base = rvu_nix_chan_sdp(rvu, 0) + sdp_info->pf_srn +
+                               sdp_info->num_pf_rings;
+                       vf = (pcifunc & RVU_PFVF_FUNC_MASK) - 1;
+                       for (vfid = 0; vfid < vf; vfid++)
+                               req_chan_base += sdp_info->vf_rings[vfid];
+                       req_chan_cnt = sdp_info->vf_rings[vf];
+                       req_chan_end = req_chan_base + req_chan_cnt - 1;
+                       if (req_chan_base < rvu_nix_chan_sdp(rvu, 0) ||
+                           req_chan_end > rvu_nix_chan_sdp(rvu, 255)) {
+                               dev_err(rvu->dev,
+                                       "PF_Func 0x%x: Invalid channel base and count\n",
+                                       pcifunc);
+                               return -EINVAL;
+                       }
+               } else {
+                       req_chan_base = rvu_nix_chan_sdp(rvu, 0) + sdp_info->pf_srn;
+                       req_chan_cnt = sdp_info->num_pf_rings;
+               }
+
+               pfvf->rx_chan_base = req_chan_base;
+               pfvf->rx_chan_cnt = req_chan_cnt;
+               pfvf->tx_chan_base = pfvf->rx_chan_base;
+               pfvf->tx_chan_cnt = pfvf->rx_chan_cnt;
+
+               rsp->tx_link = hw->cgx_links + hw->lbk_links;
                rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
                                              pfvf->rx_chan_base,
                                              pfvf->rx_chan_cnt);
@@ -393,9 +500,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu,
 static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
                            int type, int chan_id)
 {
-       int bpid, blkaddr, lmac_chan_cnt;
+       int bpid, blkaddr, lmac_chan_cnt, sdp_chan_cnt;
+       u16 cgx_bpid_cnt, lbk_bpid_cnt, sdp_bpid_cnt;
        struct rvu_hwinfo *hw = rvu->hw;
-       u16 cgx_bpid_cnt, lbk_bpid_cnt;
        struct rvu_pfvf *pfvf;
        u8 cgx_id, lmac_id;
        u64 cfg;
@@ -404,8 +511,12 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
        cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
        lmac_chan_cnt = cfg & 0xFF;
 
+       cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+       sdp_chan_cnt = cfg & 0xFFF;
+
        cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
        lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
+       sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
 
        pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
 
@@ -443,6 +554,17 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
                if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt))
                        return -EINVAL;
                break;
+       case NIX_INTF_TYPE_SDP:
+               if ((req->chan_base + req->chan_cnt) > 255)
+                       return -EINVAL;
+
+               bpid = sdp_bpid_cnt + req->chan_base;
+               if (req->bpid_per_chan)
+                       bpid += chan_id;
+
+               if (bpid > (cgx_bpid_cnt + lbk_bpid_cnt + sdp_bpid_cnt))
+                       return -EINVAL;
+               break;
        default:
                return -EINVAL;
        }
@@ -462,9 +584,12 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
 
        pf = rvu_get_pf(pcifunc);
        type = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
+       if (is_sdp_pfvf(pcifunc))
+               type = NIX_INTF_TYPE_SDP;
 
-       /* Enable backpressure only for CGX mapped PFs and LBK interface */
-       if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK)
+       /* Enable backpressure only for CGX mapped PFs and LBK/SDP interface */
+       if (!is_pf_cgxmapped(rvu, pf) && type != NIX_INTF_TYPE_LBK &&
+           type != NIX_INTF_TYPE_SDP)
                return 0;
 
        pfvf = rvu_get_pfvf(rvu, pcifunc);
@@ -481,8 +606,9 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu,
                }
 
                cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan));
+               cfg &= ~GENMASK_ULL(8, 0);
                rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan),
-                           cfg | (bpid & 0xFF) | BIT_ULL(16));
+                           cfg | (bpid & GENMASK_ULL(8, 0)) | BIT_ULL(16));
                chan_id++;
                bpid = rvu_nix_get_bpid(rvu, req, type, chan_id);
        }
@@ -630,9 +756,10 @@ static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf)
 static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
                              struct rvu_pfvf *pfvf, int nixlf,
                              int rss_sz, int rss_grps, int hwctx_size,
-                             u64 way_mask)
+                             u64 way_mask, bool tag_lsb_as_adder)
 {
        int err, grp, num_indices;
+       u64 val;
 
        /* RSS is not requested for this NIXLF */
        if (!rss_sz)
@@ -648,10 +775,13 @@ static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr,
                    (u64)pfvf->rss_ctx->iova);
 
        /* Config full RSS table size, enable RSS and caching */
-       rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf),
-                   BIT_ULL(36) | BIT_ULL(4) |
-                   ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE) |
-                   way_mask << 20);
+       val = BIT_ULL(36) | BIT_ULL(4) | way_mask << 20 |
+                       ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE);
+
+       if (tag_lsb_as_adder)
+               val |= BIT_ULL(5);
+
+       rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf), val);
        /* Config RSS group offset and sizes */
        for (grp = 0; grp < rss_grps; grp++)
                rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp),
@@ -943,7 +1073,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
 
        nix_hw =  get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
 }
@@ -1200,7 +1330,8 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
        /* Initialize receive side scaling (RSS) */
        hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF);
        err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf, req->rss_sz,
-                                req->rss_grps, hwctx_size, req->way_mask);
+                                req->rss_grps, hwctx_size, req->way_mask,
+                                !!(req->flags & NIX_LF_RSS_TAG_LSB_AS_ADDER));
        if (err)
                goto free_mem;
 
@@ -1258,7 +1389,11 @@ int rvu_mbox_handler_nix_lf_alloc(struct rvu *rvu,
        rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_PARSE_CFG(nixlf), cfg);
 
        intf = is_afvf(pcifunc) ? NIX_INTF_TYPE_LBK : NIX_INTF_TYPE_CGX;
-       err = nix_interface_init(rvu, pcifunc, intf, nixlf);
+       if (is_sdp_pfvf(pcifunc))
+               intf = NIX_INTF_TYPE_SDP;
+
+       err = nix_interface_init(rvu, pcifunc, intf, nixlf, rsp,
+                                !!(req->flags & NIX_LF_LBK_BLK_SEL));
        if (err)
                goto free_mem;
 
@@ -1364,7 +1499,7 @@ int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        cfg = (((u32)req->offset & 0x7) << 16) |
              (((u32)req->y_mask & 0xF) << 12) |
@@ -1382,12 +1517,104 @@ int rvu_mbox_handler_nix_mark_format_cfg(struct rvu *rvu,
        return 0;
 }
 
+/* Handle shaper update specially for few revisions */
+static bool
+handle_txschq_shaper_update(struct rvu *rvu, int blkaddr, int nixlf,
+                           int lvl, u64 reg, u64 regval)
+{
+       u64 regbase, oldval, sw_xoff = 0;
+       u64 dbgval, md_debug0 = 0;
+       unsigned long poll_tmo;
+       bool rate_reg = 0;
+       u32 schq;
+
+       regbase = reg & 0xFFFF;
+       schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+
+       /* Check for rate register */
+       switch (lvl) {
+       case NIX_TXSCH_LVL_TL1:
+               md_debug0 = NIX_AF_TL1X_MD_DEBUG0(schq);
+               sw_xoff = NIX_AF_TL1X_SW_XOFF(schq);
+
+               rate_reg = !!(regbase == NIX_AF_TL1X_CIR(0));
+               break;
+       case NIX_TXSCH_LVL_TL2:
+               md_debug0 = NIX_AF_TL2X_MD_DEBUG0(schq);
+               sw_xoff = NIX_AF_TL2X_SW_XOFF(schq);
+
+               rate_reg = (regbase == NIX_AF_TL2X_CIR(0) ||
+                           regbase == NIX_AF_TL2X_PIR(0));
+               break;
+       case NIX_TXSCH_LVL_TL3:
+               md_debug0 = NIX_AF_TL3X_MD_DEBUG0(schq);
+               sw_xoff = NIX_AF_TL3X_SW_XOFF(schq);
+
+               rate_reg = (regbase == NIX_AF_TL3X_CIR(0) ||
+                           regbase == NIX_AF_TL3X_PIR(0));
+               break;
+       case NIX_TXSCH_LVL_TL4:
+               md_debug0 = NIX_AF_TL4X_MD_DEBUG0(schq);
+               sw_xoff = NIX_AF_TL4X_SW_XOFF(schq);
+
+               rate_reg = (regbase == NIX_AF_TL4X_CIR(0) ||
+                           regbase == NIX_AF_TL4X_PIR(0));
+               break;
+       case NIX_TXSCH_LVL_MDQ:
+               sw_xoff = NIX_AF_MDQX_SW_XOFF(schq);
+               rate_reg = (regbase == NIX_AF_MDQX_CIR(0) ||
+                           regbase == NIX_AF_MDQX_PIR(0));
+               break;
+       }
+
+       if (!rate_reg)
+               return false;
+
+       /* Nothing special to do when state is not toggled */
+       oldval = rvu_read64(rvu, blkaddr, reg);
+       if ((oldval & 0x1) == (regval & 0x1)) {
+               rvu_write64(rvu, blkaddr, reg, regval);
+               return true;
+       }
+
+       /* PIR/CIR disable */
+       if (!(regval & 0x1)) {
+               rvu_write64(rvu, blkaddr, sw_xoff, 1);
+               rvu_write64(rvu, blkaddr, reg, 0);
+               udelay(4);
+               rvu_write64(rvu, blkaddr, sw_xoff, 0);
+               return true;
+       }
+
+       /* PIR/CIR enable */
+       rvu_write64(rvu, blkaddr, sw_xoff, 1);
+       if (md_debug0) {
+               poll_tmo = jiffies + usecs_to_jiffies(10000);
+               /* Wait until VLD(bit32) == 1 or C_CON(bit48) == 0 */
+               do {
+                       if (time_after(jiffies, poll_tmo)) {
+                               dev_err(rvu->dev,
+                                       "NIXLF%d: TLX%u(lvl %u) CIR/PIR enable failed\n",
+                                       nixlf, schq, lvl);
+                               goto exit;
+                       }
+                       usleep_range(1, 5);
+                       dbgval = rvu_read64(rvu, blkaddr, md_debug0);
+               } while (!(dbgval & BIT_ULL(32)) && (dbgval & BIT_ULL(48)));
+       }
+       rvu_write64(rvu, blkaddr, reg, regval);
+exit:
+       rvu_write64(rvu, blkaddr, sw_xoff, 0);
+       return true;
+}
+
 /* Disable shaping of pkts by a scheduler queue
  * at a given scheduler level.
  */
 static void nix_reset_tx_shaping(struct rvu *rvu, int blkaddr,
-                                int lvl, int schq)
+                                int nixlf, int lvl, int schq)
 {
+       struct rvu_hwinfo *hw = rvu->hw;
        u64  cir_reg = 0, pir_reg = 0;
        u64  cfg;
 
@@ -1408,6 +1635,21 @@ static void nix_reset_tx_shaping(struct rvu *rvu, int blkaddr,
                cir_reg = NIX_AF_TL4X_CIR(schq);
                pir_reg = NIX_AF_TL4X_PIR(schq);
                break;
+       case NIX_TXSCH_LVL_MDQ:
+               cir_reg = NIX_AF_MDQX_CIR(schq);
+               pir_reg = NIX_AF_MDQX_PIR(schq);
+               break;
+       }
+
+       /* Shaper state toggle needs wait/poll */
+       if (hw->cap.nix_shaper_toggle_wait) {
+               if (cir_reg)
+                       handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+                                                   lvl, cir_reg, 0);
+               if (pir_reg)
+                       handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+                                                   lvl, pir_reg, 0);
+               return;
        }
 
        if (!cir_reg)
@@ -1425,6 +1667,7 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
                                 int lvl, int schq)
 {
        struct rvu_hwinfo *hw = rvu->hw;
+       int link_level;
        int link;
 
        if (lvl >= hw->cap.nix_tx_aggr_lvl)
@@ -1434,7 +1677,9 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
        if (lvl == NIX_TXSCH_LVL_TL4)
                rvu_write64(rvu, blkaddr, NIX_AF_TL4X_SDP_LINK_CFG(schq), 0x00);
 
-       if (lvl != NIX_TXSCH_LVL_TL2)
+       link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
+                       NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
+       if (lvl != link_level)
                return;
 
        /* Reset TL2's CGX or LBK link config */
@@ -1443,6 +1688,40 @@ static void nix_reset_tx_linkcfg(struct rvu *rvu, int blkaddr,
                            NIX_AF_TL3_TL2X_LINKX_CFG(schq, link), 0x00);
 }
 
+static void nix_clear_tx_xoff(struct rvu *rvu, int blkaddr,
+                             int lvl, int schq)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       u64 reg;
+
+       /* Skip this if shaping is not supported */
+       if (!hw->cap.nix_shaping)
+               return;
+
+       /* Clear level specific SW_XOFF */
+       switch (lvl) {
+       case NIX_TXSCH_LVL_TL1:
+               reg = NIX_AF_TL1X_SW_XOFF(schq);
+               break;
+       case NIX_TXSCH_LVL_TL2:
+               reg = NIX_AF_TL2X_SW_XOFF(schq);
+               break;
+       case NIX_TXSCH_LVL_TL3:
+               reg = NIX_AF_TL3X_SW_XOFF(schq);
+               break;
+       case NIX_TXSCH_LVL_TL4:
+               reg = NIX_AF_TL4X_SW_XOFF(schq);
+               break;
+       case NIX_TXSCH_LVL_MDQ:
+               reg = NIX_AF_MDQX_SW_XOFF(schq);
+               break;
+       default:
+               return;
+       }
+
+       rvu_write64(rvu, blkaddr, reg, 0x0);
+}
+
 static int nix_get_tx_link(struct rvu *rvu, u16 pcifunc)
 {
        struct rvu_hwinfo *hw = rvu->hw;
@@ -1620,19 +1899,18 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
        int link, blkaddr, rc = 0;
        int lvl, idx, start, end;
        struct nix_txsch *txsch;
-       struct rvu_pfvf *pfvf;
        struct nix_hw *nix_hw;
        u32 *pfvf_map;
+       int nixlf;
        u16 schq;
 
-       pfvf = rvu_get_pfvf(rvu, pcifunc);
-       blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
-       if (!pfvf->nixlf || blkaddr < 0)
-               return NIX_AF_ERR_AF_LF_INVALID;
+       rc = nix_get_nixlf(rvu, pcifunc, &nixlf, &blkaddr);
+       if (rc)
+               return rc;
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        mutex_lock(&rvu->rsrc_lock);
 
@@ -1677,7 +1955,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
                            NIX_TXSCHQ_CFG_DONE))
                                pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
-                       nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+                       nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
                }
 
                for (idx = 0; idx < req->schq[lvl]; idx++) {
@@ -1686,7 +1964,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu,
                            NIX_TXSCHQ_CFG_DONE))
                                pfvf_map[schq] = TXSCH_MAP(pcifunc, 0);
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
-                       nix_reset_tx_shaping(rvu, blkaddr, lvl, schq);
+                       nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq);
                }
        }
 
@@ -1703,8 +1981,8 @@ exit:
        return rc;
 }
 
-static void nix_smq_flush(struct rvu *rvu, int blkaddr,
-                         int smq, u16 pcifunc, int nixlf)
+static int nix_smq_flush(struct rvu *rvu, int blkaddr,
+                        int smq, u16 pcifunc, int nixlf)
 {
        int pf = rvu_get_pf(pcifunc);
        u8 cgx_id = 0, lmac_id = 0;
@@ -1739,6 +2017,7 @@ static void nix_smq_flush(struct rvu *rvu, int blkaddr,
        /* restore cgx tx state */
        if (restore_tx_en)
                cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+       return err;
 }
 
 static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
@@ -1747,6 +2026,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
        struct rvu_hwinfo *hw = rvu->hw;
        struct nix_txsch *txsch;
        struct nix_hw *nix_hw;
+       u16 map_func;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
        if (blkaddr < 0)
@@ -1754,25 +2034,42 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc)
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
        if (nixlf < 0)
                return NIX_AF_ERR_AF_LF_INVALID;
 
-       /* Disable TL2/3 queue links before SMQ flush*/
+       /* Disable TL2/3 queue links and all XOFF's before SMQ flush*/
        mutex_lock(&rvu->rsrc_lock);
-       for (lvl = NIX_TXSCH_LVL_TL4; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
-               if (lvl != NIX_TXSCH_LVL_TL2 && lvl != NIX_TXSCH_LVL_TL4)
+       for (lvl = NIX_TXSCH_LVL_MDQ; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+               txsch = &nix_hw->txsch[lvl];
+
+               if (lvl >= hw->cap.nix_tx_aggr_lvl)
                        continue;
 
-               txsch = &nix_hw->txsch[lvl];
                for (schq = 0; schq < txsch->schq.max; schq++) {
                        if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc)
                                continue;
                        nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq);
+                       nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
                }
        }
+       nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1,
+                         nix_get_tx_link(rvu, pcifunc));
+
+       /* On PF cleanup, clear cfg done flag as
+        * PF would have changed default config.
+        */
+       if (!(pcifunc & RVU_PFVF_FUNC_MASK)) {
+               txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL1];
+               schq = nix_get_tx_link(rvu, pcifunc);
+               /* Do not clear pcifunc in txsch->pfvf_map[schq] because
+                * VF might be using this TL1 queue
+                */
+               map_func = TXSCH_MAP_FUNC(txsch->pfvf_map[schq]);
+               txsch->pfvf_map[schq] = TXSCH_SET_FLAG(map_func, 0x0);
+       }
 
        /* Flush SMQs */
        txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
@@ -1818,6 +2115,7 @@ static int nix_txschq_free_one(struct rvu *rvu,
        struct nix_txsch *txsch;
        struct nix_hw *nix_hw;
        u32 *pfvf_map;
+       int rc;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
        if (blkaddr < 0)
@@ -1825,7 +2123,7 @@ static int nix_txschq_free_one(struct rvu *rvu,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        nixlf = rvu_get_lf(rvu, &hw->block[blkaddr], pcifunc, 0);
        if (nixlf < 0)
@@ -1842,15 +2140,24 @@ static int nix_txschq_free_one(struct rvu *rvu,
        mutex_lock(&rvu->rsrc_lock);
 
        if (TXSCH_MAP_FUNC(pfvf_map[schq]) != pcifunc) {
-               mutex_unlock(&rvu->rsrc_lock);
+               rc = NIX_AF_ERR_TLX_INVALID;
                goto err;
        }
 
+       /* Clear SW_XOFF of this resource only.
+        * For SMQ level, all path XOFF's
+        * need to be made clear by user
+        */
+       nix_clear_tx_xoff(rvu, blkaddr, lvl, schq);
+
        /* Flush if it is a SMQ. Onus of disabling
         * TL2/3 queue links before SMQ flush is on user
         */
-       if (lvl == NIX_TXSCH_LVL_SMQ)
-               nix_smq_flush(rvu, blkaddr, schq, pcifunc, nixlf);
+       if (lvl == NIX_TXSCH_LVL_SMQ &&
+           nix_smq_flush(rvu, blkaddr, schq, pcifunc, nixlf)) {
+               rc = NIX_AF_SMQ_FLUSH_FAILED;
+               goto err;
+       }
 
        /* Free the resource */
        rvu_free_rsrc(&txsch->schq, schq);
@@ -1858,7 +2165,8 @@ static int nix_txschq_free_one(struct rvu *rvu,
        mutex_unlock(&rvu->rsrc_lock);
        return 0;
 err:
-       return NIX_AF_ERR_TLX_INVALID;
+       mutex_unlock(&rvu->rsrc_lock);
+       return rc;
 }
 
 int rvu_mbox_handler_nix_txsch_free(struct rvu *rvu,
@@ -1941,6 +2249,11 @@ static bool is_txschq_shaping_valid(struct rvu_hwinfo *hw, int lvl, u64 reg)
                    regbase == NIX_AF_TL4X_PIR(0))
                        return false;
                break;
+       case NIX_TXSCH_LVL_MDQ:
+               if (regbase == NIX_AF_MDQX_CIR(0) ||
+                   regbase == NIX_AF_MDQX_PIR(0))
+                       return false;
+               break;
        }
        return true;
 }
@@ -1958,12 +2271,48 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw,
                return;
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq),
                    (TXSCH_TL1_DFLT_RR_PRIO << 1));
-       rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
-                   TXSCH_TL1_DFLT_RR_QTM);
+
+       /* On OcteonTx2 the config was in bytes and newer silcons
+        * it's changed to weight.
+        */
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           TXSCH_TL1_DFLT_RR_QTM);
+       else
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           CN10K_MAX_DWRR_WEIGHT);
+
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00);
        pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
 }
 
+/* Register offset - [15:0]
+ * Scheduler Queue number - [25:16]
+ */
+#define NIX_TX_SCHQ_MASK       GENMASK_ULL(25, 0)
+
+static int nix_txschq_cfg_read(struct rvu *rvu, struct nix_hw *nix_hw,
+                              int blkaddr, struct nix_txschq_config *req,
+                              struct nix_txschq_config *rsp)
+{
+       u16 pcifunc = req->hdr.pcifunc;
+       int idx, schq;
+       u64 reg;
+
+       for (idx = 0; idx < req->num_regs; idx++) {
+               reg = req->reg[idx];
+               reg &= NIX_TX_SCHQ_MASK;
+               schq = TXSCHQ_IDX(reg, TXSCHQ_IDX_SHIFT);
+               if (!rvu_check_valid_reg(TXSCHQ_HWREGMAP, req->lvl, reg) ||
+                   !is_valid_txschq(rvu, blkaddr, req->lvl, pcifunc, schq))
+                       return NIX_AF_INVAL_TXSCHQ_CFG;
+               rsp->regval[idx] = rvu_read64(rvu, blkaddr, reg);
+       }
+       rsp->lvl = req->lvl;
+       rsp->num_regs = req->num_regs;
+       return 0;
+}
+
 static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr,
                               u16 pcifunc, struct nix_txsch *txsch)
 {
@@ -1995,11 +2344,11 @@ static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr,
 
 int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
                                    struct nix_txschq_config *req,
-                                   struct msg_rsp *rsp)
+                                   struct nix_txschq_config *rsp)
 {
+       u64 reg, val, regval, schq_regbase, val_mask;
        struct rvu_hwinfo *hw = rvu->hw;
        u16 pcifunc = req->hdr.pcifunc;
-       u64 reg, regval, schq_regbase;
        struct nix_txsch *txsch;
        struct nix_hw *nix_hw;
        int blkaddr, idx, err;
@@ -2016,7 +2365,10 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
+
+       if (req->read)
+               return nix_txschq_cfg_read(rvu, nix_hw, blkaddr, req, rsp);
 
        txsch = &nix_hw->txsch[req->lvl];
        pfvf_map = txsch->pfvf_map;
@@ -2032,8 +2384,10 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
 
        for (idx = 0; idx < req->num_regs; idx++) {
                reg = req->reg[idx];
+               reg &= NIX_TX_SCHQ_MASK;
                regval = req->regval[idx];
                schq_regbase = reg & 0xFFFF;
+               val_mask = req->regval_mask[idx];
 
                if (!is_txschq_hierarchy_valid(rvu, pcifunc, blkaddr,
                                               txsch->lvl, reg, regval))
@@ -2043,6 +2397,15 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
                if (!is_txschq_shaping_valid(hw, req->lvl, reg))
                        continue;
 
+               val = rvu_read64(rvu, blkaddr, reg);
+               regval = (val & val_mask) | (regval & ~val_mask);
+
+               /* Handle shaping state toggle specially */
+               if (hw->cap.nix_shaper_toggle_wait &&
+                   handle_txschq_shaper_update(rvu, blkaddr, nixlf,
+                                               req->lvl, reg, regval))
+                       continue;
+
                /* Replace PF/VF visible NIXLF slot with HW NIXLF id */
                if (schq_regbase == NIX_AF_SMQX_CFG(0)) {
                        nixlf = rvu_get_lf(rvu, &hw->block[blkaddr],
@@ -2083,7 +2446,6 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu,
 
        rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc,
                           &nix_hw->txsch[NIX_TXSCH_LVL_TL2]);
-
        return 0;
 }
 
@@ -2114,8 +2476,12 @@ static int nix_tx_vtag_free(struct rvu *rvu, int blkaddr,
                            u16 pcifunc, int index)
 {
        struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
-       struct nix_txvlan *vlan = &nix_hw->txvlan;
+       struct nix_txvlan *vlan;
+
+       if (!nix_hw)
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
+       vlan = &nix_hw->txvlan;
        if (vlan->entry2pfvf_map[index] != pcifunc)
                return NIX_AF_ERR_PARAM;
 
@@ -2156,10 +2522,15 @@ static int nix_tx_vtag_alloc(struct rvu *rvu, int blkaddr,
                             u64 vtag, u8 size)
 {
        struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
-       struct nix_txvlan *vlan = &nix_hw->txvlan;
+       struct nix_txvlan *vlan;
        u64 regval;
        int index;
 
+       if (!nix_hw)
+               return NIX_AF_ERR_INVALID_NIXBLK;
+
+       vlan = &nix_hw->txvlan;
+
        mutex_lock(&vlan->rsrc_lock);
 
        index = rvu_alloc_rsrc(&vlan->rsrc);
@@ -2184,12 +2555,16 @@ static int nix_tx_vtag_decfg(struct rvu *rvu, int blkaddr,
                             struct nix_vtag_config *req)
 {
        struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
-       struct nix_txvlan *vlan = &nix_hw->txvlan;
        u16 pcifunc = req->hdr.pcifunc;
        int idx0 = req->tx.vtag0_idx;
        int idx1 = req->tx.vtag1_idx;
+       struct nix_txvlan *vlan;
        int err = 0;
 
+       if (!nix_hw)
+               return NIX_AF_ERR_INVALID_NIXBLK;
+
+       vlan = &nix_hw->txvlan;
        if (req->tx.free_vtag0 && req->tx.free_vtag1)
                if (vlan->entry2pfvf_map[idx0] != pcifunc ||
                    vlan->entry2pfvf_map[idx1] != pcifunc)
@@ -2216,9 +2591,13 @@ static int nix_tx_vtag_cfg(struct rvu *rvu, int blkaddr,
                           struct nix_vtag_config_rsp *rsp)
 {
        struct nix_hw *nix_hw = get_nix_hw(rvu->hw, blkaddr);
-       struct nix_txvlan *vlan = &nix_hw->txvlan;
+       struct nix_txvlan *vlan;
        u16 pcifunc = req->hdr.pcifunc;
 
+       if (!nix_hw)
+               return NIX_AF_ERR_INVALID_NIXBLK;
+
+       vlan = &nix_hw->txvlan;
        if (req->tx.cfg_vtag0) {
                rsp->vtag0_idx =
                        nix_tx_vtag_alloc(rvu, blkaddr,
@@ -2456,14 +2835,19 @@ static int nix_update_mce_rule(struct rvu *rvu, u16 pcifunc,
        struct npc_mcam *mcam = &rvu->hw->mcam;
        struct rvu_hwinfo *hw = rvu->hw;
        struct nix_mce_list *mce_list;
+       int pf;
 
-       /* skip multicast pkt replication for AF's VFs */
-       if (is_afvf(pcifunc))
+       /* skip multicast pkt replication for AF's VFs & SDP links */
+       if (is_afvf(pcifunc) || is_sdp_pfvf(pcifunc))
                return 0;
 
        if (!hw->cap.nix_rx_multicast)
                return 0;
 
+       pf = rvu_get_pf(pcifunc);
+       if (!is_pf_cgxmapped(rvu, pf))
+               return 0;
+
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
        if (blkaddr < 0)
                return -EINVAL;
@@ -2667,6 +3051,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
                for (schq = 0; schq < txsch->schq.max; schq++)
                        txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
        }
+
+       /* Setup a default value of 8192 as DWRR MTU */
+       if (rvu->hw->cap.nix_common_dwrr_mtu) {
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+       }
+
        return 0;
 }
 
@@ -2743,6 +3136,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                                     struct nix_hw_info *rsp)
 {
        u16 pcifunc = req->hdr.pcifunc;
+       u64 dwrr_mtu;
        int blkaddr;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -2755,6 +3149,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
 
        rsp->min_mtu = NIC_HW_MIN_FRS;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               /* Return '1' on OTx2 */
+               rsp->rpm_dwrr_mtu = 1;
+               rsp->sdp_dwrr_mtu = 1;
+               return 0;
+       }
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+       rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
        return 0;
 }
 
@@ -3068,7 +3476,7 @@ static int reserve_flowkey_alg_idx(struct rvu *rvu, int blkaddr, u32 flow_cfg)
 
        hw = get_nix_hw(rvu->hw, blkaddr);
        if (!hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        /* No room to add new flow hash algoritham */
        if (hw->flowkey.in_use >= NIX_FLOW_KEY_ALG_MAX)
@@ -3108,7 +3516,7 @@ int rvu_mbox_handler_nix_rss_flowkey_cfg(struct rvu *rvu,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        alg_idx = get_flowkey_alg_idx(nix_hw, req->flowkey_cfg);
        /* Failed to get algo index from the exiting list, reserve new  */
@@ -3366,6 +3774,77 @@ static void nix_find_link_frs(struct rvu *rvu,
                req->minlen = minlen;
 }
 
+static int
+nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+                       u16 pcifunc, u64 tx_credits)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       int pf = rvu_get_pf(pcifunc);
+       u8 cgx_id = 0, lmac_id = 0;
+       unsigned long poll_tmo;
+       bool restore_tx_en = 0;
+       struct nix_hw *nix_hw;
+       u64 cfg, sw_xoff = 0;
+       u32 schq = 0;
+       u32 credits;
+       int rc;
+
+       nix_hw = get_nix_hw(rvu->hw, blkaddr);
+       if (!nix_hw)
+               return NIX_AF_ERR_INVALID_NIXBLK;
+
+       if (tx_credits == nix_hw->tx_credits[link])
+               return 0;
+
+       /* Enable cgx tx if disabled for credits to be back */
+       if (is_pf_cgxmapped(rvu, pf)) {
+               rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
+               restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu),
+                                                   lmac_id, true);
+       }
+
+       mutex_lock(&rvu->rsrc_lock);
+       /* Disable new traffic to link */
+       if (hw->cap.nix_shaping) {
+               schq = nix_get_tx_link(rvu, pcifunc);
+               sw_xoff = rvu_read64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq));
+               rvu_write64(rvu, blkaddr,
+                           NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+       }
+
+       rc = -EBUSY;
+       poll_tmo = jiffies + usecs_to_jiffies(10000);
+       /* Wait for credits to return */
+       do {
+               if (time_after(jiffies, poll_tmo))
+                       goto exit;
+               usleep_range(100, 200);
+
+               cfg = rvu_read64(rvu, blkaddr,
+                                NIX_AF_TX_LINKX_NORM_CREDIT(link));
+               credits = (cfg >> 12) & 0xFFFFFULL;
+       } while (credits != nix_hw->tx_credits[link]);
+
+       cfg &= ~(0xFFFFFULL << 12);
+       cfg |= (tx_credits << 12);
+       rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
+       rc = 0;
+
+       nix_hw->tx_credits[link] = tx_credits;
+
+exit:
+       /* Enable traffic back */
+       if (hw->cap.nix_shaping && !sw_xoff)
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SW_XOFF(schq), 0);
+
+       /* Restore state of cgx tx */
+       if (restore_tx_en)
+               cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false);
+
+       mutex_unlock(&rvu->rsrc_lock);
+       return rc;
+}
+
 int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
                                    struct msg_rsp *rsp)
 {
@@ -3376,6 +3855,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
        struct nix_txsch *txsch;
        u64 cfg, lmac_fifo_len;
        struct nix_hw *nix_hw;
+       struct rvu_pfvf *pfvf;
        u8 cgx = 0, lmac = 0;
        u16 max_mtu;
 
@@ -3385,7 +3865,7 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        if (is_afvf(pcifunc))
                rvu_get_lbk_link_max_frs(rvu, &max_mtu);
@@ -3432,7 +3912,8 @@ rx_frscfg:
                link = (cgx * hw->lmac_per_cgx) + lmac;
        } else if (pf == 0) {
                /* For VFs of PF0 ingress is LBK port, so config LBK link */
-               link = hw->cgx_links;
+               pfvf = rvu_get_pfvf(rvu, pcifunc);
+               link = hw->cgx_links + pfvf->lbkid;
        }
 
        if (link < 0)
@@ -3454,11 +3935,8 @@ linkcfg:
        lmac_fifo_len =
                rvu_cgx_get_fifolen(rvu) /
                cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
-       cfg = rvu_read64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link));
-       cfg &= ~(0xFFFFFULL << 12);
-       cfg |=  ((lmac_fifo_len - req->maxlen) / 16) << 12;
-       rvu_write64(rvu, blkaddr, NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
-       return 0;
+       return nix_config_link_credits(rvu, blkaddr, link, pcifunc,
+                                      (lmac_fifo_len - req->maxlen) / 16);
 }
 
 int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req,
@@ -3502,12 +3980,13 @@ static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs)
        return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */
 }
 
-static void nix_link_config(struct rvu *rvu, int blkaddr)
+static void nix_link_config(struct rvu *rvu, int blkaddr,
+                           struct nix_hw *nix_hw)
 {
        struct rvu_hwinfo *hw = rvu->hw;
        int cgx, lmac_cnt, slink, link;
        u16 lbk_max_frs, lmac_max_frs;
-       u64 tx_credits;
+       u64 tx_credits, cfg;
 
        rvu_get_lbk_link_max_frs(rvu, &lbk_max_frs);
        rvu_get_lmac_link_max_frs(rvu, &lmac_max_frs);
@@ -3538,15 +4017,18 @@ static void nix_link_config(struct rvu *rvu, int blkaddr)
         */
        for (cgx = 0; cgx < hw->cgx; cgx++) {
                lmac_cnt = cgx_get_lmac_cnt(rvu_cgx_pdata(cgx, rvu));
+               /* Skip when cgx is not available or lmac cnt is zero */
+               if (lmac_cnt <= 0)
+                       continue;
                tx_credits = ((rvu_cgx_get_fifolen(rvu) / lmac_cnt) -
                               lmac_max_frs) / 16;
                /* Enable credits and set credit pkt count to max allowed */
-               tx_credits =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
+               cfg =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
                slink = cgx * hw->lmac_per_cgx;
                for (link = slink; link < (slink + lmac_cnt); link++) {
+                       nix_hw->tx_credits[link] = tx_credits;
                        rvu_write64(rvu, blkaddr,
-                                   NIX_AF_TX_LINKX_NORM_CREDIT(link),
-                                   tx_credits);
+                                   NIX_AF_TX_LINKX_NORM_CREDIT(link), cfg);
                }
        }
 
@@ -3554,6 +4036,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr)
        slink = hw->cgx_links;
        for (link = slink; link < (slink + hw->lbk_links); link++) {
                tx_credits = rvu_get_lbk_link_credits(rvu, lbk_max_frs);
+               nix_hw->tx_credits[link] = tx_credits;
                /* Enable credits and set credit pkt count to max allowed */
                tx_credits =  (tx_credits << 12) | (0x1FF << 2) | BIT_ULL(1);
                rvu_write64(rvu, blkaddr,
@@ -3647,6 +4130,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
        return 0;
 }
 
+static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       u64 hw_const;
+
+       hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+       /* On OcteonTx2 DWRR quantum is directly configured into each of
+        * the transmit scheduler queues. And PF/VF drivers were free to
+        * config any value upto 2^24.
+        * On CN10K, HW is modified, the quantum configuration at scheduler
+        * queues is in terms of weight. And SW needs to setup a base DWRR MTU
+        * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do
+        * 'DWRR MTU * weight' to get the quantum.
+        *
+        * Check if HW uses a common MTU for all DWRR quantum configs.
+        * On OcteonTx2 this register field is '0'.
+        */
+       if (((hw_const >> 56) & 0x10) == 0x10)
+               hw->cap.nix_common_dwrr_mtu = true;
+}
+
 static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
 {
        const struct npc_lt_def_cfg *ltdefs;
@@ -3684,6 +4189,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
        if (err)
                return err;
 
+       /* Setup capabilities of the NIX block */
+       rvu_nix_setup_capabilities(rvu, blkaddr);
+
        /* Initialize admin queue */
        err = nix_aq_init(rvu, block);
        if (err)
@@ -3692,6 +4200,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
        /* Restore CINT timer delay to HW reset values */
        rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL);
 
+       /* For better performance use NDC TX instead of NDC RX for SQ's SQEs" */
+       rvu_write64(rvu, blkaddr, NIX_AF_SEB_CFG, 0x1ULL);
+
        if (is_block_implemented(hw, blkaddr)) {
                err = nix_setup_txschq(rvu, nix_hw, blkaddr);
                if (err)
@@ -3792,8 +4303,13 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
                if (err)
                        return err;
 
+               nix_hw->tx_credits = kcalloc(hw->cgx_links + hw->lbk_links,
+                                            sizeof(u64), GFP_KERNEL);
+               if (!nix_hw->tx_credits)
+                       return -ENOMEM;
+
                /* Initialize CGX/LBK/SDP link credits, min/max pkt lengths */
-               nix_link_config(rvu, blkaddr);
+               nix_link_config(rvu, blkaddr, nix_hw);
 
                /* Enable Channel backpressure */
                rvu_write64(rvu, blkaddr, NIX_AF_RX_CFG, BIT_ULL(0));
@@ -3849,7 +4365,9 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
                        kfree(txsch->schq.bmap);
                }
 
-               nix_ipolicer_freemem(nix_hw);
+               kfree(nix_hw->tx_credits);
+
+               nix_ipolicer_freemem(rvu, nix_hw);
 
                vlan = &nix_hw->txvlan;
                kfree(vlan->rsrc.bmap);
@@ -4027,7 +4545,7 @@ int rvu_mbox_handler_nix_lso_format_cfg(struct rvu *rvu,
 
        nix_hw = get_nix_hw(rvu->hw, blkaddr);
        if (!nix_hw)
-               return -EINVAL;
+               return NIX_AF_ERR_INVALID_NIXBLK;
 
        /* Find existing matching LSO format, if any */
        for (idx = 0; idx < nix_hw->lso.in_use; idx++) {
@@ -4225,11 +4743,14 @@ static int nix_setup_ipolicers(struct rvu *rvu,
        return 0;
 }
 
-static void nix_ipolicer_freemem(struct nix_hw *nix_hw)
+static void nix_ipolicer_freemem(struct rvu *rvu, struct nix_hw *nix_hw)
 {
        struct nix_ipolicer *ipolicer;
        int layer;
 
+       if (!rvu->hw->cap.ipolicer)
+               return;
+
        for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
                ipolicer = &nix_hw->ipolicer[layer];
 
@@ -4652,3 +5173,36 @@ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
                rvu_free_rsrc(&ipolicer->band_prof, mid_prof);
        }
 }
+
+int rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct msg_req *req,
+                                            struct nix_bandprof_get_hwinfo_rsp *rsp)
+{
+       struct nix_ipolicer *ipolicer;
+       int blkaddr, layer, err;
+       struct nix_hw *nix_hw;
+       u64 tu;
+
+       if (!rvu->hw->cap.ipolicer)
+               return NIX_AF_ERR_IPOLICER_NOTSUPP;
+
+       err = nix_get_struct_ptrs(rvu, req->hdr.pcifunc, &nix_hw, &blkaddr);
+       if (err)
+               return err;
+
+       /* Return number of bandwidth profiles free at each layer */
+       mutex_lock(&rvu->rsrc_lock);
+       for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) {
+               if (layer == BAND_PROF_INVAL_LAYER)
+                       continue;
+
+               ipolicer = &nix_hw->ipolicer[layer];
+               rsp->prof_count[layer] = rvu_rsrc_free_count(&ipolicer->band_prof);
+       }
+       mutex_unlock(&rvu->rsrc_lock);
+
+       /* Set the policer timeunit in nanosec */
+       tu = rvu_read64(rvu, blkaddr, NIX_AF_PL_TS) & GENMASK_ULL(9, 0);
+       rsp->policer_timeunit = (tu + 1) * 100;
+
+       return 0;
+}
index 24c2bfd..70bd036 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -419,6 +416,10 @@ exit:
        rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF;
        rsp->stack_pg_bytes = cfg & 0xFF;
        rsp->qints = (cfg >> 28) & 0xFFF;
+       if (!is_rvu_otx2(rvu)) {
+               cfg = rvu_read64(rvu, block->addr, NPA_AF_BATCH_CTL);
+               rsp->cache_lines = (cfg >> 1) & 0x3F;
+       }
        return rc;
 }
 
@@ -478,6 +479,13 @@ static int npa_aq_init(struct rvu *rvu, struct rvu_block *block)
 #endif
        rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg);
 
+       /* For CN10K NPA BATCH DMA set 35 cache lines */
+       if (!is_rvu_otx2(rvu)) {
+               cfg = rvu_read64(rvu, block->addr, NPA_AF_BATCH_CTL);
+               cfg &= ~0x7EULL;
+               cfg |= BIT_ULL(6) | BIT_ULL(2) | BIT_ULL(1);
+               rvu_write64(rvu, block->addr, NPA_AF_BATCH_CTL, cfg);
+       }
        /* Result structure can be followed by Aura/Pool context at
         * RES + 128bytes and a write mask at RES + 256 bytes, depending on
         * operation type. Alloc sufficient result memory for all operations.
index 52b2554..5efb417 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitfield.h>
@@ -23,7 +20,7 @@
 #define RSVD_MCAM_ENTRIES_PER_NIXLF    1 /* Ucast for LFs */
 
 #define NPC_PARSE_RESULT_DMAC_OFFSET   8
-#define NPC_HW_TSTAMP_OFFSET           8
+#define NPC_HW_TSTAMP_OFFSET           8ULL
 #define NPC_KEX_CHAN_MASK              0xFFFULL
 #define NPC_KEX_PF_FUNC_MASK           0xFFFFULL
 
@@ -85,36 +82,6 @@ static int npc_mcam_verify_pf_func(struct rvu *rvu,
        return 0;
 }
 
-int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel)
-{
-       int pf = rvu_get_pf(pcifunc);
-       u8 cgx_id, lmac_id;
-       int base = 0, end;
-
-       if (is_npc_intf_tx(intf))
-               return 0;
-
-       /* return in case of AF installed rules */
-       if (is_pffunc_af(pcifunc))
-               return 0;
-
-       if (is_afvf(pcifunc)) {
-               end = rvu_get_num_lbk_chans();
-               if (end < 0)
-                       return -EINVAL;
-       } else {
-               rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id);
-               base = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0x0);
-               /* CGX mapped functions has maximum of 16 channels */
-               end = rvu_nix_chan_cgx(rvu, cgx_id, lmac_id, 0xF);
-       }
-
-       if (channel < base || channel > end)
-               return -EINVAL;
-
-       return 0;
-}
-
 void rvu_npc_set_pkind(struct rvu *rvu, int pkind, struct rvu_pfvf *pfvf)
 {
        int blkaddr;
@@ -634,8 +601,8 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
        struct nix_rx_action action;
        int blkaddr, index;
 
-       /* AF's VFs work in promiscuous mode */
-       if (is_afvf(pcifunc))
+       /* AF's and SDP VFs work in promiscuous mode */
+       if (is_afvf(pcifunc) || is_sdp_vf(pcifunc))
                return;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -724,7 +691,17 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
                action.index = pfvf->promisc_mce_idx;
        }
 
-       req.chan_mask = 0xFFFU;
+       /* For cn10k the upper two bits of the channel number are
+        * cpt channel number. with masking out these bits in the
+        * mcam entry, same entry used for NIX will allow packets
+        * received from cpt for parsing.
+        */
+       if (!is_rvu_otx2(rvu)) {
+               req.chan_mask = NIX_CHAN_CPT_X2P_MASK;
+       } else {
+               req.chan_mask = 0xFFFU;
+       }
+
        if (chan_cnt > 1) {
                if (!is_power_of_2(chan_cnt)) {
                        dev_err(rvu->dev,
@@ -853,7 +830,7 @@ void rvu_npc_install_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
        u16 vf_func;
 
        /* Only CGX PF/VF can add allmulticast entry */
-       if (is_afvf(pcifunc))
+       if (is_afvf(pcifunc) && is_sdp_vf(pcifunc))
                return;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -938,7 +915,7 @@ void rvu_npc_enable_allmulti_entry(struct rvu *rvu, u16 pcifunc, int nixlf,
 static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
                                     int blkaddr, u16 pcifunc, u64 rx_action)
 {
-       int actindex, index, bank;
+       int actindex, index, bank, entry;
        bool enable;
 
        if (!(pcifunc & RVU_PFVF_FUNC_MASK))
@@ -949,7 +926,7 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
                if (mcam->entry2target_pffunc[index] == pcifunc) {
                        bank = npc_get_bank(mcam, index);
                        actindex = index;
-                       index &= (mcam->banksize - 1);
+                       entry = index & (mcam->banksize - 1);
 
                        /* read vf flow entry enable status */
                        enable = is_mcam_entry_enabled(rvu, mcam, blkaddr,
@@ -959,7 +936,7 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
                                              false);
                        /* update 'action' */
                        rvu_write64(rvu, blkaddr,
-                                   NPC_AF_MCAMEX_BANKX_ACTION(index, bank),
+                                   NPC_AF_MCAMEX_BANKX_ACTION(entry, bank),
                                    rx_action);
                        if (enable)
                                npc_enable_mcam_entry(rvu, mcam, blkaddr,
@@ -1898,9 +1875,22 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
 
        mcam->banks = (npc_const >> 44) & 0xFULL;
        mcam->banksize = (npc_const >> 28) & 0xFFFFULL;
+       hw->npc_stat_ena = BIT_ULL(9);
        /* Extended set */
        if (npc_const2) {
                hw->npc_ext_set = true;
+               /* 96xx supports only match_stats and npc_counters
+                * reflected in NPC_AF_CONST reg.
+                * STAT_SEL and ENA are at [0:8] and 9 bit positions.
+                * 98xx has both match_stat and ext and npc_counter
+                * reflected in NPC_AF_CONST2
+                * STAT_SEL_EXT added at [12:14] bit position.
+                * cn10k supports only ext and hence npc_counters in
+                * NPC_AF_CONST is 0 and npc_counters reflected in NPC_AF_CONST2.
+                * STAT_SEL bitpos incremented from [0:8] to [0:11] and ENA bit moved to 63
+                */
+               if (!hw->npc_counters)
+                       hw->npc_stat_ena = BIT_ULL(63);
                hw->npc_counters = (npc_const2 >> 16) & 0xFFFFULL;
                mcam->banksize = npc_const2 & 0xFFFFULL;
        }
@@ -1955,7 +1945,7 @@ static void rvu_npc_setup_interfaces(struct rvu *rvu, int blkaddr)
                rvu_write64(rvu, blkaddr,
                            NPC_AF_INTFX_MISS_STAT_ACT(intf),
                            ((mcam->rx_miss_act_cntr >> 9) << 12) |
-                           BIT_ULL(9) | mcam->rx_miss_act_cntr);
+                           hw->npc_stat_ena | mcam->rx_miss_act_cntr);
        }
 
        /* Configure TX interfaces */
@@ -2030,14 +2020,15 @@ int rvu_npc_init(struct rvu *rvu)
 
        /* Enable below for Rx pkts.
         * - Outer IPv4 header checksum validation.
-        * - Detect outer L2 broadcast address and set NPC_RESULT_S[L2M].
+        * - Detect outer L2 broadcast address and set NPC_RESULT_S[L2B].
+        * - Detect outer L2 multicast address and set NPC_RESULT_S[L2M].
         * - Inner IPv4 header checksum validation.
         * - Set non zero checksum error code value
         */
        rvu_write64(rvu, blkaddr, NPC_AF_PCK_CFG,
                    rvu_read64(rvu, blkaddr, NPC_AF_PCK_CFG) |
-                   BIT_ULL(32) | BIT_ULL(24) | BIT_ULL(6) |
-                   BIT_ULL(2) | BIT_ULL(1));
+                   ((u64)NPC_EC_OIP4_CSUM << 32) | (NPC_EC_IIP4_CSUM << 24) |
+                   BIT_ULL(7) | BIT_ULL(6) | BIT_ULL(2) | BIT_ULL(1));
 
        rvu_npc_setup_interfaces(rvu, blkaddr);
 
@@ -2147,18 +2138,16 @@ static void npc_map_mcam_entry_and_cntr(struct rvu *rvu, struct npc_mcam *mcam,
                                        int blkaddr, u16 entry, u16 cntr)
 {
        u16 index = entry & (mcam->banksize - 1);
-       u16 bank = npc_get_bank(mcam, entry);
+       u32 bank = npc_get_bank(mcam, entry);
+       struct rvu_hwinfo *hw = rvu->hw;
 
        /* Set mapping and increment counter's refcnt */
        mcam->entry2cntr_map[entry] = cntr;
        mcam->cntr_refcnt[cntr]++;
-       /* Enable stats
-        * NPC_AF_MCAMEX_BANKX_STAT_ACT[14:12] - counter[11:9]
-        * NPC_AF_MCAMEX_BANKX_STAT_ACT[8:0] - counter[8:0]
-        */
+       /* Enable stats */
        rvu_write64(rvu, blkaddr,
                    NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank),
-                   ((cntr >> 9) << 12) | BIT_ULL(9) | cntr);
+                   ((cntr >> 9) << 12) | hw->npc_stat_ena | cntr);
 }
 
 static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu,
@@ -2166,7 +2155,7 @@ static void npc_unmap_mcam_entry_and_cntr(struct rvu *rvu,
                                          int blkaddr, u16 entry, u16 cntr)
 {
        u16 index = entry & (mcam->banksize - 1);
-       u16 bank = npc_get_bank(mcam, entry);
+       u32 bank = npc_get_bank(mcam, entry);
 
        /* Remove mapping and reduce counter's refcnt */
        mcam->entry2cntr_map[entry] = NPC_MCAM_INVALID_MAP;
@@ -2414,6 +2403,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
                goto alloc;
        }
 
+       /* For a VF base MCAM match rule is set by its PF. And all the
+        * further MCAM rules installed by VF on its own are
+        * concatenated with the base rule set by its PF. Hence PF entries
+        * should be at lower priority compared to VF entries. Otherwise
+        * base rule is hit always and rules installed by VF will be of
+        * no use. Hence if the request is from PF and NOT a priority
+        * allocation request then allocate low priority entries.
+        */
+       if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+               goto lprio_alloc;
+
        /* Find out the search range for non-priority allocation request
         *
         * Get MCAM free entry count in middle zone.
@@ -2439,6 +2439,7 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
                /* Not enough free entries, search all entries in reverse,
                 * so that low priority ones will get used up.
                 */
+lprio_alloc:
                reverse = true;
                start = 0;
                end = mcam->bmap_entries;
@@ -2673,7 +2674,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
        struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
        struct npc_mcam *mcam = &rvu->hw->mcam;
        u16 pcifunc = req->hdr.pcifunc;
-       u16 channel, chan_mask;
        int blkaddr, rc;
        u8 nix_intf;
 
@@ -2681,10 +2681,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
        if (blkaddr < 0)
                return NPC_MCAM_INVALID_REQ;
 
-       chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK;
-       channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK;
-       channel &= chan_mask;
-
        mutex_lock(&mcam->lock);
        rc = npc_mcam_verify_entry(mcam, pcifunc, req->entry);
        if (rc)
@@ -2706,12 +2702,6 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
        else
                nix_intf = pfvf->nix_rx_intf;
 
-       if (!is_pffunc_af(pcifunc) &&
-           npc_mcam_verify_channel(rvu, pcifunc, req->intf, channel)) {
-               rc = NPC_MCAM_INVALID_REQ;
-               goto exit;
-       }
-
        if (!is_pffunc_af(pcifunc) &&
            npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf, pcifunc)) {
                rc = NPC_MCAM_INVALID_REQ;
@@ -2788,8 +2778,8 @@ int rvu_mbox_handler_npc_mcam_shift_entry(struct rvu *rvu,
        struct npc_mcam *mcam = &rvu->hw->mcam;
        u16 pcifunc = req->hdr.pcifunc;
        u16 old_entry, new_entry;
+       int blkaddr, rc = 0;
        u16 index, cntr;
-       int blkaddr, rc;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
        if (blkaddr < 0)
@@ -2990,10 +2980,11 @@ int rvu_mbox_handler_npc_mcam_unmap_counter(struct rvu *rvu,
                index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry);
                if (index >= mcam->bmap_entries)
                        break;
+               entry = index + 1;
+
                if (mcam->entry2cntr_map[index] != req->cntr)
                        continue;
 
-               entry = index + 1;
                npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
                                              index, req->cntr);
        }
@@ -3058,7 +3049,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
        struct npc_mcam *mcam = &rvu->hw->mcam;
        u16 entry = NPC_MCAM_ENTRY_INVALID;
        u16 cntr = NPC_MCAM_ENTRY_INVALID;
-       u16 channel, chan_mask;
        int blkaddr, rc;
        u8 nix_intf;
 
@@ -3069,13 +3059,6 @@ int rvu_mbox_handler_npc_mcam_alloc_and_write_entry(struct rvu *rvu,
        if (!is_npc_interface_valid(rvu, req->intf))
                return NPC_MCAM_INVALID_REQ;
 
-       chan_mask = req->entry_data.kw_mask[0] & NPC_KEX_CHAN_MASK;
-       channel = req->entry_data.kw[0] & NPC_KEX_CHAN_MASK;
-       channel &= chan_mask;
-
-       if (npc_mcam_verify_channel(rvu, req->hdr.pcifunc, req->intf, channel))
-               return NPC_MCAM_INVALID_REQ;
-
        if (npc_mcam_verify_pf_func(rvu, &req->entry_data, req->intf,
                                    req->hdr.pcifunc))
                return NPC_MCAM_INVALID_REQ;
@@ -3252,7 +3235,7 @@ int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu,
        /* read MCAM entry STAT_ACT register */
        regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank));
 
-       if (!(regval & BIT_ULL(9))) {
+       if (!(regval & rvu->hw->npc_stat_ena)) {
                rsp->stat_ena = 0;
                mutex_unlock(&mcam->lock);
                return 0;
index 5c01cf4..51ddc7b 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
  * Copyright (C) 2020 Marvell.
  */
@@ -20,6 +20,8 @@ static const char * const npc_flow_names[] = {
        [NPC_DMAC]      = "dmac",
        [NPC_SMAC]      = "smac",
        [NPC_ETYPE]     = "ether type",
+       [NPC_VLAN_ETYPE_CTAG] = "vlan ether type ctag",
+       [NPC_VLAN_ETYPE_STAG] = "vlan ether type stag",
        [NPC_OUTER_VID] = "outer vlan id",
        [NPC_TOS]       = "tos",
        [NPC_SIP_IPV4]  = "ipv4 source ip",
@@ -492,6 +494,11 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
        if (*features & BIT_ULL(NPC_OUTER_VID))
                if (!npc_check_field(rvu, blkaddr, NPC_LB, intf))
                        *features &= ~BIT_ULL(NPC_OUTER_VID);
+
+       /* for vlan ethertypes corresponding layer type should be in the key */
+       if (npc_check_field(rvu, blkaddr, NPC_LB, intf))
+               *features |= BIT_ULL(NPC_VLAN_ETYPE_CTAG) |
+                            BIT_ULL(NPC_VLAN_ETYPE_STAG);
 }
 
 /* Scan key extraction profile and record how fields of our interest
@@ -600,7 +607,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf)
                dev_info(rvu->dev, "Unsupported flow(s):\n");
                for_each_set_bit(bit, (unsigned long *)&unsupported, 64)
                        dev_info(rvu->dev, "%s ", npc_get_field_name(bit));
-               return NIX_AF_ERR_NPC_KEY_NOT_SUPP;
+               return -EOPNOTSUPP;
        }
 
        return 0;
@@ -747,6 +754,28 @@ static void npc_update_ipv6_flow(struct rvu *rvu, struct mcam_entry *entry,
        }
 }
 
+static void npc_update_vlan_features(struct rvu *rvu, struct mcam_entry *entry,
+                                    u64 features, u8 intf)
+{
+       bool ctag = !!(features & BIT_ULL(NPC_VLAN_ETYPE_CTAG));
+       bool stag = !!(features & BIT_ULL(NPC_VLAN_ETYPE_STAG));
+       bool vid = !!(features & BIT_ULL(NPC_OUTER_VID));
+
+       /* If only VLAN id is given then always match outer VLAN id */
+       if (vid && !ctag && !stag) {
+               npc_update_entry(rvu, NPC_LB, entry,
+                                NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0,
+                                NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf);
+               return;
+       }
+       if (ctag)
+               npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_CTAG, 0,
+                                ~0ULL, 0, intf);
+       if (stag)
+               npc_update_entry(rvu, NPC_LB, entry, NPC_LT_LB_STAG_QINQ, 0,
+                                ~0ULL, 0, intf);
+}
+
 static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
                            u64 features, struct flow_msg *pkt,
                            struct flow_msg *mask,
@@ -779,11 +808,6 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
                npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6,
                                 0, ~0ULL, 0, intf);
 
-       if (features & BIT_ULL(NPC_OUTER_VID))
-               npc_update_entry(rvu, NPC_LB, entry,
-                                NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0,
-                                NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf);
-
        /* For AH, LTYPE should be present in entry */
        if (features & BIT_ULL(NPC_IPPROTO_AH))
                npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_AH,
@@ -829,6 +853,7 @@ do {                                                                              \
                       ntohs(mask->vlan_tci), 0);
 
        npc_update_ipv6_flow(rvu, entry, features, pkt, mask, output, intf);
+       npc_update_vlan_features(rvu, entry, features, intf);
 }
 
 static struct rvu_npc_mcam_rule *rvu_mcam_find_rule(struct npc_mcam *mcam,
@@ -995,13 +1020,11 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
        struct npc_mcam *mcam = &rvu->hw->mcam;
        struct rvu_npc_mcam_rule dummy = { 0 };
        struct rvu_npc_mcam_rule *rule;
-       bool new = false, msg_from_vf;
        u16 owner = req->hdr.pcifunc;
        struct msg_rsp write_rsp;
        struct mcam_entry *entry;
        int entry_index, err;
-
-       msg_from_vf = !!(owner & RVU_PFVF_FUNC_MASK);
+       bool new = false;
 
        installed_features = req->features;
        features = req->features;
@@ -1027,7 +1050,7 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
        }
 
        /* update mcam entry with default unicast rule attributes */
-       if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) {
+       if (def_ucast_rule && (req->default_rule && req->append)) {
                missing_features = (def_ucast_rule->features ^ features) &
                                        def_ucast_rule->features;
                if (missing_features)
@@ -1130,6 +1153,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
                                      struct npc_install_flow_rsp *rsp)
 {
        bool from_vf = !!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK);
+       struct rvu_switch *rswitch = &rvu->rswitch;
        int blkaddr, nixlf, err;
        struct rvu_pfvf *pfvf;
        bool pf_set_vfs_mac = false;
@@ -1139,14 +1163,14 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
        if (blkaddr < 0) {
                dev_err(rvu->dev, "%s: NPC block not implemented\n", __func__);
-               return -ENODEV;
+               return NPC_MCAM_INVALID_REQ;
        }
 
        if (!is_npc_interface_valid(rvu, req->intf))
-               return -EINVAL;
+               return NPC_FLOW_INTF_INVALID;
 
        if (from_vf && req->default_rule)
-               return NPC_MCAM_PERM_DENIED;
+               return NPC_FLOW_VF_PERM_DENIED;
 
        /* Each PF/VF info is maintained in struct rvu_pfvf.
         * rvu_pfvf for the target PF/VF needs to be retrieved
@@ -1172,12 +1196,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 
        err = npc_check_unsupported_flows(rvu, req->features, req->intf);
        if (err)
-               return err;
-
-       /* Skip channel validation if AF is installing */
-       if (!is_pffunc_af(req->hdr.pcifunc) &&
-           npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
-               return -EINVAL;
+               return NPC_FLOW_NOT_SUPPORTED;
 
        pfvf = rvu_get_pfvf(rvu, target);
 
@@ -1195,7 +1214,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
        /* Proceed if NIXLF is attached or not for TX rules */
        err = nix_get_nixlf(rvu, target, &nixlf, NULL);
        if (err && is_npc_intf_rx(req->intf) && !pf_set_vfs_mac)
-               return -EINVAL;
+               return NPC_FLOW_NO_NIXLF;
 
        /* don't enable rule when nixlf not attached or initialized */
        if (!(is_nixlf_attached(rvu, target) &&
@@ -1211,7 +1230,7 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 
        /* Do not allow requests from uninitialized VFs */
        if (from_vf && !enable)
-               return -EINVAL;
+               return NPC_FLOW_VF_NOT_INIT;
 
        /* PF sets VF mac & VF NIXLF is not attached, update the mac addr */
        if (pf_set_vfs_mac && !enable) {
@@ -1221,15 +1240,12 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
                return 0;
        }
 
-       /* If message is from VF then its flow should not overlap with
-        * reserved unicast flow.
-        */
-       if (from_vf && pfvf->def_ucast_rule && is_npc_intf_rx(req->intf) &&
-           pfvf->def_ucast_rule->features & req->features)
-               return -EINVAL;
+       mutex_lock(&rswitch->switch_lock);
+       err = npc_install_flow(rvu, blkaddr, target, nixlf, pfvf,
+                              req, rsp, enable, pf_set_vfs_mac);
+       mutex_unlock(&rswitch->switch_lock);
 
-       return npc_install_flow(rvu, blkaddr, target, nixlf, pfvf, req, rsp,
-                               enable, pf_set_vfs_mac);
+       return err;
 }
 
 static int npc_delete_flow(struct rvu *rvu, struct rvu_npc_mcam_rule *rule,
index e266f0c..b3150f0 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -33,8 +30,8 @@ static struct hw_reg_map txsch_reg_map[NIX_TXSCH_LVL_CNT] = {
        {NIX_TXSCH_LVL_SMQ, 2, 0xFFFF, {{0x0700, 0x0708}, {0x1400, 0x14C8} } },
        {NIX_TXSCH_LVL_TL4, 3, 0xFFFF, {{0x0B00, 0x0B08}, {0x0B10, 0x0B18},
                              {0x1200, 0x12E0} } },
-       {NIX_TXSCH_LVL_TL3, 3, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
-                             {0x1610, 0x1618} } },
+       {NIX_TXSCH_LVL_TL3, 4, 0xFFFF, {{0x1000, 0x10E0}, {0x1600, 0x1608},
+                             {0x1610, 0x1618}, {0x1700, 0x17B0} } },
        {NIX_TXSCH_LVL_TL2, 2, 0xFFFF, {{0x0E00, 0x0EE0}, {0x1700, 0x17B0} } },
        {NIX_TXSCH_LVL_TL1, 1, 0xFFFF, {{0x0C00, 0x0D98} } },
 };
index 8b01ef6..21f1ed4 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef RVU_REG_H
@@ -53,7 +50,7 @@
 #define RVU_AF_SMMU_TXN_REQ                (0x6008)
 #define RVU_AF_SMMU_ADDR_RSP_STS           (0x6010)
 #define RVU_AF_SMMU_ADDR_TLN               (0x6018)
-#define RVU_AF_SMMU_TLN_FLIT1              (0x6030)
+#define RVU_AF_SMMU_TLN_FLIT0              (0x6020)
 
 /* Admin function's privileged PF/VF registers */
 #define RVU_PRIV_CONST                      (0x8000000)
 #define NPA_AF_AQ_DONE_INT_W1S          (0x0688)
 #define NPA_AF_AQ_DONE_ENA_W1S          (0x0690)
 #define NPA_AF_AQ_DONE_ENA_W1C          (0x0698)
+#define NPA_AF_BATCH_CTL               (0x06a0)
 #define NPA_AF_LFX_AURAS_CFG(a)         (0x4000 | (a) << 18)
 #define NPA_AF_LFX_LOC_AURAS_BASE(a)    (0x4010 | (a) << 18)
 #define NPA_AF_LFX_QINTS_CFG(a)         (0x4100 | (a) << 18)
 #define NIX_AF_SDP_TX_FIFO_STATUS      (0x0640)
 #define NIX_AF_TX_NPC_CAPTURE_CONFIG   (0x0660)
 #define NIX_AF_TX_NPC_CAPTURE_INFO     (0x0670)
+#define NIX_AF_SEB_CFG                 (0x05F0)
 
 #define NIX_AF_DEBUG_NPC_RESP_DATAX(a)          (0x680 | (a) << 3)
 #define NIX_AF_SMQX_CFG(a)                      (0x700 | (a) << 16)
 #define NIX_AF_SQM_DBG_CTL_STATUS               (0x750)
+#define NIX_AF_DWRR_SDP_MTU                     (0x790)
+#define NIX_AF_DWRR_RPM_MTU                     (0x7A0)
 #define NIX_AF_PSE_CHANNEL_LEVEL                (0x800)
 #define NIX_AF_PSE_SHAPER_CFG                   (0x810)
 #define NIX_AF_TX_EXPR_CREDIT                  (0x830)
 #define        APR_AF_LMT_CFG                  (0x000ull)
 #define        APR_AF_LMT_MAP_BASE             (0x008ull)
 #define        APR_AF_LMT_CTL                  (0x010ull)
+#define APR_LMT_MAP_ENT_DIS_SCH_CMP_SHIFT      23
+#define APR_LMT_MAP_ENT_SCH_ENA_SHIFT          22
+#define APR_LMT_MAP_ENT_DIS_LINE_PREF_SHIFT    21
 
 #endif /* RVU_REG_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c
new file mode 100644 (file)
index 0000000..b04fb22
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2021 Marvell.
+ *
+ */
+
+#include <linux/pci.h>
+#include "rvu.h"
+
+/* SDP PF device id */
+#define PCI_DEVID_OTX2_SDP_PF   0xA0F6
+
+/* Maximum SDP blocks in a chip */
+#define MAX_SDP                2
+
+/* SDP PF number */
+static int sdp_pf_num[MAX_SDP] = {-1, -1};
+
+bool is_sdp_pfvf(u16 pcifunc)
+{
+       u16 pf = rvu_get_pf(pcifunc);
+       u32 found = 0, i = 0;
+
+       while (i < MAX_SDP) {
+               if (pf == sdp_pf_num[i])
+                       found = 1;
+               i++;
+       }
+
+       if (!found)
+               return false;
+
+       return true;
+}
+
+bool is_sdp_pf(u16 pcifunc)
+{
+       return (is_sdp_pfvf(pcifunc) &&
+               !(pcifunc & RVU_PFVF_FUNC_MASK));
+}
+
+bool is_sdp_vf(u16 pcifunc)
+{
+       return (is_sdp_pfvf(pcifunc) &&
+               !!(pcifunc & RVU_PFVF_FUNC_MASK));
+}
+
+int rvu_sdp_init(struct rvu *rvu)
+{
+       struct pci_dev *pdev = NULL;
+       struct rvu_pfvf *pfvf;
+       u32 i = 0;
+
+       while ((i < MAX_SDP) && (pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+                                                      PCI_DEVID_OTX2_SDP_PF,
+                                                      pdev)) != NULL) {
+               /* The RVU PF number is one less than bus number */
+               sdp_pf_num[i] = pdev->bus->number - 1;
+               pfvf = &rvu->pf[sdp_pf_num[i]];
+
+               pfvf->sdp_info = devm_kzalloc(rvu->dev,
+                                             sizeof(struct sdp_node_info),
+                                             GFP_KERNEL);
+               if (!pfvf->sdp_info)
+                       return -ENOMEM;
+
+               dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]);
+
+               put_device(&pdev->dev);
+               i++;
+       }
+
+       return 0;
+}
+
+int
+rvu_mbox_handler_set_sdp_chan_info(struct rvu *rvu,
+                                  struct sdp_chan_info_msg *req,
+                                  struct msg_rsp *rsp)
+{
+       struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+
+       memcpy(pfvf->sdp_info, &req->info, sizeof(struct sdp_node_info));
+       dev_info(rvu->dev, "AF: SDP%d max_vfs %d num_pf_rings %d pf_srn %d\n",
+                req->info.node_id, req->info.max_vfs, req->info.num_pf_rings,
+                req->info.pf_srn);
+       return 0;
+}
+
+int
+rvu_mbox_handler_get_sdp_chan_info(struct rvu *rvu, struct msg_req *req,
+                                  struct sdp_get_chan_info_msg *rsp)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       int blkaddr;
+
+       if (!hw->cap.programmable_chans) {
+               rsp->chan_base = NIX_CHAN_SDP_CH_START;
+               rsp->num_chan = NIX_CHAN_SDP_NUM_CHANS;
+       } else {
+               blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0);
+               rsp->chan_base = hw->sdp_chan_base;
+               rsp->num_chan = rvu_read64(rvu, blkaddr, NIX_AF_CONST1) & 0xFFFUL;
+       }
+
+       return 0;
+}
index 5bbe672..77ac966 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*  Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
- * Copyright (C) 2018 Marvell International Ltd.
+ * Copyright (C) 2018 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef RVU_STRUCT_H
index 820adf3..3392487 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver
+/* Marvell RVU Admin Function driver
  *
  * Copyright (C) 2021 Marvell.
+ *
  */
 
 #include <linux/bitfield.h>
index 56f90cf..775fd4c 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
  *
- * Copyright (C) 2020 Marvell International Ltd.
  */
 
 #define CREATE_TRACE_POINTS
index 64aa7d3..28984d0 100644 (file)
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Admin Function driver tracepoints
+/* Marvell RVU Admin Function driver
+ *
+ * Copyright (C) 2020 Marvell.
  *
- * Copyright (C) 2020 Marvell International Ltd.
  */
 
 #undef TRACE_SYSTEM
@@ -14,6 +15,8 @@
 #include <linux/tracepoint.h>
 #include <linux/pci.h>
 
+#include "mbox.h"
+
 TRACE_EVENT(otx2_msg_alloc,
            TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size),
            TP_ARGS(pdev, id, size),
@@ -25,8 +28,8 @@ TRACE_EVENT(otx2_msg_alloc,
                           __entry->id = id;
                           __entry->size = size;
            ),
-           TP_printk("[%s] msg:(0x%x) size:%lld\n", __get_str(dev),
-                     __entry->id, __entry->size)
+           TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev),
+                     otx2_mbox_id2name(__entry->id), __entry->size)
 );
 
 TRACE_EVENT(otx2_msg_send,
@@ -88,8 +91,8 @@ TRACE_EVENT(otx2_msg_process,
                           __entry->id = id;
                           __entry->err = err;
            ),
-           TP_printk("[%s] msg:(0x%x) error:%d\n", __get_str(dev),
-                     __entry->id, __entry->err)
+           TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev),
+                     otx2_mbox_id2name(__entry->id), __entry->err)
 );
 
 #endif /* __RVU_TRACE_H */
index 3254b02..b92c267 100644 (file)
@@ -1,13 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Makefile for Marvell's OcteonTX2 ethernet device drivers
+# Makefile for Marvell's RVU Ethernet device drivers
 #
 
 obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
 obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o
 
 rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
-               otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o
-rvu_nicvf-y := otx2_vf.o
+               otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \
+               otx2_devlink.o
+rvu_nicvf-y := otx2_vf.o otx2_devlink.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
index 184de94..3cc76f1 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
  *
- * Copyright (C) 2020 Marvell.
  */
 
 #include "cn10k.h"
@@ -92,8 +93,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-       aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+       aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
index 1a1ae33..8ae9681 100644 (file)
@@ -1,7 +1,8 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Ethernet driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
  *
- * Copyright (C) 2020 Marvell.
  */
 
 #ifndef CN10K_H
@@ -9,6 +10,20 @@
 
 #include "otx2_common.h"
 
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+       u32 weight;
+
+       /* On OTx2, since AF returns DWRR_MTU as '1', this logic
+        * will work on those silicons as well.
+        */
+       weight = mtu / pfvf->hw.dwrr_mtu;
+       if (mtu % pfvf->hw.dwrr_mtu)
+               weight += 1;
+
+       return weight;
+}
+
 void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
index 70fcc1f..ce25c27 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/interrupt.h>
@@ -208,7 +205,8 @@ int otx2_set_mac_address(struct net_device *netdev, void *p)
        if (!otx2_hw_set_mac_addr(pfvf, addr->sa_data)) {
                memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
                /* update dmac field in vlan offload rule */
-               if (pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT)
+               if (netif_running(netdev) &&
+                   pfvf->flags & OTX2_FLAG_RX_VLAN_SUPPORT)
                        otx2_install_rxvlan_offload_flow(pfvf);
                /* update dmac address in ntuple and DMAC filter list */
                if (pfvf->flags & OTX2_FLAG_DMACFLTR_SUPPORT)
@@ -268,6 +266,7 @@ unlock:
 int otx2_set_flowkey_cfg(struct otx2_nic *pfvf)
 {
        struct otx2_rss_info *rss = &pfvf->hw.rss_info;
+       struct nix_rss_flowkey_cfg_rsp *rsp;
        struct nix_rss_flowkey_cfg *req;
        int err;
 
@@ -282,6 +281,18 @@ int otx2_set_flowkey_cfg(struct otx2_nic *pfvf)
        req->group = DEFAULT_RSS_CONTEXT_GROUP;
 
        err = otx2_sync_mbox_msg(&pfvf->mbox);
+       if (err)
+               goto fail;
+
+       rsp = (struct nix_rss_flowkey_cfg_rsp *)
+                       otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+       if (IS_ERR(rsp)) {
+               err = PTR_ERR(rsp);
+               goto fail;
+       }
+
+       pfvf->hw.flowkey_alg_idx = rsp->alg_idx;
+fail:
        mutex_unlock(&pfvf->mbox.lock);
        return err;
 }
@@ -572,30 +583,14 @@ void otx2_get_mac_from_af(struct net_device *netdev)
 }
 EXPORT_SYMBOL(otx2_get_mac_from_af);
 
-static int otx2_get_link(struct otx2_nic *pfvf)
-{
-       int link = 0;
-       u16 map;
-
-       /* cgx lmac link */
-       if (pfvf->hw.tx_chan_base >= CGX_CHAN_BASE) {
-               map = pfvf->hw.tx_chan_base & 0x7FF;
-               link = 4 * ((map >> 8) & 0xF) + ((map >> 4) & 0xF);
-       }
-       /* LBK channel */
-       if (pfvf->hw.tx_chan_base < SDP_CHAN_BASE) {
-               map = pfvf->hw.tx_chan_base & 0x7FF;
-               link = pfvf->hw.cgx_links | ((map >> 8) & 0xF);
-       }
-
-       return link;
-}
-
 int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 {
        struct otx2_hw *hw = &pfvf->hw;
        struct nix_txschq_config *req;
        u64 schq, parent;
+       u64 dwrr_val;
+
+       dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 
        req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
        if (!req)
@@ -621,21 +616,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                req->num_regs++;
                /* Set DWRR quantum */
                req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
-               req->regval[2] =  DFLT_RR_QTM;
+               req->regval[2] =  dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL4) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
                req->reg[0] = NIX_AF_TL4X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL3) {
                parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
                req->reg[0] = NIX_AF_TL3X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL2) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
                req->reg[0] = NIX_AF_TL2X_PARENT(schq);
@@ -643,11 +638,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 
                req->num_regs++;
                req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
-               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
 
                req->num_regs++;
-               req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
-                                                       otx2_get_link(pfvf));
+               req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
                /* Enable this queue and backpressure */
                req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
 
@@ -656,7 +650,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                 * For VF this is always ignored.
                 */
 
-               /* Set DWRR quantum */
+               /* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+                * clip it to 16384, so configuring a 24bit max value
+                * will work on both OTx2 and CN10K.
+                */
                req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
                req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
 
@@ -803,7 +800,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+       aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
@@ -1190,7 +1187,22 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
        /* Enable backpressure for RQ aura */
        if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
                aq->aura.bp_ena = 0;
+               /* If NIX1 LF is attached then specify NIX1_RX.
+                *
+                * Below NPA_AURA_S[BP_ENA] is set according to the
+                * NPA_BPINTF_E enumeration given as:
+                * 0x0 + a*0x1 where 'a' is 0 for NIX0_RX and 1 for NIX1_RX so
+                * NIX0_RX is 0x0 + 0*0x1 = 0
+                * NIX1_RX is 0x0 + 1*0x1 = 1
+                * But in HRM it is given that
+                * "NPA_AURA_S[BP_ENA](w1[33:32]) - Enable aura backpressure to
+                * NIX-RX based on [BP] level. One bit per NIX-RX; index
+                * enumerated by NPA_BPINTF_E."
+                */
+               if (pfvf->nix_blkaddr == BLKADDR_NIX1)
+                       aq->aura.bp_ena = 1;
                aq->aura.nix0_bpid = pfvf->bpid[0];
+
                /* Set backpressure level for RQ's Aura */
                aq->aura.bp = RQ_BP_LVL_AURA;
        }
@@ -1577,6 +1589,7 @@ void mbox_handler_nix_lf_alloc(struct otx2_nic *pfvf,
        pfvf->hw.lso_tsov6_idx = rsp->lso_tsov6_idx;
        pfvf->hw.cgx_links = rsp->cgx_links;
        pfvf->hw.lbk_links = rsp->lbk_links;
+       pfvf->hw.tx_link = rsp->tx_link;
 }
 EXPORT_SYMBOL(mbox_handler_nix_lf_alloc);
 
@@ -1668,6 +1681,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf)
                 * SMQ errors
                 */
                max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+               /* Also save DWRR MTU, needed for DWRR weight calculation */
+               pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+               if (!pfvf->hw.dwrr_mtu)
+                       pfvf->hw.dwrr_mtu = 1;
        }
 
 out:
index 8fd58cd..48227ce 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OTX2_COMMON_H
 #include <linux/timecounter.h>
 #include <linux/soc/marvell/octeontx2/asm.h>
 #include <net/pkt_cls.h>
+#include <net/devlink.h>
 
 #include <mbox.h>
 #include <npc.h>
 #include "otx2_reg.h"
 #include "otx2_txrx.h"
+#include "otx2_devlink.h"
 #include <rvu_trace.h>
 
 /* PCI device IDs */
@@ -181,6 +180,7 @@ struct otx2_hw {
        /* NIX */
        u16             txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
        u16                     matchall_ipolicer;
+       u32                     dwrr_mtu;
 
        /* HW settings, coalescing etc */
        u16                     rx_chan_base;
@@ -196,6 +196,9 @@ struct otx2_hw {
        u8                      lso_udpv4_idx;
        u8                      lso_udpv6_idx;
 
+       /* RSS */
+       u8                      flowkey_alg_idx;
+
        /* MSI-X */
        u8                      cint_cnt; /* CQ interrupt count */
        u16                     npa_msixoff; /* Offset of NPA vectors */
@@ -212,6 +215,7 @@ struct otx2_hw {
        u64                     cgx_fec_uncorr_blks;
        u8                      cgx_links;  /* No. of CGX links present in HW */
        u8                      lbk_links;  /* No. of LBK links present in HW */
+       u8                      tx_link;    /* Transmit channel link number */
 #define HW_TSO                 0
 #define CN10K_MBOX             1
 #define CN10K_LMTST            2
@@ -267,7 +271,6 @@ struct otx2_mac_table {
 };
 
 struct otx2_flow_config {
-       u16                     entry[NPC_MAX_NONCONTIG_ENTRIES];
        u16                     *flow_ent;
        u16                     *def_ent;
        u16                     nr_flows;
@@ -278,16 +281,13 @@ struct otx2_flow_config {
 #define OTX2_MCAM_COUNT                (OTX2_DEFAULT_FLOWCOUNT + \
                                 OTX2_MAX_UNICAST_FLOWS + \
                                 OTX2_MAX_VLAN_FLOWS)
-       u16                     ntuple_offset;
        u16                     unicast_offset;
        u16                     rx_vlan_offset;
        u16                     vf_vlan_offset;
 #define OTX2_PER_VF_VLAN_FLOWS 2 /* Rx + Tx per VF */
 #define OTX2_VF_VLAN_RX_INDEX  0
 #define OTX2_VF_VLAN_TX_INDEX  1
-       u16                     tc_flower_offset;
-       u16                     ntuple_max_flows;
-       u16                     tc_max_flows;
+       u16                     max_flows;
        u8                      dmacflt_max_flows;
        u8                      *bmap_to_dmacindex;
        unsigned long           dmacflt_bmap;
@@ -298,8 +298,7 @@ struct otx2_tc_info {
        /* hash table to store TC offloaded flows */
        struct rhashtable               flow_table;
        struct rhashtable_params        flow_ht_params;
-       DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS);
-       unsigned long                   num_entries;
+       unsigned long                   *tc_entries_bitmap;
 };
 
 struct dev_hw_ops {
@@ -352,6 +351,11 @@ struct otx2_nic {
        struct otx2_vf_config   *vf_configs;
        struct cgx_link_user_info linfo;
 
+       /* NPC MCAM */
+       struct otx2_flow_config *flow_cfg;
+       struct otx2_mac_table   *mac_table;
+       struct otx2_tc_info     tc_info;
+
        u64                     reset_count;
        struct work_struct      reset_task;
        struct workqueue_struct *flr_wq;
@@ -359,7 +363,6 @@ struct otx2_nic {
        struct refill_work      *refill_wrk;
        struct workqueue_struct *otx2_wq;
        struct work_struct      rx_mode_work;
-       struct otx2_mac_table   *mac_table;
 
        /* Ethtool stuff */
        u32                     msg_enable;
@@ -375,9 +378,10 @@ struct otx2_nic {
        struct otx2_ptp         *ptp;
        struct hwtstamp_config  tstamp;
 
-       struct otx2_flow_config *flow_cfg;
-       struct otx2_tc_info     tc_info;
        unsigned long           rq_bmap;
+
+       /* Devlink */
+       struct otx2_devlink     *dl;
 };
 
 static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -709,6 +713,11 @@ MBOX_UP_CGX_MESSAGES
 #define        RVU_PFVF_FUNC_SHIFT     0
 #define        RVU_PFVF_FUNC_MASK      0x3FF
 
+static inline bool is_otx2_vf(u16 pcifunc)
+{
+       return !!(pcifunc & RVU_PFVF_FUNC_MASK);
+}
+
 static inline int rvu_get_pf(u16 pcifunc)
 {
        return (pcifunc >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
@@ -814,7 +823,8 @@ int otx2_set_real_num_queues(struct net_device *netdev,
                             int tx_queues, int rx_queues);
 /* MCAM filter related APIs */
 int otx2_mcam_flow_init(struct otx2_nic *pf);
-int otx2_alloc_mcam_entries(struct otx2_nic *pfvf);
+int otx2vf_mcam_flow_init(struct otx2_nic *pfvf);
+int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count);
 void otx2_mcam_flow_del(struct otx2_nic *pf);
 int otx2_destroy_ntuple_flows(struct otx2_nic *pf);
 int otx2_destroy_mcam_flows(struct otx2_nic *pfvf);
@@ -825,8 +835,7 @@ int otx2_get_all_flows(struct otx2_nic *pfvf,
 int otx2_add_flow(struct otx2_nic *pfvf,
                  struct ethtool_rxnfc *nfc);
 int otx2_remove_flow(struct otx2_nic *pfvf, u32 location);
-int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
-                             struct npc_install_flow_req *req);
+int otx2_get_maxflows(struct otx2_flow_config *flow_cfg);
 void otx2_rss_ctx_flow_del(struct otx2_nic *pfvf, int ctx_id);
 int otx2_del_macfilter(struct net_device *netdev, const u8 *mac);
 int otx2_add_macfilter(struct net_device *netdev, const u8 *mac);
@@ -838,6 +847,7 @@ int otx2_init_tc(struct otx2_nic *nic);
 void otx2_shutdown_tc(struct otx2_nic *nic);
 int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                  void *type_data);
+int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic);
 /* CGX/RPM DMAC filters support */
 int otx2_dmacflt_get_max_cnt(struct otx2_nic *pf);
 int otx2_dmacflt_add(struct otx2_nic *pf, const u8 *mac, u8 bit_pos);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
new file mode 100644 (file)
index 0000000..7ac3ef2
--- /dev/null
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+
+#include "otx2_common.h"
+
+/* Devlink Params APIs */
+static int otx2_dl_mcam_count_validate(struct devlink *devlink, u32 id,
+                                      union devlink_param_value val,
+                                      struct netlink_ext_ack *extack)
+{
+       struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+       struct otx2_nic *pfvf = otx2_dl->pfvf;
+       struct otx2_flow_config *flow_cfg;
+
+       if (!pfvf->flow_cfg) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "pfvf->flow_cfg not initialized");
+               return -EINVAL;
+       }
+
+       flow_cfg = pfvf->flow_cfg;
+       if (flow_cfg && flow_cfg->nr_flows) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Cannot modify count when there are active rules");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int otx2_dl_mcam_count_set(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+       struct otx2_nic *pfvf = otx2_dl->pfvf;
+
+       if (!pfvf->flow_cfg)
+               return 0;
+
+       otx2_alloc_mcam_entries(pfvf, ctx->val.vu16);
+       otx2_tc_alloc_ent_bitmap(pfvf);
+
+       return 0;
+}
+
+static int otx2_dl_mcam_count_get(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+       struct otx2_nic *pfvf = otx2_dl->pfvf;
+       struct otx2_flow_config *flow_cfg;
+
+       if (!pfvf->flow_cfg) {
+               ctx->val.vu16 = 0;
+               return 0;
+       }
+
+       flow_cfg = pfvf->flow_cfg;
+       ctx->val.vu16 = flow_cfg->max_flows;
+
+       return 0;
+}
+
+enum otx2_dl_param_id {
+       OTX2_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       OTX2_DEVLINK_PARAM_ID_MCAM_COUNT,
+};
+
+static const struct devlink_param otx2_dl_params[] = {
+       DEVLINK_PARAM_DRIVER(OTX2_DEVLINK_PARAM_ID_MCAM_COUNT,
+                            "mcam_count", DEVLINK_PARAM_TYPE_U16,
+                            BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                            otx2_dl_mcam_count_get, otx2_dl_mcam_count_set,
+                            otx2_dl_mcam_count_validate),
+};
+
+/* Devlink OPs */
+static int otx2_devlink_info_get(struct devlink *devlink,
+                                struct devlink_info_req *req,
+                                struct netlink_ext_ack *extack)
+{
+       struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+       struct otx2_nic *pfvf = otx2_dl->pfvf;
+
+       if (is_otx2_vf(pfvf->pcifunc))
+               return devlink_info_driver_name_put(req, "rvu_nicvf");
+
+       return devlink_info_driver_name_put(req, "rvu_nicpf");
+}
+
+static const struct devlink_ops otx2_devlink_ops = {
+       .info_get = otx2_devlink_info_get,
+};
+
+int otx2_register_dl(struct otx2_nic *pfvf)
+{
+       struct otx2_devlink *otx2_dl;
+       struct devlink *dl;
+       int err;
+
+       dl = devlink_alloc(&otx2_devlink_ops,
+                          sizeof(struct otx2_devlink), pfvf->dev);
+       if (!dl) {
+               dev_warn(pfvf->dev, "devlink_alloc failed\n");
+               return -ENOMEM;
+       }
+
+       err = devlink_register(dl);
+       if (err) {
+               dev_err(pfvf->dev, "devlink register failed with error %d\n", err);
+               devlink_free(dl);
+               return err;
+       }
+
+       otx2_dl = devlink_priv(dl);
+       otx2_dl->dl = dl;
+       otx2_dl->pfvf = pfvf;
+       pfvf->dl = otx2_dl;
+
+       err = devlink_params_register(dl, otx2_dl_params,
+                                     ARRAY_SIZE(otx2_dl_params));
+       if (err) {
+               dev_err(pfvf->dev,
+                       "devlink params register failed with error %d", err);
+               goto err_dl;
+       }
+
+       devlink_params_publish(dl);
+
+       return 0;
+
+err_dl:
+       devlink_unregister(dl);
+       devlink_free(dl);
+       return err;
+}
+
+void otx2_unregister_dl(struct otx2_nic *pfvf)
+{
+       struct otx2_devlink *otx2_dl = pfvf->dl;
+       struct devlink *dl;
+
+       if (!otx2_dl || !otx2_dl->dl)
+               return;
+
+       dl = otx2_dl->dl;
+
+       devlink_params_unregister(dl, otx2_dl_params,
+                                 ARRAY_SIZE(otx2_dl_params));
+
+       devlink_unregister(dl);
+       devlink_free(dl);
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.h
new file mode 100644 (file)
index 0000000..c7bd4f3
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell RVU PF/VF Netdev Devlink
+ *
+ * Copyright (C) 2021 Marvell.
+ *
+ */
+
+#ifndef        OTX2_DEVLINK_H
+#define        OTX2_DEVLINK_H
+
+struct otx2_devlink {
+       struct devlink *dl;
+       struct otx2_nic *pfvf;
+};
+
+/* Devlink APIs */
+int otx2_register_dl(struct otx2_nic *pfvf);
+void otx2_unregister_dl(struct otx2_nic *pfvf);
+
+#endif /* RVU_DEVLINK_H */
index 383a6b5..2ec800f 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
  *
  * Copyright (C) 2021 Marvell.
+ *
  */
 
 #include "otx2_common.h"
index b906a0e..799486c 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/pci.h>
@@ -33,9 +30,6 @@ struct otx2_stat {
        .index = offsetof(struct otx2_dev_stats, stat) / sizeof(u64), \
 }
 
-/* Physical link config */
-#define OTX2_ETHTOOL_SUPPORTED_MODES 0x638CCBF //110001110001100110010111111
-
 enum link_mode {
        OTX2_MODE_SUPPORTED,
        OTX2_MODE_ADVERTISED
@@ -415,7 +409,9 @@ static int otx2_set_ringparam(struct net_device *netdev,
 }
 
 static int otx2_get_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *cmd)
+                            struct ethtool_coalesce *cmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct otx2_nic *pfvf = netdev_priv(netdev);
        struct otx2_hw *hw = &pfvf->hw;
@@ -429,7 +425,9 @@ static int otx2_get_coalesce(struct net_device *netdev,
 }
 
 static int otx2_set_coalesce(struct net_device *netdev,
-                            struct ethtool_coalesce *ec)
+                            struct ethtool_coalesce *ec,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct otx2_nic *pfvf = netdev_priv(netdev);
        struct otx2_hw *hw = &pfvf->hw;
@@ -645,6 +643,7 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf,
 static int otx2_get_rxnfc(struct net_device *dev,
                          struct ethtool_rxnfc *nfc, u32 *rules)
 {
+       bool ntuple = !!(dev->features & NETIF_F_NTUPLE);
        struct otx2_nic *pfvf = netdev_priv(dev);
        int ret = -EOPNOTSUPP;
 
@@ -654,14 +653,18 @@ static int otx2_get_rxnfc(struct net_device *dev,
                ret = 0;
                break;
        case ETHTOOL_GRXCLSRLCNT:
-               nfc->rule_cnt = pfvf->flow_cfg->nr_flows;
-               ret = 0;
+               if (netif_running(dev) && ntuple) {
+                       nfc->rule_cnt = pfvf->flow_cfg->nr_flows;
+                       ret = 0;
+               }
                break;
        case ETHTOOL_GRXCLSRULE:
-               ret = otx2_get_flow(pfvf, nfc,  nfc->fs.location);
+               if (netif_running(dev) && ntuple)
+                       ret = otx2_get_flow(pfvf, nfc,  nfc->fs.location);
                break;
        case ETHTOOL_GRXCLSRLALL:
-               ret = otx2_get_all_flows(pfvf, nfc, rules);
+               if (netif_running(dev) && ntuple)
+                       ret = otx2_get_all_flows(pfvf, nfc, rules);
                break;
        case ETHTOOL_GRXFH:
                return otx2_get_rss_hash_opts(pfvf, nfc);
@@ -696,41 +699,6 @@ static int otx2_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc)
        return ret;
 }
 
-static int otx2vf_get_rxnfc(struct net_device *dev,
-                           struct ethtool_rxnfc *nfc, u32 *rules)
-{
-       struct otx2_nic *pfvf = netdev_priv(dev);
-       int ret = -EOPNOTSUPP;
-
-       switch (nfc->cmd) {
-       case ETHTOOL_GRXRINGS:
-               nfc->data = pfvf->hw.rx_queues;
-               ret = 0;
-               break;
-       case ETHTOOL_GRXFH:
-               return otx2_get_rss_hash_opts(pfvf, nfc);
-       default:
-               break;
-       }
-       return ret;
-}
-
-static int otx2vf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc)
-{
-       struct otx2_nic *pfvf = netdev_priv(dev);
-       int ret = -EOPNOTSUPP;
-
-       switch (nfc->cmd) {
-       case ETHTOOL_SRXFH:
-               ret = otx2_set_rss_hash_opts(pfvf, nfc);
-               break;
-       default:
-               break;
-       }
-
-       return ret;
-}
-
 static u32 otx2_get_rxfh_key_size(struct net_device *netdev)
 {
        struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -1116,8 +1084,6 @@ static void otx2_get_link_mode_info(u64 link_mode_bmap,
        };
        u8 bit;
 
-       link_mode_bmap = link_mode_bmap & OTX2_ETHTOOL_SUPPORTED_MODES;
-
        for_each_set_bit(bit, (unsigned long *)&link_mode_bmap, 27) {
                /* SGMII mode is set */
                if (bit == 0)
@@ -1357,8 +1323,8 @@ static const struct ethtool_ops otx2vf_ethtool_ops = {
        .get_sset_count         = otx2vf_get_sset_count,
        .set_channels           = otx2_set_channels,
        .get_channels           = otx2_get_channels,
-       .get_rxnfc              = otx2vf_get_rxnfc,
-       .set_rxnfc              = otx2vf_set_rxnfc,
+       .get_rxnfc              = otx2_get_rxnfc,
+       .set_rxnfc              = otx2_set_rxnfc,
        .get_rxfh_key_size      = otx2_get_rxfh_key_size,
        .get_rxfh_indir_size    = otx2_get_rxfh_indir_size,
        .get_rxfh               = otx2_get_rxfh,
index 4d9de52..77a13fb 100644 (file)
@@ -1,15 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physical Function ethernet driver
+/* Marvell RVU Ethernet driver
  *
  * Copyright (C) 2020 Marvell.
+ *
  */
 
 #include <net/ipv6.h>
+#include <linux/sort.h>
 
 #include "otx2_common.h"
 
 #define OTX2_DEFAULT_ACTION    0x1
 
+static int otx2_mcam_entry_init(struct otx2_nic *pfvf);
+
 struct otx2_flow {
        struct ethtool_rx_flow_spec flow_spec;
        struct list_head list;
@@ -30,8 +34,7 @@ static void otx2_clear_ntuple_flow_info(struct otx2_nic *pfvf, struct otx2_flow_
 {
        devm_kfree(pfvf->dev, flow_cfg->flow_ent);
        flow_cfg->flow_ent = NULL;
-       flow_cfg->ntuple_max_flows = 0;
-       flow_cfg->tc_max_flows = 0;
+       flow_cfg->max_flows = 0;
 }
 
 static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
@@ -40,11 +43,11 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
        struct npc_mcam_free_entry_req *req;
        int ent, err;
 
-       if (!flow_cfg->ntuple_max_flows)
+       if (!flow_cfg->max_flows)
                return 0;
 
        mutex_lock(&pfvf->mbox.lock);
-       for (ent = 0; ent < flow_cfg->ntuple_max_flows; ent++) {
+       for (ent = 0; ent < flow_cfg->max_flows; ent++) {
                req = otx2_mbox_alloc_msg_npc_mcam_free_entry(&pfvf->mbox);
                if (!req)
                        break;
@@ -61,7 +64,12 @@ static int otx2_free_ntuple_mcam_entries(struct otx2_nic *pfvf)
        return 0;
 }
 
-static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
+static int mcam_entry_cmp(const void *a, const void *b)
+{
+       return *(u16 *)a - *(u16 *)b;
+}
+
+int otx2_alloc_mcam_entries(struct otx2_nic *pfvf, u16 count)
 {
        struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
        struct npc_mcam_alloc_entry_req *req;
@@ -76,8 +84,12 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
 
        flow_cfg->flow_ent = devm_kmalloc_array(pfvf->dev, count,
                                                sizeof(u16), GFP_KERNEL);
-       if (!flow_cfg->flow_ent)
+       if (!flow_cfg->flow_ent) {
+               netdev_err(pfvf->netdev,
+                          "%s: Unable to allocate memory for flow entries\n",
+                           __func__);
                return -ENOMEM;
+       }
 
        mutex_lock(&pfvf->mbox.lock);
 
@@ -92,8 +104,14 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
                req->contig = false;
                req->count = (count - allocated) > NPC_MAX_NONCONTIG_ENTRIES ?
                                NPC_MAX_NONCONTIG_ENTRIES : count - allocated;
-               req->priority = NPC_MCAM_HIGHER_PRIO;
-               req->ref_entry = flow_cfg->def_ent[0];
+
+               /* Allocate higher priority entries for PFs, so that VF's entries
+                * will be on top of PF.
+                */
+               if (!is_otx2_vf(pfvf->pcifunc)) {
+                       req->priority = NPC_MCAM_HIGHER_PRIO;
+                       req->ref_entry = flow_cfg->def_ent[0];
+               }
 
                /* Send message to AF */
                if (otx2_sync_mbox_msg(&pfvf->mbox))
@@ -114,22 +132,34 @@ static int otx2_alloc_ntuple_mcam_entries(struct otx2_nic *pfvf, u16 count)
                        break;
        }
 
+       /* Multiple MCAM entry alloc requests could result in non-sequential
+        * MCAM entries in the flow_ent[] array. Sort them in an ascending order,
+        * otherwise user installed ntuple filter index and MCAM entry index will
+        * not be in sync.
+        */
+       if (allocated)
+               sort(&flow_cfg->flow_ent[0], allocated,
+                    sizeof(flow_cfg->flow_ent[0]), mcam_entry_cmp, NULL);
+
 exit:
        mutex_unlock(&pfvf->mbox.lock);
 
-       flow_cfg->ntuple_offset = 0;
-       flow_cfg->ntuple_max_flows = allocated;
-       flow_cfg->tc_max_flows = allocated;
+       flow_cfg->max_flows = allocated;
+
+       if (allocated) {
+               pfvf->flags |= OTX2_FLAG_MCAM_ENTRIES_ALLOC;
+               pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
+       }
 
        if (allocated != count)
                netdev_info(pfvf->netdev,
-                           "Unable to allocate %d MCAM entries for ntuple, got %d\n",
+                           "Unable to allocate %d MCAM entries, got only %d\n",
                            count, allocated);
-
        return allocated;
 }
+EXPORT_SYMBOL(otx2_alloc_mcam_entries);
 
-int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
+static int otx2_mcam_entry_init(struct otx2_nic *pfvf)
 {
        struct otx2_flow_config *flow_cfg = pfvf->flow_cfg;
        struct npc_mcam_alloc_entry_req *req;
@@ -189,18 +219,35 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
        mutex_unlock(&pfvf->mbox.lock);
 
        /* Allocate entries for Ntuple filters */
-       count = otx2_alloc_ntuple_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
+       count = otx2_alloc_mcam_entries(pfvf, OTX2_DEFAULT_FLOWCOUNT);
        if (count <= 0) {
                otx2_clear_ntuple_flow_info(pfvf, flow_cfg);
                return 0;
        }
 
-       pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
        pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
 
        return 0;
 }
 
+int otx2vf_mcam_flow_init(struct otx2_nic *pfvf)
+{
+       struct otx2_flow_config *flow_cfg;
+
+       pfvf->flow_cfg = devm_kzalloc(pfvf->dev,
+                                     sizeof(struct otx2_flow_config),
+                                     GFP_KERNEL);
+       if (!pfvf->flow_cfg)
+               return -ENOMEM;
+
+       flow_cfg = pfvf->flow_cfg;
+       INIT_LIST_HEAD(&flow_cfg->flow_list);
+       flow_cfg->max_flows = 0;
+
+       return 0;
+}
+EXPORT_SYMBOL(otx2vf_mcam_flow_init);
+
 int otx2_mcam_flow_init(struct otx2_nic *pf)
 {
        int err;
@@ -212,7 +259,10 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
 
        INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
 
-       err = otx2_alloc_mcam_entries(pf);
+       /* Allocate bare minimum number of MCAM entries needed for
+        * unicast and ntuple filters.
+        */
+       err = otx2_mcam_entry_init(pf);
        if (err)
                return err;
 
@@ -248,6 +298,7 @@ void otx2_mcam_flow_del(struct otx2_nic *pf)
 {
        otx2_destroy_mcam_flows(pf);
 }
+EXPORT_SYMBOL(otx2_mcam_flow_del);
 
 /*  On success adds mcam entry
  *  On failure enable promisous mode
@@ -379,15 +430,19 @@ static void otx2_add_flow_to_list(struct otx2_nic *pfvf, struct otx2_flow *flow)
        list_add(&flow->list, head);
 }
 
-static int otx2_get_maxflows(struct otx2_flow_config *flow_cfg)
+int otx2_get_maxflows(struct otx2_flow_config *flow_cfg)
 {
-       if (flow_cfg->nr_flows == flow_cfg->ntuple_max_flows ||
+       if (!flow_cfg)
+               return 0;
+
+       if (flow_cfg->nr_flows == flow_cfg->max_flows ||
            bitmap_weight(&flow_cfg->dmacflt_bmap,
                          flow_cfg->dmacflt_max_flows))
-               return flow_cfg->ntuple_max_flows + flow_cfg->dmacflt_max_flows;
+               return flow_cfg->max_flows + flow_cfg->dmacflt_max_flows;
        else
-               return flow_cfg->ntuple_max_flows;
+               return flow_cfg->max_flows;
 }
+EXPORT_SYMBOL(otx2_get_maxflows);
 
 int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc,
                  u32 location)
@@ -708,7 +763,7 @@ static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp,
        return 0;
 }
 
-int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
+static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
                              struct npc_install_flow_req *req)
 {
        struct ethhdr *eth_mask = &fsp->m_u.ether_spec;
@@ -732,7 +787,7 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
                        ether_addr_copy(pmask->dmac, eth_mask->h_dest);
                        req->features |= BIT_ULL(NPC_DMAC);
                }
-               if (eth_mask->h_proto) {
+               if (eth_hdr->h_proto) {
                        memcpy(&pkt->etype, &eth_hdr->h_proto,
                               sizeof(pkt->etype));
                        memcpy(&pmask->etype, &eth_mask->h_proto,
@@ -764,14 +819,31 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
                return -EOPNOTSUPP;
        }
        if (fsp->flow_type & FLOW_EXT) {
-               if (fsp->m_ext.vlan_etype)
-                       return -EINVAL;
-               if (fsp->m_ext.vlan_tci) {
-                       if (fsp->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))
+               u16 vlan_etype;
+
+               if (fsp->m_ext.vlan_etype) {
+                       /* Partial masks not supported */
+                       if (be16_to_cpu(fsp->m_ext.vlan_etype) != 0xFFFF)
                                return -EINVAL;
-                       if (be16_to_cpu(fsp->h_ext.vlan_tci) >= VLAN_N_VID)
+
+                       vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype);
+                       /* Only ETH_P_8021Q and ETH_P_802AD types supported */
+                       if (vlan_etype != ETH_P_8021Q &&
+                           vlan_etype != ETH_P_8021AD)
                                return -EINVAL;
 
+                       memcpy(&pkt->vlan_etype, &fsp->h_ext.vlan_etype,
+                              sizeof(pkt->vlan_etype));
+                       memcpy(&pmask->vlan_etype, &fsp->m_ext.vlan_etype,
+                              sizeof(pmask->vlan_etype));
+
+                       if (vlan_etype == ETH_P_8021Q)
+                               req->features |= BIT_ULL(NPC_VLAN_ETYPE_CTAG);
+                       else
+                               req->features |= BIT_ULL(NPC_VLAN_ETYPE_STAG);
+               }
+
+               if (fsp->m_ext.vlan_tci) {
                        memcpy(&pkt->vlan_tci, &fsp->h_ext.vlan_tci,
                               sizeof(pkt->vlan_tci));
                        memcpy(&pmask->vlan_tci, &fsp->m_ext.vlan_tci,
@@ -858,6 +930,7 @@ static int otx2_add_flow_msg(struct otx2_nic *pfvf, struct otx2_flow *flow)
                if (flow->flow_spec.flow_type & FLOW_RSS) {
                        req->op = NIX_RX_ACTIONOP_RSS;
                        req->index = flow->rss_ctx_id;
+                       req->flow_key_alg = pfvf->hw.flowkey_alg_idx;
                } else {
                        req->op = NIX_RX_ACTIONOP_UCAST;
                        req->index = ethtool_get_flow_spec_ring(ring_cookie);
@@ -894,7 +967,7 @@ static int otx2_add_flow_with_pfmac(struct otx2_nic *pfvf,
 
        pf_mac->entry = 0;
        pf_mac->dmac_filter = true;
-       pf_mac->location = pfvf->flow_cfg->ntuple_max_flows;
+       pf_mac->location = pfvf->flow_cfg->max_flows;
        memcpy(&pf_mac->flow_spec, &flow->flow_spec,
               sizeof(struct ethtool_rx_flow_spec));
        pf_mac->flow_spec.location = pf_mac->location;
@@ -923,6 +996,12 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
        int err = 0;
        u32 ring;
 
+       if (!flow_cfg->max_flows) {
+               netdev_err(pfvf->netdev,
+                          "Ntuple rule count is 0, allocate and retry\n");
+               return -EINVAL;
+       }
+
        ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
        if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
                return -ENOMEM;
@@ -939,6 +1018,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
                if (!flow)
                        return -ENOMEM;
                flow->location = fsp->location;
+               flow->entry = flow_cfg->flow_ent[flow->location];
                new = true;
        }
        /* struct copy */
@@ -975,7 +1055,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
                flow->dmac_filter = true;
                flow->entry = find_first_zero_bit(&flow_cfg->dmacflt_bmap,
                                                  flow_cfg->dmacflt_max_flows);
-               fsp->location = flow_cfg->ntuple_max_flows + flow->entry;
+               fsp->location = flow_cfg->max_flows + flow->entry;
                flow->flow_spec.location = fsp->location;
                flow->location = fsp->location;
 
@@ -983,19 +1063,20 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
                otx2_dmacflt_add(pfvf, eth_hdr->h_dest, flow->entry);
 
        } else {
-               if (flow->location >= pfvf->flow_cfg->ntuple_max_flows) {
+               if (flow->location >= pfvf->flow_cfg->max_flows) {
                        netdev_warn(pfvf->netdev,
                                    "Can't insert non dmac ntuple rule at %d, allowed range %d-0\n",
                                    flow->location,
-                                   flow_cfg->ntuple_max_flows - 1);
+                                   flow_cfg->max_flows - 1);
                        err = -EINVAL;
                } else {
-                       flow->entry = flow_cfg->flow_ent[flow->location];
                        err = otx2_add_flow_msg(pfvf, flow);
                }
        }
 
        if (err) {
+               if (err == MBOX_MSG_INVALID)
+                       err = -EINVAL;
                if (new)
                        kfree(flow);
                return err;
@@ -1132,6 +1213,9 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf)
        if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
                return 0;
 
+       if (!flow_cfg->max_flows)
+               return 0;
+
        mutex_lock(&pfvf->mbox.lock);
        req = otx2_mbox_alloc_msg_npc_delete_flow(&pfvf->mbox);
        if (!req) {
@@ -1140,7 +1224,7 @@ int otx2_destroy_ntuple_flows(struct otx2_nic *pfvf)
        }
 
        req->start = flow_cfg->flow_ent[0];
-       req->end   = flow_cfg->flow_ent[flow_cfg->ntuple_max_flows - 1];
+       req->end   = flow_cfg->flow_ent[flow_cfg->max_flows - 1];
        err = otx2_sync_mbox_msg(&pfvf->mbox);
        mutex_unlock(&pfvf->mbox.lock);
 
index 2c24944..2f2e8a3 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physical Function ethernet driver
+/* Marvell RVU Physical Function ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -1787,17 +1784,10 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
 static netdev_features_t otx2_fix_features(struct net_device *dev,
                                           netdev_features_t features)
 {
-       /* check if n-tuple filters are ON */
-       if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) {
-               netdev_info(dev, "Disabling n-tuple filters\n");
-               features &= ~NETIF_F_NTUPLE;
-       }
-
-       /* check if tc hw offload is ON */
-       if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) {
-               netdev_info(dev, "Disabling TC hardware offload\n");
-               features &= ~NETIF_F_HW_TC;
-       }
+       if (features & NETIF_F_HW_VLAN_CTAG_RX)
+               features |= NETIF_F_HW_VLAN_STAG_RX;
+       else
+               features &= ~NETIF_F_HW_VLAN_STAG_RX;
 
        return features;
 }
@@ -1854,6 +1844,7 @@ static int otx2_set_features(struct net_device *netdev,
        netdev_features_t changed = features ^ netdev->features;
        bool ntuple = !!(features & NETIF_F_NTUPLE);
        struct otx2_nic *pf = netdev_priv(netdev);
+       bool tc = !!(features & NETIF_F_HW_TC);
 
        if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
                return otx2_cgx_config_loopback(pf,
@@ -1866,12 +1857,42 @@ static int otx2_set_features(struct net_device *netdev,
        if ((changed & NETIF_F_NTUPLE) && !ntuple)
                otx2_destroy_ntuple_flows(pf);
 
-       if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
-           pf->tc_info.num_entries) {
+       if ((changed & NETIF_F_NTUPLE) && ntuple) {
+               if (!pf->flow_cfg->max_flows) {
+                       netdev_err(netdev,
+                                  "Can't enable NTUPLE, MCAM entries not allocated\n");
+                       return -EINVAL;
+               }
+       }
+
+       if ((changed & NETIF_F_HW_TC) && tc) {
+               if (!pf->flow_cfg->max_flows) {
+                       netdev_err(netdev,
+                                  "Can't enable TC, MCAM entries not allocated\n");
+                       return -EINVAL;
+               }
+       }
+
+       if ((changed & NETIF_F_HW_TC) && !tc &&
+           pf->flow_cfg && pf->flow_cfg->nr_flows) {
                netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
                return -EBUSY;
        }
 
+       if ((changed & NETIF_F_NTUPLE) && ntuple &&
+           (netdev->features & NETIF_F_HW_TC) && !(changed & NETIF_F_HW_TC)) {
+               netdev_err(netdev,
+                          "Can't enable NTUPLE when TC is active, disable TC and retry\n");
+               return -EINVAL;
+       }
+
+       if ((changed & NETIF_F_HW_TC) && tc &&
+           (netdev->features & NETIF_F_NTUPLE) && !(changed & NETIF_F_NTUPLE)) {
+               netdev_err(netdev,
+                          "Can't enable TC when NTUPLE is active, disable NTUPLE and retry\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -2331,7 +2352,7 @@ static const struct net_device_ops otx2_netdev_ops = {
        .ndo_set_features       = otx2_set_features,
        .ndo_tx_timeout         = otx2_tx_timeout,
        .ndo_get_stats64        = otx2_get_stats64,
-       .ndo_do_ioctl           = otx2_ioctl,
+       .ndo_eth_ioctl          = otx2_ioctl,
        .ndo_set_vf_mac         = otx2_set_vf_mac,
        .ndo_set_vf_vlan        = otx2_set_vf_vlan,
        .ndo_get_vf_config      = otx2_get_vf_config,
@@ -2569,8 +2590,6 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                               NETIF_F_GSO_UDP_L4);
        netdev->features |= netdev->hw_features;
 
-       netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
-
        err = otx2_mcam_flow_init(pf);
        if (err)
                goto err_ptp_destroy;
@@ -2594,12 +2613,13 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)
                netdev->hw_features |= NETIF_F_HW_TC;
 
+       netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
+
        netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
        netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
 
        netdev->netdev_ops = &otx2_netdev_ops;
 
-       /* MTU range: 64 - 9190 */
        netdev->min_mtu = OTX2_MIN_MTU;
        netdev->max_mtu = otx2_get_max_mtu(pf);
 
@@ -2619,6 +2639,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (err)
                goto err_mcam_flow_del;
 
+       err = otx2_register_dl(pf);
+       if (err)
+               goto err_mcam_flow_del;
+
        /* Initialize SR-IOV resources */
        err = otx2_sriov_vfcfg_init(pf);
        if (err)
@@ -2776,6 +2800,7 @@ static void otx2_remove(struct pci_dev *pdev)
        /* Disable link notifications */
        otx2_cgx_config_linkevents(pf, false);
 
+       otx2_unregister_dl(pf);
        unregister_netdev(netdev);
        otx2_sriov_disable(pf->pdev);
        otx2_sriov_vfcfg_cleanup(pf);
index 56390a6..ec9e499 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 PTP support for ethernet driver
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
  *
- * Copyright (C) 2020 Marvell International Ltd.
  */
 
 #include "otx2_common.h"
index 706d63a..6ff2842 100644 (file)
@@ -1,5 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 PTP support for ethernet driver */
+/* Marvell RVU Ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
 
 #ifndef OTX2_PTP_H
 #define OTX2_PTP_H
index f4fd72e..1b967ea 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OTX2_REG_H
index 1f49b3c..4bbd12f 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OTX2_STRUCT_H
index 972b202..626961a 100644 (file)
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+/* Marvell RVU Ethernet driver
  *
  * Copyright (C) 2021 Marvell.
+ *
  */
+
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
@@ -52,6 +54,29 @@ struct otx2_tc_flow {
        bool                            is_act_police;
 };
 
+int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+{
+       struct otx2_tc_info *tc = &nic->tc_info;
+
+       if (!nic->flow_cfg->max_flows || is_otx2_vf(nic->pcifunc))
+               return 0;
+
+       /* Max flows changed, free the existing bitmap */
+       kfree(tc->tc_entries_bitmap);
+
+       tc->tc_entries_bitmap =
+                       kcalloc(BITS_TO_LONGS(nic->flow_cfg->max_flows),
+                               sizeof(long), GFP_KERNEL);
+       if (!tc->tc_entries_bitmap) {
+               netdev_err(nic->netdev,
+                          "Unable to alloc TC flow entries bitmap\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+
 static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
                                      u32 *burst_mantissa)
 {
@@ -485,8 +510,8 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
                                   match.key->vlan_priority << 13;
 
                        vlan_tci_mask = match.mask->vlan_id |
-                                       match.key->vlan_dei << 12 |
-                                       match.key->vlan_priority << 13;
+                                       match.mask->vlan_dei << 12 |
+                                       match.mask->vlan_priority << 13;
 
                        flow_spec->vlan_tci = htons(vlan_tci);
                        flow_mask->vlan_tci = htons(vlan_tci_mask);
@@ -596,6 +621,7 @@ static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
 static int otx2_tc_del_flow(struct otx2_nic *nic,
                            struct flow_cls_offload *tc_flow_cmd)
 {
+       struct otx2_flow_config *flow_cfg = nic->flow_cfg;
        struct otx2_tc_info *tc_info = &nic->tc_info;
        struct otx2_tc_flow *flow_node;
        int err;
@@ -638,7 +664,7 @@ static int otx2_tc_del_flow(struct otx2_nic *nic,
        kfree_rcu(flow_node, rcu);
 
        clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap);
-       tc_info->num_entries--;
+       flow_cfg->nr_flows--;
 
        return 0;
 }
@@ -647,6 +673,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
                            struct flow_cls_offload *tc_flow_cmd)
 {
        struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
+       struct otx2_flow_config *flow_cfg = nic->flow_cfg;
        struct otx2_tc_info *tc_info = &nic->tc_info;
        struct otx2_tc_flow *new_node, *old_node;
        struct npc_install_flow_req *req, dummy;
@@ -655,9 +682,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
        if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
                return -ENOMEM;
 
-       if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
+       if (bitmap_full(tc_info->tc_entries_bitmap, flow_cfg->max_flows)) {
                NL_SET_ERR_MSG_MOD(extack,
-                                  "Not enough MCAM space to add the flow");
+                                  "Free MCAM entry not available to add the flow");
                return -ENOMEM;
        }
 
@@ -695,10 +722,9 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
        memcpy(req, &dummy, sizeof(struct npc_install_flow_req));
 
        new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
-                                              nic->flow_cfg->tc_max_flows);
+                                              flow_cfg->max_flows);
        req->channel = nic->hw.rx_chan_base;
-       req->entry = nic->flow_cfg->flow_ent[nic->flow_cfg->tc_flower_offset +
-                               nic->flow_cfg->tc_max_flows - new_node->bitpos];
+       req->entry = flow_cfg->flow_ent[flow_cfg->max_flows - new_node->bitpos - 1];
        req->intf = NIX_INTF_RX;
        req->set_cntr = 1;
        new_node->entry = req->entry;
@@ -723,7 +749,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
        }
 
        set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
-       tc_info->num_entries++;
+       flow_cfg->nr_flows++;
 
        return 0;
 
@@ -1008,10 +1034,21 @@ static const struct rhashtable_params tc_flow_ht_params = {
 int otx2_init_tc(struct otx2_nic *nic)
 {
        struct otx2_tc_info *tc = &nic->tc_info;
+       int err;
 
        /* Exclude receive queue 0 being used for police action */
        set_bit(0, &nic->rq_bmap);
 
+       if (!nic->flow_cfg) {
+               netdev_err(nic->netdev,
+                          "Can't init TC, nic->flow_cfg is not setup\n");
+               return -EINVAL;
+       }
+
+       err = otx2_tc_alloc_ent_bitmap(nic);
+       if (err)
+               return err;
+
        tc->flow_ht_params = tc_flow_ht_params;
        return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
 }
@@ -1020,5 +1057,6 @@ void otx2_shutdown_tc(struct otx2_nic *nic)
 {
        struct otx2_tc_info *tc = &nic->tc_info;
 
+       kfree(tc->tc_entries_bitmap);
        rhashtable_destroy(&tc->flow_table);
 }
index 22ec03a..f42b1d4 100644 (file)
@@ -1,11 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/etherdevice.h>
index 2f144e2..869de5f 100644 (file)
@@ -1,11 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Marvell OcteonTx2 RVU Ethernet driver
+/* Marvell RVU Ethernet driver
  *
- * Copyright (C) 2020 Marvell International Ltd.
+ * Copyright (C) 2020 Marvell.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OTX2_TXRX_H
index a8bee5a..03b4ec6 100644 (file)
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Marvell OcteonTx2 RVU Virtual Function ethernet driver */
+/* Marvell RVU Virtual Function ethernet driver
+ *
+ * Copyright (C) 2020 Marvell.
+ *
+ */
 
 #include <linux/etherdevice.h>
 #include <linux/module.h>
@@ -464,6 +468,28 @@ static void otx2vf_reset_task(struct work_struct *work)
        rtnl_unlock();
 }
 
+static int otx2vf_set_features(struct net_device *netdev,
+                              netdev_features_t features)
+{
+       netdev_features_t changed = features ^ netdev->features;
+       bool ntuple_enabled = !!(features & NETIF_F_NTUPLE);
+       struct otx2_nic *vf = netdev_priv(netdev);
+
+       if (changed & NETIF_F_NTUPLE) {
+               if (!ntuple_enabled) {
+                       otx2_mcam_flow_del(vf);
+                       return 0;
+               }
+
+               if (!otx2_get_maxflows(vf->flow_cfg)) {
+                       netdev_err(netdev,
+                                  "Can't enable NTUPLE, MCAM entries not allocated\n");
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
 static const struct net_device_ops otx2vf_netdev_ops = {
        .ndo_open = otx2vf_open,
        .ndo_stop = otx2vf_stop,
@@ -471,6 +497,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
        .ndo_set_rx_mode = otx2vf_set_rx_mode,
        .ndo_set_mac_address = otx2_set_mac_address,
        .ndo_change_mtu = otx2vf_change_mtu,
+       .ndo_set_features = otx2vf_set_features,
        .ndo_get_stats64 = otx2_get_stats64,
        .ndo_tx_timeout = otx2_tx_timeout,
 };
@@ -627,12 +654,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                                NETIF_F_HW_VLAN_STAG_TX;
        netdev->features |= netdev->hw_features;
 
+       netdev->hw_features |= NETIF_F_NTUPLE;
+       netdev->hw_features |= NETIF_F_RXALL;
+
        netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
        netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
 
        netdev->netdev_ops = &otx2vf_netdev_ops;
 
-       /* MTU range: 68 - 9190 */
        netdev->min_mtu = OTX2_MIN_MTU;
        netdev->max_mtu = otx2_get_max_mtu(vf);
 
@@ -658,6 +687,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        otx2vf_set_ethtool_ops(netdev);
 
+       err = otx2vf_mcam_flow_init(vf);
+       if (err)
+               goto err_unreg_netdev;
+
+       err = otx2_register_dl(vf);
+       if (err)
+               goto err_unreg_netdev;
+
        /* Enable pause frames by default */
        vf->flags |= OTX2_FLAG_RX_PAUSE_ENABLED;
        vf->flags |= OTX2_FLAG_TX_PAUSE_ENABLED;
@@ -695,6 +732,7 @@ static void otx2vf_remove(struct pci_dev *pdev)
        vf = netdev_priv(netdev);
 
        cancel_work_sync(&vf->reset_task);
+       otx2_unregister_dl(vf);
        unregister_netdev(netdev);
        if (vf->otx2_wq)
                destroy_workqueue(vf->otx2_wq);
index fa7a068..68b442e 100644 (file)
@@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = {
        .trap_drop_counter_get = prestera_drop_counter_get,
 };
 
-struct prestera_switch *prestera_devlink_alloc(void)
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch));
+       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
+                          dev->dev);
 
        return devlink_priv(dl);
 }
@@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw)
        struct devlink *dl = priv_to_devlink(sw);
        int err;
 
-       err = devlink_register(dl, sw->dev->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
                return err;
index 5d73aa9..cc34c3d 100644 (file)
@@ -6,7 +6,7 @@
 
 #include "prestera.h"
 
-struct prestera_switch *prestera_devlink_alloc(void);
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev);
 void prestera_devlink_free(struct prestera_switch *sw);
 
 int prestera_devlink_register(struct prestera_switch *sw);
index 226f4ff..44c6708 100644 (file)
@@ -746,7 +746,8 @@ static int prestera_netdev_port_event(struct net_device *lower,
        case NETDEV_CHANGEUPPER:
                if (netif_is_bridge_master(upper)) {
                        if (info->linking)
-                               return prestera_bridge_port_join(upper, port);
+                               return prestera_bridge_port_join(upper, port,
+                                                                extack);
                        else
                                prestera_bridge_port_leave(upper, port);
                } else if (netif_is_lag_master(upper)) {
@@ -904,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev)
        struct prestera_switch *sw;
        int err;
 
-       sw = prestera_devlink_alloc();
+       sw = prestera_devlink_alloc(dev);
        if (!sw)
                return -ENOMEM;
 
index 9a30916..3ce6ccd 100644 (file)
@@ -480,7 +480,8 @@ err_port_flood_set:
 }
 
 int prestera_bridge_port_join(struct net_device *br_dev,
-                             struct prestera_port *port)
+                             struct prestera_port *port,
+                             struct netlink_ext_ack *extack)
 {
        struct prestera_switchdev *swdev = port->sw->swdev;
        struct prestera_bridge_port *br_port;
@@ -500,6 +501,11 @@ int prestera_bridge_port_join(struct net_device *br_dev,
                goto err_brport_create;
        }
 
+       err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
+                                           NULL, NULL, false, extack);
+       if (err)
+               goto err_switchdev_offload;
+
        if (bridge->vlan_enabled)
                return 0;
 
@@ -510,6 +516,8 @@ int prestera_bridge_port_join(struct net_device *br_dev,
        return 0;
 
 err_port_join:
+       switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
+err_switchdev_offload:
        prestera_bridge_port_put(br_port);
 err_brport_create:
        prestera_bridge_put(bridge);
@@ -584,6 +592,8 @@ void prestera_bridge_port_leave(struct net_device *br_dev,
        else
                prestera_bridge_1d_port_leave(br_port);
 
+       switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
+
        prestera_hw_port_learning_set(port, false);
        prestera_hw_port_flood_set(port, BR_FLOOD | BR_MCAST_FLOOD, 0);
        prestera_port_vid_stp_set(port, PRESTERA_VID_ALL, BR_STATE_FORWARDING);
index a91bc35..0e93fda 100644 (file)
@@ -8,7 +8,8 @@ int prestera_switchdev_init(struct prestera_switch *sw);
 void prestera_switchdev_fini(struct prestera_switch *sw);
 
 int prestera_bridge_port_join(struct net_device *br_dev,
-                             struct prestera_port *port);
+                             struct prestera_port *port,
+                             struct netlink_ext_ack *extack);
 
 void prestera_bridge_port_leave(struct net_device *br_dev,
                                struct prestera_port *port);
index 9b48ae4..fab53c9 100644 (file)
@@ -1377,7 +1377,7 @@ static const struct net_device_ops pxa168_eth_netdev_ops = {
        .ndo_set_rx_mode        = pxa168_eth_set_rx_mode,
        .ndo_set_mac_address    = pxa168_eth_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
        .ndo_change_mtu         = pxa168_eth_change_mtu,
        .ndo_tx_timeout         = pxa168_eth_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index d4bb27b..051dd3f 100644 (file)
@@ -615,7 +615,9 @@ static inline u32 skge_usecs2clk(const struct skge_hw *hw, u32 usec)
 }
 
 static int skge_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct skge_port *skge = netdev_priv(dev);
        struct skge_hw *hw = skge->hw;
@@ -639,7 +641,9 @@ static int skge_get_coalesce(struct net_device *dev,
 
 /* Note: interrupt timer is per board, but can turn on/off per port */
 static int skge_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct skge_port *skge = netdev_priv(dev);
        struct skge_hw *hw = skge->hw;
@@ -3787,7 +3791,7 @@ static const struct net_device_ops skge_netdev_ops = {
        .ndo_open               = skge_up,
        .ndo_stop               = skge_down,
        .ndo_start_xmit         = skge_xmit_frame,
-       .ndo_do_ioctl           = skge_ioctl,
+       .ndo_eth_ioctl          = skge_ioctl,
        .ndo_get_stats          = skge_get_stats,
        .ndo_tx_timeout         = skge_tx_timeout,
        .ndo_change_mtu         = skge_change_mtu,
index 8b8bff5..e9fc74e 100644 (file)
@@ -4052,7 +4052,9 @@ static int sky2_set_pauseparam(struct net_device *dev,
 }
 
 static int sky2_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct sky2_port *sky2 = netdev_priv(dev);
        struct sky2_hw *hw = sky2->hw;
@@ -4087,7 +4089,9 @@ static int sky2_get_coalesce(struct net_device *dev,
 
 /* Note: this affect both ports */
 static int sky2_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *ecmd)
+                            struct ethtool_coalesce *ecmd,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct sky2_port *sky2 = netdev_priv(dev);
        struct sky2_hw *hw = sky2->hw;
@@ -4693,7 +4697,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
        .ndo_open               = sky2_open,
        .ndo_stop               = sky2_close,
        .ndo_start_xmit         = sky2_xmit_frame,
-       .ndo_do_ioctl           = sky2_ioctl,
+       .ndo_eth_ioctl          = sky2_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = sky2_set_mac_address,
        .ndo_set_rx_mode        = sky2_set_multicast,
@@ -4710,7 +4714,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = {
        .ndo_open               = sky2_open,
        .ndo_stop               = sky2_close,
        .ndo_start_xmit         = sky2_xmit_frame,
-       .ndo_do_ioctl           = sky2_ioctl,
+       .ndo_eth_ioctl          = sky2_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = sky2_set_mac_address,
        .ndo_set_rx_mode        = sky2_set_multicast,
@@ -4884,7 +4888,7 @@ static int sky2_test_msi(struct sky2_hw *hw)
 /* This driver supports yukon2 chipset only */
 static const char *sky2_name(u8 chipid, char *buf, int sz)
 {
-       const char *name[] = {
+       static const char *const name[] = {
                "XL",           /* 0xb3 */
                "EC Ultra",     /* 0xb4 */
                "Extreme",      /* 0xb5 */
index 64adfd2..398c23c 100644 (file)
@@ -2933,7 +2933,7 @@ static const struct net_device_ops mtk_netdev_ops = {
        .ndo_start_xmit         = mtk_start_xmit,
        .ndo_set_mac_address    = mtk_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = mtk_do_ioctl,
+       .ndo_eth_ioctl          = mtk_do_ioctl,
        .ndo_change_mtu         = mtk_change_mtu,
        .ndo_tx_timeout         = mtk_tx_timeout,
        .ndo_get_stats64        = mtk_get_stats64,
index 96d2891..1d5dd20 100644 (file)
@@ -1162,7 +1162,7 @@ static const struct net_device_ops mtk_star_netdev_ops = {
        .ndo_start_xmit         = mtk_star_netdev_start_xmit,
        .ndo_get_stats64        = mtk_star_netdev_get_stats64,
        .ndo_set_rx_mode        = mtk_star_set_rx_mode,
-       .ndo_do_ioctl           = mtk_star_netdev_ioctl,
+       .ndo_eth_ioctl          = mtk_star_netdev_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
index 400e611..1b4b1f6 100644 (file)
@@ -6,8 +6,8 @@
 config MLX4_EN
        tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
        depends on PCI && NETDEVICES && ETHERNET && INET
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MLX4_CORE
-       imply PTP_1588_CLOCK
        help
          This driver supports Mellanox Technologies ConnectX Ethernet
          devices.
index 3616b77..ef518b1 100644 (file)
@@ -998,7 +998,9 @@ mlx4_en_set_link_ksettings(struct net_device *dev,
 }
 
 static int mlx4_en_get_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                               struct ethtool_coalesce *coal,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
@@ -1020,7 +1022,9 @@ static int mlx4_en_get_coalesce(struct net_device *dev,
 }
 
 static int mlx4_en_set_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *coal)
+                               struct ethtool_coalesce *coal,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
index 5d0c9c6..a2f61a8 100644 (file)
@@ -2828,7 +2828,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
        .ndo_set_mac_address    = mlx4_en_set_mac,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = mlx4_en_change_mtu,
-       .ndo_do_ioctl           = mlx4_en_ioctl,
+       .ndo_eth_ioctl          = mlx4_en_ioctl,
        .ndo_tx_timeout         = mlx4_en_tx_timeout,
        .ndo_vlan_rx_add_vid    = mlx4_en_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = mlx4_en_vlan_rx_kill_vid,
index 442991d..7f6d3b8 100644 (file)
@@ -991,7 +991,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
                 * expense of more costly truesize accounting
                 */
                priv->frag_info[0].frag_stride = PAGE_SIZE;
-               priv->dma_dir = PCI_DMA_BIDIRECTIONAL;
+               priv->dma_dir = DMA_BIDIRECTIONAL;
                priv->rx_headroom = XDP_PACKET_HEADROOM;
                i = 1;
        } else {
@@ -1021,7 +1021,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
                        buf_size += frag_size;
                        i++;
                }
-               priv->dma_dir = PCI_DMA_FROMDEVICE;
+               priv->dma_dir = DMA_FROM_DEVICE;
                priv->rx_headroom = 0;
        }
 
index 31b74bd..c56b9db 100644 (file)
@@ -297,12 +297,12 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                        dma_unmap_single(priv->ddev,
                                         tx_info->map0_dma,
                                         tx_info->map0_byte_count,
-                                        PCI_DMA_TODEVICE);
+                                        DMA_TO_DEVICE);
                else
                        dma_unmap_page(priv->ddev,
                                       tx_info->map0_dma,
                                       tx_info->map0_byte_count,
-                                      PCI_DMA_TODEVICE);
+                                      DMA_TO_DEVICE);
                /* Optimize the common case when there are no wraparounds */
                if (likely((void *)tx_desc +
                           (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
@@ -311,7 +311,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                dma_unmap_page(priv->ddev,
                                        (dma_addr_t)be64_to_cpu(data->addr),
                                        be32_to_cpu(data->byte_count),
-                                       PCI_DMA_TODEVICE);
+                                       DMA_TO_DEVICE);
                        }
                } else {
                        if ((void *)data >= end)
@@ -325,7 +325,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                dma_unmap_page(priv->ddev,
                                        (dma_addr_t)be64_to_cpu(data->addr),
                                        be32_to_cpu(data->byte_count),
-                                       PCI_DMA_TODEVICE);
+                                       DMA_TO_DEVICE);
                        }
                }
        }
@@ -831,7 +831,7 @@ static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
 
                dma = dma_map_single(ddev, skb->data +
                                     lso_header_size, byte_count,
-                                    PCI_DMA_TODEVICE);
+                                    DMA_TO_DEVICE);
                if (dma_mapping_error(ddev, dma))
                        goto tx_drop_unmap;
 
@@ -853,7 +853,7 @@ tx_drop_unmap:
                ++data;
                dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
                               be32_to_cpu(data->byte_count),
-                              PCI_DMA_TODEVICE);
+                              DMA_TO_DEVICE);
        }
 
        return false;
@@ -1170,7 +1170,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
        tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
 
        dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
-                                        length, PCI_DMA_TODEVICE);
+                                        length, DMA_TO_DEVICE);
 
        data->addr = cpu_to_be64(dma + frame->page_offset);
        dma_wmb();
index 28ac469..5a6b0fc 100644 (file)
@@ -3806,24 +3806,15 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 
        pci_set_master(pdev);
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
                        goto err_release_regions;
                }
        }
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err) {
-               dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
-                       goto err_release_regions;
-               }
-       }
 
        /* Allow large DMA segments, up to the firmware limit of 1 GB */
        dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
@@ -4005,7 +3996,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        printk_once(KERN_INFO "%s", mlx4_version);
 
-       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
        priv = devlink_priv(devlink);
@@ -4024,7 +4015,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        mutex_init(&dev->persist->interface_state_mutex);
        mutex_init(&dev->persist->pci_status_mutex);
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret)
                goto err_persist_free;
        ret = devlink_params_register(devlink, mlx4_devlink_params,
index 427e7a3..b149e60 100644 (file)
@@ -739,7 +739,7 @@ static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev)
                int i;
 
                for (i = 0;
-                    i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]);
+                    i < ARRAY_SIZE(qp_table->zones_uids);
                     i++) {
                        struct mlx4_bitmap *bitmap =
                                mlx4_zone_get_bitmap(qp_table->zones,
@@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 {
        int err;
        int i;
-       enum mlx4_qp_state states[] = {
+       static const enum mlx4_qp_state states[] = {
                MLX4_QP_STATE_RST,
                MLX4_QP_STATE_INIT,
                MLX4_QP_STATE_RTR,
index e1a5a79..9205645 100644 (file)
@@ -10,7 +10,7 @@ config MLX5_CORE
        select NET_DEVLINK
        depends on VXLAN || !VXLAN
        depends on MLXFW || !MLXFW
-       depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE
        help
          Core driver for low level functionality of the ConnectX-4 and
index b5072a3..63032cd 100644 (file)
@@ -15,14 +15,15 @@ mlx5_core-y :=      main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
                transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
-               lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+               lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
                diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
                fw_reset.o qos.o
 
 #
 # Netdev basic
 #
-mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
+mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
+               en/channels.o en_main.o en_common.o en_fs.o en_ethtool.o \
                en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
                en_selftest.o en/port.o en/monitor_stats.o en/health.o \
                en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
@@ -43,19 +44,22 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
                                        lib/fs_chains.o en/tc_tun.o \
                                        esw/indir_table.o en/tc_tun_encap.o \
                                        en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
-                                       en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
+                                       en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
+                                       en/tc/post_act.o
 mlx5_core-$(CONFIG_MLX5_TC_CT)      += en/tc_ct.o
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
 
 #
 # Core extra
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
-                                     ecpf.o rdma.o esw/legacy.o
+                                     ecpf.o rdma.o esw/legacy.o \
+                                     esw/devlink_port.o esw/vporttbl.o esw/qos.o
+
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
                                      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
-                                     esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
-                                     esw/devlink_port.o esw/vporttbl.o
-mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o
+                                     esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
 mlx5_core-$(CONFIG_MLX5_BRIDGE)    += esw/bridge.o en/rep/bridge.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
index 9d79c5e..db5dfff 100644 (file)
@@ -877,7 +877,7 @@ static void cb_timeout_handler(struct work_struct *work)
        ent->ret = -ETIMEDOUT;
        mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
                       ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
-       mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+       mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
 
 out:
        cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
@@ -994,7 +994,7 @@ static void cmd_work_handler(struct work_struct *work)
                MLX5_SET(mbox_out, ent->out, status, status);
                MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
 
-               mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+               mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
                return;
        }
 
@@ -1008,7 +1008,7 @@ static void cmd_work_handler(struct work_struct *work)
                poll_timeout(ent);
                /* make sure we read the descriptor after ownership is SW */
                rmb();
-               mlx5_cmd_comp_handler(dev, 1UL << ent->idx, (ent->ret == -ETIMEDOUT));
+               mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
        }
 }
 
@@ -1068,7 +1068,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
                       mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
 
        ent->ret = -ETIMEDOUT;
-       mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+       mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true);
 }
 
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
index 360e093..cf97985 100644 (file)
@@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen, u32 *out, int outlen)
 {
-       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+                          c_eqn_or_apu_element);
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
        struct mlx5_eq_comp *eq;
        int err;
index 20bb372..e8093c4 100644 (file)
@@ -53,7 +53,7 @@ static bool is_eth_rep_supported(struct mlx5_core_dev *dev)
        return true;
 }
 
-static bool is_eth_supported(struct mlx5_core_dev *dev)
+bool mlx5_eth_supported(struct mlx5_core_dev *dev)
 {
        if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
                return false;
@@ -105,7 +105,18 @@ static bool is_eth_supported(struct mlx5_core_dev *dev)
        return true;
 }
 
-static bool is_vnet_supported(struct mlx5_core_dev *dev)
+static bool is_eth_enabled(struct mlx5_core_dev *dev)
+{
+       union devlink_param_value val;
+       int err;
+
+       err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+                                                DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+                                                &val);
+       return err ? false : val.vbool;
+}
+
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
 {
        if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
                return false;
@@ -127,6 +138,17 @@ static bool is_vnet_supported(struct mlx5_core_dev *dev)
        return true;
 }
 
+static bool is_vnet_enabled(struct mlx5_core_dev *dev)
+{
+       union devlink_param_value val;
+       int err;
+
+       err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+                                                DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+                                                &val);
+       return err ? false : val.vbool;
+}
+
 static bool is_ib_rep_supported(struct mlx5_core_dev *dev)
 {
        if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
@@ -170,7 +192,7 @@ static bool is_mp_supported(struct mlx5_core_dev *dev)
        return true;
 }
 
-static bool is_ib_supported(struct mlx5_core_dev *dev)
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev)
 {
        if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND))
                return false;
@@ -187,6 +209,17 @@ static bool is_ib_supported(struct mlx5_core_dev *dev)
        return true;
 }
 
+static bool is_ib_enabled(struct mlx5_core_dev *dev)
+{
+       union devlink_param_value val;
+       int err;
+
+       err = devlink_param_driverinit_value_get(priv_to_devlink(dev),
+                                                DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+                                                &val);
+       return err ? false : val.vbool;
+}
+
 enum {
        MLX5_INTERFACE_PROTOCOL_ETH,
        MLX5_INTERFACE_PROTOCOL_ETH_REP,
@@ -201,13 +234,17 @@ enum {
 static const struct mlx5_adev_device {
        const char *suffix;
        bool (*is_supported)(struct mlx5_core_dev *dev);
+       bool (*is_enabled)(struct mlx5_core_dev *dev);
 } mlx5_adev_devices[] = {
        [MLX5_INTERFACE_PROTOCOL_VNET] = { .suffix = "vnet",
-                                          .is_supported = &is_vnet_supported },
+                                          .is_supported = &mlx5_vnet_supported,
+                                          .is_enabled = &is_vnet_enabled },
        [MLX5_INTERFACE_PROTOCOL_IB] = { .suffix = "rdma",
-                                        .is_supported = &is_ib_supported },
+                                        .is_supported = &mlx5_rdma_supported,
+                                        .is_enabled = &is_ib_enabled },
        [MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
-                                         .is_supported = &is_eth_supported },
+                                         .is_supported = &mlx5_eth_supported,
+                                         .is_enabled = &is_eth_enabled },
        [MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
                                           .is_supported = &is_eth_rep_supported },
        [MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
@@ -308,6 +345,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
                if (!priv->adev[i]) {
                        bool is_supported = false;
 
+                       if (mlx5_adev_devices[i].is_enabled) {
+                               bool enabled;
+
+                               enabled = mlx5_adev_devices[i].is_enabled(dev);
+                               if (!enabled)
+                                       continue;
+                       }
+
                        if (mlx5_adev_devices[i].is_supported)
                                is_supported = mlx5_adev_devices[i].is_supported(dev);
 
@@ -360,6 +405,14 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
                if (!priv->adev[i])
                        continue;
 
+               if (mlx5_adev_devices[i].is_enabled) {
+                       bool enabled;
+
+                       enabled = mlx5_adev_devices[i].is_enabled(dev);
+                       if (!enabled)
+                               goto skip_suspend;
+               }
+
                adev = &priv->adev[i]->adev;
                /* Auxiliary driver was unbind manually through sysfs */
                if (!adev->dev.driver)
@@ -447,12 +500,21 @@ static void delete_drivers(struct mlx5_core_dev *dev)
                if (!priv->adev[i])
                        continue;
 
+               if (mlx5_adev_devices[i].is_enabled) {
+                       bool enabled;
+
+                       enabled = mlx5_adev_devices[i].is_enabled(dev);
+                       if (!enabled)
+                               goto del_adev;
+               }
+
                if (mlx5_adev_devices[i].is_supported && !delete_all)
                        is_supported = mlx5_adev_devices[i].is_supported(dev);
 
                if (is_supported)
                        continue;
 
+del_adev:
                del_adev(&priv->adev[i]->adev);
                priv->adev[i] = NULL;
        }
index d791d35..e84287f 100644 (file)
@@ -7,6 +7,7 @@
 #include "fw_reset.h"
 #include "fs_core.h"
 #include "eswitch.h"
+#include "esw/qos.h"
 #include "sf/dev/dev.h"
 #include "sf/sf.h"
 
@@ -292,6 +293,13 @@ static const struct devlink_ops mlx5_devlink_ops = {
        .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
        .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
        .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
+       .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
+       .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+       .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
+       .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
+       .rate_node_new = mlx5_esw_devlink_rate_node_new,
+       .rate_node_del = mlx5_esw_devlink_rate_node_del,
+       .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
 #endif
 #ifdef CONFIG_MLX5_SF_MANAGER
        .port_new = mlx5_devlink_sf_port_new,
@@ -359,9 +367,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
        return 0;
 }
 
-struct devlink *mlx5_devlink_alloc(void)
+struct devlink *mlx5_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+                            dev);
 }
 
 void mlx5_devlink_free(struct devlink *devlink)
@@ -595,6 +604,157 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
 #endif
 }
 
+static const struct devlink_param enable_eth_param =
+       DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             NULL, NULL, NULL);
+
+static int mlx5_devlink_eth_param_register(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       union devlink_param_value value;
+       int err;
+
+       if (!mlx5_eth_supported(dev))
+               return 0;
+
+       err = devlink_param_register(devlink, &enable_eth_param);
+       if (err)
+               return err;
+
+       value.vbool = true;
+       devlink_param_driverinit_value_set(devlink,
+                                          DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+                                          value);
+       devlink_param_publish(devlink, &enable_eth_param);
+       return 0;
+}
+
+static void mlx5_devlink_eth_param_unregister(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+       if (!mlx5_eth_supported(dev))
+               return;
+
+       devlink_param_unpublish(devlink, &enable_eth_param);
+       devlink_param_unregister(devlink, &enable_eth_param);
+}
+
+static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id,
+                                            union devlink_param_value val,
+                                            struct netlink_ext_ack *extack)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       bool new_state = val.vbool;
+
+       if (new_state && !mlx5_rdma_supported(dev))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
+static const struct devlink_param enable_rdma_param =
+       DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             NULL, NULL, mlx5_devlink_enable_rdma_validate);
+
+static int mlx5_devlink_rdma_param_register(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       union devlink_param_value value;
+       int err;
+
+       if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev))
+               return 0;
+
+       err = devlink_param_register(devlink, &enable_rdma_param);
+       if (err)
+               return err;
+
+       value.vbool = true;
+       devlink_param_driverinit_value_set(devlink,
+                                          DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+                                          value);
+       devlink_param_publish(devlink, &enable_rdma_param);
+       return 0;
+}
+
+static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+       if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND) || MLX5_ESWITCH_MANAGER(dev))
+               return;
+
+       devlink_param_unpublish(devlink, &enable_rdma_param);
+       devlink_param_unregister(devlink, &enable_rdma_param);
+}
+
+static const struct devlink_param enable_vnet_param =
+       DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+                             NULL, NULL, NULL);
+
+static int mlx5_devlink_vnet_param_register(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       union devlink_param_value value;
+       int err;
+
+       if (!mlx5_vnet_supported(dev))
+               return 0;
+
+       err = devlink_param_register(devlink, &enable_vnet_param);
+       if (err)
+               return err;
+
+       value.vbool = true;
+       devlink_param_driverinit_value_set(devlink,
+                                          DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+                                          value);
+       devlink_param_publish(devlink, &enable_rdma_param);
+       return 0;
+}
+
+static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+       if (!mlx5_vnet_supported(dev))
+               return;
+
+       devlink_param_unpublish(devlink, &enable_vnet_param);
+       devlink_param_unregister(devlink, &enable_vnet_param);
+}
+
+static int mlx5_devlink_auxdev_params_register(struct devlink *devlink)
+{
+       int err;
+
+       err = mlx5_devlink_eth_param_register(devlink);
+       if (err)
+               return err;
+
+       err = mlx5_devlink_rdma_param_register(devlink);
+       if (err)
+               goto rdma_err;
+
+       err = mlx5_devlink_vnet_param_register(devlink);
+       if (err)
+               goto vnet_err;
+       return 0;
+
+vnet_err:
+       mlx5_devlink_rdma_param_unregister(devlink);
+rdma_err:
+       mlx5_devlink_eth_param_unregister(devlink);
+       return err;
+}
+
+static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
+{
+       mlx5_devlink_vnet_param_unregister(devlink);
+       mlx5_devlink_rdma_param_unregister(devlink);
+       mlx5_devlink_eth_param_unregister(devlink);
+}
+
 #define MLX5_TRAP_DROP(_id, _group_id)                                 \
        DEVLINK_TRAP_GENERIC(DROP, DROP, _id,                           \
                             DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
@@ -638,11 +798,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
                                       ARRAY_SIZE(mlx5_trap_groups_arr));
 }
 
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+int mlx5_devlink_register(struct devlink *devlink)
 {
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err)
                return err;
 
@@ -653,6 +813,10 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
        mlx5_devlink_set_params_init_values(devlink);
        devlink_params_publish(devlink);
 
+       err = mlx5_devlink_auxdev_params_register(devlink);
+       if (err)
+               goto auxdev_reg_err;
+
        err = mlx5_devlink_traps_register(devlink);
        if (err)
                goto traps_reg_err;
@@ -660,6 +824,8 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
        return 0;
 
 traps_reg_err:
+       mlx5_devlink_auxdev_params_unregister(devlink);
+auxdev_reg_err:
        devlink_params_unregister(devlink, mlx5_devlink_params,
                                  ARRAY_SIZE(mlx5_devlink_params));
 params_reg_err:
@@ -670,6 +836,8 @@ params_reg_err:
 void mlx5_devlink_unregister(struct devlink *devlink)
 {
        mlx5_devlink_traps_unregister(devlink);
+       mlx5_devlink_auxdev_params_unregister(devlink);
+       devlink_params_unpublish(devlink);
        devlink_params_unregister(devlink, mlx5_devlink_params,
                                  ARRAY_SIZE(mlx5_devlink_params));
        devlink_unregister(devlink);
index 7318d44..30bf488 100644 (file)
@@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
 int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
                                  enum devlink_trap_action *action);
 
-struct devlink *mlx5_devlink_alloc(void);
+struct devlink *mlx5_devlink_alloc(struct device *dev);
 void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
 void mlx5_devlink_unregister(struct devlink *devlink);
 
 #endif /* __MLX5_DEVLINK_H__ */
index b1b51bb..669a75f 100644 (file)
@@ -58,6 +58,7 @@
 #include "en/qos.h"
 #include "lib/hv_vhca.h"
 #include "lib/clock.h"
+#include "en/rx_res.h"
 
 extern const struct net_device_ops mlx5e_netdev_ops;
 struct page_pool;
@@ -65,14 +66,13 @@ struct page_pool;
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
 
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
 
 #define MLX5E_MAX_NUM_TC       8
+#define MLX5E_MAX_NUM_MQPRIO_CH_TC TC_QOPT_MAX_QUEUE
 
 #define MLX5_RX_HEADROOM NET_SKB_PAD
 #define MLX5_SKB_FRAG_SZ(len)  (SKB_DATA_ALIGN(len) +  \
@@ -126,7 +126,6 @@ struct page_pool;
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x2
 
-#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
 #define MLX5E_DEFAULT_LRO_TIMEOUT                       32
 #define MLX5E_LRO_TIMEOUT_ARR_SIZE                      4
 
@@ -139,8 +138,6 @@ struct page_pool;
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES                0x80
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW            0x2
 
-#define MLX5E_LOG_INDIR_RQT_SIZE       0x8
-#define MLX5E_INDIR_RQT_SIZE           BIT(MLX5E_LOG_INDIR_RQT_SIZE)
 #define MLX5E_MIN_NUM_CHANNELS         0x1
 #define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE / 2)
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
@@ -252,7 +249,10 @@ struct mlx5e_params {
        u8  rq_wq_type;
        u8  log_rq_mtu_frames;
        u16 num_channels;
-       u8  num_tc;
+       struct {
+               u16 mode;
+               u8 num_tc;
+       } mqprio;
        bool rx_cqe_compress_def;
        bool tunneled_offload_en;
        struct dim_cq_moder rx_cq_moderation;
@@ -272,6 +272,12 @@ struct mlx5e_params {
        bool ptp_rx;
 };
 
+static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
+{
+       return params->mqprio.mode == TC_MQPRIO_MODE_DCB ?
+               params->mqprio.num_tc : 1;
+}
+
 enum {
        MLX5E_RQ_STATE_ENABLED,
        MLX5E_RQ_STATE_RECOVERING,
@@ -745,29 +751,11 @@ enum {
        MLX5E_STATE_XDP_ACTIVE,
 };
 
-struct mlx5e_rqt {
-       u32              rqtn;
-       bool             enabled;
-};
-
-struct mlx5e_tir {
-       u32               tirn;
-       struct mlx5e_rqt  rqt;
-       struct list_head  list;
-};
-
 enum {
        MLX5E_TC_PRIO = 0,
        MLX5E_NIC_PRIO
 };
 
-struct mlx5e_rss_params {
-       u32     indirection_rqt[MLX5E_INDIR_RQT_SIZE];
-       u32     rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
-       u8      toeplitz_hash_key[40];
-       u8      hfunc;
-};
-
 struct mlx5e_modify_sq_param {
        int curr_state;
        int next_state;
@@ -837,13 +825,7 @@ struct mlx5e_priv {
 
        struct mlx5e_channels      channels;
        u32                        tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC];
-       struct mlx5e_rqt           indir_rqt;
-       struct mlx5e_tir           indir_tir[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
-       struct mlx5e_tir           xsk_tir[MLX5E_MAX_NUM_CHANNELS];
-       struct mlx5e_tir           ptp_tir;
-       struct mlx5e_rss_params    rss_params;
+       struct mlx5e_rx_res       *rx_res;
        u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
        struct mlx5e_flow_steering fs;
@@ -948,25 +930,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
                           u16 vid);
 void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 
-struct mlx5e_redirect_rqt_param {
-       bool is_rss;
-       union {
-               u32 rqn; /* Direct RQN (Non-RSS) */
-               struct {
-                       u8 hfunc;
-                       struct mlx5e_channels *channels;
-               } rss; /* RSS data */
-       };
-};
-
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
-                      struct mlx5e_redirect_rqt_param rrp);
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
-                                   const struct mlx5e_tirc_config *ttconfig,
-                                   void *tirc, bool inner);
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in);
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
-
 struct mlx5e_xsk_param;
 
 struct mlx5e_rq_param;
@@ -1028,9 +991,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels);
-
 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
 void mlx5e_activate_rq(struct mlx5e_rq *rq);
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@ -1065,10 +1025,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 
-int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
-                    u32 *in);
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
-                      struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
 int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
@@ -1084,17 +1040,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
 int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
 void mlx5e_free_di_list(struct mlx5e_rq *rq);
 
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
-
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
 
@@ -1106,7 +1051,6 @@ int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
 
 void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
-int mlx5e_bits_invert(unsigned long a, int size);
 
 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv);
 int mlx5e_set_dev_port_mtu_ctx(struct mlx5e_priv *priv, void *context);
@@ -1183,8 +1127,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels);
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644 (file)
index 0000000..e7c14c0
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+       return chs->num;
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       *rqn = c->rq.rqn;
+}
+
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+               return false;
+
+       *rqn = c->xskrq.rqn;
+       return true;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+       struct mlx5e_ptp *c = chs->ptp;
+
+       if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+               return false;
+
+       *rqn = c->rq.rqn;
+       return true;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644 (file)
index 0000000..ca00cbc
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
index bc33eaa..86e0793 100644 (file)
@@ -55,19 +55,15 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
 {
        struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 
-       if (dl_port->registered)
-               devlink_port_unregister(dl_port);
+       devlink_port_unregister(dl_port);
 }
 
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct devlink_port *port;
 
        if (!netif_device_present(dev))
                return NULL;
-       port = mlx5e_devlink_get_dl_port(priv);
-       if (port->registered)
-               return port;
-       return NULL;
+
+       return mlx5e_devlink_get_dl_port(priv);
 }
index 1d5ce07..41684a6 100644 (file)
@@ -5,6 +5,9 @@
 #define __MLX5E_FLOW_STEER_H__
 
 #include "mod_hdr.h"
+#include "lib/fs_ttc.h"
+
+struct mlx5e_post_act;
 
 enum {
        MLX5E_TC_FT_LEVEL = 0,
@@ -18,6 +21,7 @@ struct mlx5e_tc_table {
        struct mutex                    t_lock;
        struct mlx5_flow_table          *t;
        struct mlx5_fs_chains           *chains;
+       struct mlx5e_post_act           *post_act;
 
        struct rhashtable               ht;
 
@@ -67,27 +71,7 @@ struct mlx5e_l2_table {
        bool                       promisc_enabled;
 };
 
-enum mlx5e_traffic_types {
-       MLX5E_TT_IPV4_TCP,
-       MLX5E_TT_IPV6_TCP,
-       MLX5E_TT_IPV4_UDP,
-       MLX5E_TT_IPV6_UDP,
-       MLX5E_TT_IPV4_IPSEC_AH,
-       MLX5E_TT_IPV6_IPSEC_AH,
-       MLX5E_TT_IPV4_IPSEC_ESP,
-       MLX5E_TT_IPV6_IPSEC_ESP,
-       MLX5E_TT_IPV4,
-       MLX5E_TT_IPV6,
-       MLX5E_TT_ANY,
-       MLX5E_NUM_TT,
-       MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
-
-struct mlx5e_tirc_config {
-       u8 l3_prot_type;
-       u8 l4_prot_type;
-       u32 rx_hash_fields;
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
 
 #define MLX5_HASH_IP           (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP)
@@ -99,30 +83,6 @@ struct mlx5e_tirc_config {
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 
-enum mlx5e_tunnel_types {
-       MLX5E_TT_IPV4_GRE,
-       MLX5E_TT_IPV6_GRE,
-       MLX5E_TT_IPV4_IPIP,
-       MLX5E_TT_IPV6_IPIP,
-       MLX5E_TT_IPV4_IPV6,
-       MLX5E_TT_IPV6_IPV6,
-       MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-struct mlx5e_ttc_rule {
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_destination default_dest;
-};
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
-       struct mlx5e_flow_table ft;
-       struct mlx5e_ttc_rule rules[MLX5E_NUM_TT];
-       struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
 /* NIC prio FTS */
 enum {
        MLX5E_PROMISC_FT_LEVEL,
@@ -144,21 +104,7 @@ enum {
 #endif
 };
 
-#define MLX5E_TTC_NUM_GROUPS   3
-#define MLX5E_TTC_GROUP1_SIZE  (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE   BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE   BIT(0)
-#define MLX5E_TTC_TABLE_SIZE   (MLX5E_TTC_GROUP1_SIZE +\
-                                MLX5E_TTC_GROUP2_SIZE +\
-                                MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS     3
-#define MLX5E_INNER_TTC_GROUP1_SIZE    BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE    BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE    BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE     (MLX5E_INNER_TTC_GROUP1_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP2_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP3_SIZE)
+struct mlx5e_priv;
 
 #ifdef CONFIG_MLX5_EN_RXNFC
 
@@ -226,8 +172,8 @@ struct mlx5e_flow_steering {
        struct mlx5e_promisc_table      promisc;
        struct mlx5e_vlan_table         *vlan;
        struct mlx5e_l2_table           l2;
-       struct mlx5e_ttc_table          ttc;
-       struct mlx5e_ttc_table          inner_ttc;
+       struct mlx5_ttc_table           *ttc;
+       struct mlx5_ttc_table           *inner_ttc;
 #ifdef CONFIG_MLX5_EN_ARFS
        struct mlx5e_arfs_tables       *arfs;
 #endif
@@ -239,33 +185,13 @@ struct mlx5e_flow_steering {
        struct mlx5e_ptp_fs            *ptp_fs;
 };
 
-struct ttc_params {
-       struct mlx5_flow_table_attr ft_attr;
-       u32 any_tt_tirn;
-       u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params);
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc);
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel);
 
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                                struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
-                                  struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest);
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
 
 void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
 void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
@@ -273,7 +199,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
 int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
 void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
 int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
index 909faa6..7aa25a5 100644 (file)
@@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i)
        }
 }
 
-static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
 {
        switch (i) {
        case FS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        default: /* FS_IPV6_UDP */
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        }
 }
 
-static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
 {
        switch (i) {
-       case MLX5E_TT_IPV4_UDP:
+       case MLX5_TT_IPV4_UDP:
                return FS_IPV4_UDP;
-       case MLX5E_TT_IPV6_UDP:
+       case MLX5_TT_IPV6_UDP:
                return FS_IPV6_UDP;
        default:
                return FS_UDP_NUM_TYPES;
@@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
 
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port)
 {
        enum fs_udp_type type = tt2fs_udp(ttc_type);
@@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ
        fs_udp = priv->fs.udp;
        fs_udp_t = &fs_udp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
        rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.udp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
@@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv)
        fs_any = priv->fs.any;
        fs_any_t = &fs_any->table;
 
-       dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv)
        int err;
 
        /* Modify ttc rules destination to point back to the indir TIRs */
-       err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+       err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] default destination failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
@@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv)
        dest.ft = priv->fs.any->table.t;
 
        /* Modify ttc rules destination to point on the accel_fs FTs */
-       err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+       err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
index 8385df2..7a70c4f 100644 (file)
@@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
 /* UDP traffic type redirect */
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port);
 void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
 int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
index ea321e5..4e72ca8 100644 (file)
@@ -5,11 +5,15 @@
 #include <linux/slab.h>
 #include <linux/xarray.h>
 #include <linux/hashtable.h>
+#include <linux/refcount.h>
 
 #include "mapping.h"
 
 #define MAPPING_GRACE_PERIOD 2000
 
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
 struct mapping_ctx {
        struct xarray xarray;
        DECLARE_HASHTABLE(ht, 8);
@@ -20,6 +24,10 @@ struct mapping_ctx {
        struct delayed_work dwork;
        struct list_head pending_list;
        spinlock_t pending_list_lock; /* Guards pending list */
+       u64 id;
+       u8 type;
+       struct list_head list;
+       refcount_t refcount;
 };
 
 struct mapping_item {
@@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
        mutex_init(&ctx->lock);
        xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
 
+       refcount_set(&ctx->refcount, 1);
+       INIT_LIST_HEAD(&ctx->list);
+
+       return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+       struct mapping_ctx *ctx;
+
+       mutex_lock(&shared_ctx_lock);
+       list_for_each_entry(ctx, &shared_ctx_list, list) {
+               if (ctx->id == id && ctx->type == type) {
+                       if (refcount_inc_not_zero(&ctx->refcount))
+                               goto unlock;
+                       break;
+               }
+       }
+
+       ctx = mapping_create(data_size, max_id, delayed_removal);
+       if (IS_ERR(ctx))
+               goto unlock;
+
+       ctx->id = id;
+       ctx->type = type;
+       list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+       mutex_unlock(&shared_ctx_lock);
        return ctx;
 }
 
 void mapping_destroy(struct mapping_ctx *ctx)
 {
+       if (!refcount_dec_and_test(&ctx->refcount))
+               return;
+
+       mutex_lock(&shared_ctx_lock);
+       list_del(&ctx->list);
+       mutex_unlock(&shared_ctx_lock);
+
        mapping_flush_work(ctx);
        xa_destroy(&ctx->xarray);
        mutex_destroy(&ctx->lock);
index 285525c..4e2119f 100644 (file)
@@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
                                   bool delayed_removal);
 void mapping_destroy(struct mapping_ctx *ctx);
 
+/* adds mapping with an id or get an existing mapping with the same id
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
 #endif /* __MLX5_MAPPING_H__ */
index 2cbf18c..3cbb596 100644 (file)
@@ -167,6 +167,18 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
        return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
 }
 
+struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
+{
+       struct mlx5e_lro_param lro_param;
+
+       lro_param = (struct mlx5e_lro_param) {
+               .enabled = params->lro_en,
+               .timeout = params->lro_timeout,
+       };
+
+       return lro_param;
+}
+
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
        bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
index e9593f5..879ad46 100644 (file)
@@ -11,6 +11,11 @@ struct mlx5e_xsk_param {
        u16 chunk_size;
 };
 
+struct mlx5e_lro_param {
+       bool enabled;
+       u32 timeout;
+};
+
 struct mlx5e_cq_param {
        u32                        cqc[MLX5_ST_SZ_DW(cqc)];
        struct mlx5_wq_param       wq;
@@ -120,6 +125,7 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
 u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
                          struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk);
+struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
 
 /* Build queue parameters */
 
index efef4ad..ee688de 100644 (file)
@@ -326,13 +326,14 @@ static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
                                 struct mlx5e_ptp_params *cparams)
 {
        struct mlx5e_params *params = &cparams->params;
+       u8 num_tc = mlx5e_get_dcb_num_tc(params);
        int ix_base;
        int err;
        int tc;
 
-       ix_base = params->num_tc * params->num_channels;
+       ix_base = num_tc * params->num_channels;
 
-       for (tc = 0; tc < params->num_tc; tc++) {
+       for (tc = 0; tc < num_tc; tc++) {
                int txq_ix = ix_base + tc;
 
                err = mlx5e_ptp_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
@@ -365,9 +366,12 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
        struct mlx5e_create_cq_param ccp = {};
        struct dim_cq_moder ptp_moder = {};
        struct mlx5e_cq_param *cq_param;
+       u8 num_tc;
        int err;
        int tc;
 
+       num_tc = mlx5e_get_dcb_num_tc(params);
+
        ccp.node     = dev_to_node(mlx5_core_dma_dev(c->mdev));
        ccp.ch_stats = c->stats;
        ccp.napi     = &c->napi;
@@ -375,7 +379,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
 
        cq_param = &cparams->txq_sq_param.cqp;
 
-       for (tc = 0; tc < params->num_tc; tc++) {
+       for (tc = 0; tc < num_tc; tc++) {
                struct mlx5e_cq *cq = &c->ptpsq[tc].txqsq.cq;
 
                err = mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
@@ -383,7 +387,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
                        goto out_err_txqsq_cq;
        }
 
-       for (tc = 0; tc < params->num_tc; tc++) {
+       for (tc = 0; tc < num_tc; tc++) {
                struct mlx5e_cq *cq = &c->ptpsq[tc].ts_cq;
                struct mlx5e_ptpsq *ptpsq = &c->ptpsq[tc];
 
@@ -399,7 +403,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
 out_err_ts_cq:
        for (--tc; tc >= 0; tc--)
                mlx5e_close_cq(&c->ptpsq[tc].ts_cq);
-       tc = params->num_tc;
+       tc = num_tc;
 out_err_txqsq_cq:
        for (--tc; tc >= 0; tc--)
                mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
@@ -475,7 +479,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
        params->num_channels = orig->num_channels;
        params->hard_mtu = orig->hard_mtu;
        params->sw_mtu = orig->sw_mtu;
-       params->num_tc = orig->num_tc;
+       params->mqprio = orig->mqprio;
 
        /* SQ */
        if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
@@ -605,9 +609,9 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
 
 static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
 {
+       u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
        struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
        struct mlx5_flow_handle *rule;
-       u32 tirn = priv->ptp_tir.tirn;
        int err;
 
        if (ptp_fs->valid)
@@ -617,7 +621,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        if (err)
                goto out_free;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -625,7 +629,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        }
        ptp_fs->udp_v4_rule = rule;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -680,7 +684,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
        c->pdev     = mlx5_core_dma_dev(priv->mdev);
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
-       c->num_tc   = params->num_tc;
+       c->num_tc   = mlx5e_get_dcb_num_tc(params);
        c->stats    = &priv->ptp_stats.ch;
        c->lag_port = lag_port;
 
index 5efe327..e8a8d78 100644 (file)
@@ -132,7 +132,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
         */
        bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS);
 
-       return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid;
+       return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
 }
 
 int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
@@ -733,8 +733,8 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
        spin_unlock_bh(qdisc_lock(qdisc));
 }
 
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
-                      u16 *new_qid, struct netlink_ext_ack *extack)
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
+                      struct netlink_ext_ack *extack)
 {
        struct mlx5e_qos_node *node;
        struct netdev_queue *txq;
@@ -742,11 +742,9 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
        bool opened;
        int err;
 
-       qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid);
-
-       *old_qid = *new_qid = 0;
+       qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
 
-       node = mlx5e_sw_node_find(priv, classid);
+       node = mlx5e_sw_node_find(priv, *classid);
        if (!node)
                return -ENOENT;
 
@@ -764,7 +762,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
        err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
        if (err) /* Not fatal. */
                qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
-                        node->hw_id, classid, err);
+                        node->hw_id, *classid, err);
 
        mlx5e_sw_node_delete(priv, node);
 
@@ -826,8 +824,7 @@ int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
        if (opened)
                mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
 
-       *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid);
-       *new_qid = mlx5e_qid_from_qos(&priv->channels, qid);
+       *classid = node->classid;
        return 0;
 }
 
index 5af7991..757682b 100644 (file)
@@ -34,8 +34,8 @@ int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
                               struct netlink_ext_ack *extack);
 int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
                            u64 rate, u64 ceil, struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
-                      u16 *new_qid, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
+                      struct netlink_ext_ack *extack);
 int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
                            struct netlink_ext_ack *extack);
 int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
index 3c0032c..0c38c2e 100644 (file)
@@ -15,9 +15,116 @@ struct mlx5_bridge_switchdev_fdb_work {
        struct work_struct work;
        struct switchdev_notifier_fdb_info fdb_info;
        struct net_device *dev;
+       struct mlx5_esw_bridge_offloads *br_offloads;
        bool add;
 };
 
+static bool mlx5_esw_bridge_dev_same_esw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+
+       return esw == priv->mdev->priv.eswitch;
+}
+
+static bool mlx5_esw_bridge_dev_same_hw(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_core_dev *mdev, *esw_mdev;
+       u64 system_guid, esw_system_guid;
+
+       mdev = priv->mdev;
+       esw_mdev = esw->dev;
+
+       system_guid = mlx5_query_nic_system_image_guid(mdev);
+       esw_system_guid = mlx5_query_nic_system_image_guid(esw_mdev);
+
+       return system_guid == esw_system_guid;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw)
+{
+       struct net_device *lower;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(dev, lower, iter) {
+               struct mlx5_core_dev *mdev;
+               struct mlx5e_priv *priv;
+
+               if (!mlx5e_eswitch_rep(lower))
+                       continue;
+
+               priv = netdev_priv(lower);
+               mdev = priv->mdev;
+               if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw))
+                       return lower;
+       }
+
+       return NULL;
+}
+
+static struct net_device *
+mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+                                         u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_priv *priv;
+
+       if (netif_is_lag_master(dev))
+               dev = mlx5_esw_bridge_lag_rep_get(dev, esw);
+
+       if (!dev || !mlx5e_eswitch_rep(dev) || !mlx5_esw_bridge_dev_same_hw(dev, esw))
+               return NULL;
+
+       priv = netdev_priv(dev);
+       rpriv = priv->ppriv;
+       *vport_num = rpriv->rep->vport;
+       *esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
+       return dev;
+}
+
+static struct net_device *
+mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_eswitch *esw,
+                                               u16 *vport_num, u16 *esw_owner_vhca_id)
+{
+       struct net_device *lower_dev;
+       struct list_head *iter;
+
+       if (netif_is_lag_master(dev) || mlx5e_eswitch_rep(dev))
+               return mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, vport_num,
+                                                                esw_owner_vhca_id);
+
+       netdev_for_each_lower_dev(dev, lower_dev, iter) {
+               struct net_device *rep;
+
+               if (netif_is_bridge_master(lower_dev))
+                       continue;
+
+               rep = mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(lower_dev, esw, vport_num,
+                                                                     esw_owner_vhca_id);
+               if (rep)
+                       return rep;
+       }
+
+       return NULL;
+}
+
+static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device *rep,
+                                    struct mlx5_eswitch *esw)
+{
+       struct mlx5_core_dev *mdev;
+       struct mlx5e_priv *priv;
+
+       if (!mlx5_esw_bridge_dev_same_esw(rep, esw))
+               return false;
+
+       priv = netdev_priv(rep);
+       mdev = priv->mdev;
+       if (netif_is_lag_master(dev))
+               return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev);
+       return true;
+}
+
 static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr)
 {
        struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
@@ -25,37 +132,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr
                                                                    netdev_nb);
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info = ptr;
+       struct net_device *upper = info->upper_dev, *rep;
+       struct mlx5_eswitch *esw = br_offloads->esw;
+       u16 vport_num, esw_owner_vhca_id;
        struct netlink_ext_ack *extack;
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5_eswitch *esw;
-       struct mlx5_vport *vport;
-       struct net_device *upper;
-       struct mlx5e_priv *priv;
-       u16 vport_num;
-
-       if (!mlx5e_eswitch_rep(dev))
-               return 0;
+       int ifindex = upper->ifindex;
+       int err;
 
-       upper = info->upper_dev;
        if (!netif_is_bridge_master(upper))
                return 0;
 
-       esw = br_offloads->esw;
-       priv = netdev_priv(dev);
-       if (esw != priv->mdev->priv.eswitch)
+       rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+       if (!rep)
                return 0;
 
-       rpriv = priv->ppriv;
-       vport_num = rpriv->rep->vport;
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
-
        extack = netdev_notifier_info_to_extack(&info->info);
 
-       return info->linking ?
-               mlx5_esw_bridge_vport_link(upper->ifindex, br_offloads, vport, extack) :
-               mlx5_esw_bridge_vport_unlink(upper->ifindex, br_offloads, vport, extack);
+       if (mlx5_esw_bridge_is_local(dev, rep, esw))
+               err = info->linking ?
+                       mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id,
+                                                  br_offloads, extack) :
+                       mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id,
+                                                    br_offloads, extack);
+       else if (mlx5_esw_bridge_dev_same_hw(rep, esw))
+               err = info->linking ?
+                       mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id,
+                                                       br_offloads, extack) :
+                       mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id,
+                                                         br_offloads, extack);
+
+       return err;
 }
 
 static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
@@ -75,31 +181,28 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
        return notifier_from_errno(err);
 }
 
-static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
-                                       const void *ctx,
-                                       const struct switchdev_obj *obj,
-                                       struct netlink_ext_ack *extack)
+static int
+mlx5_esw_bridge_port_obj_add(struct net_device *dev,
+                            struct switchdev_notifier_port_obj_info *port_obj_info,
+                            struct mlx5_esw_bridge_offloads *br_offloads)
 {
+       struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+       const struct switchdev_obj *obj = port_obj_info->obj;
        const struct switchdev_obj_port_vlan *vlan;
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5_eswitch *esw;
-       struct mlx5_vport *vport;
-       struct mlx5e_priv *priv;
-       u16 vport_num;
-       int err = 0;
+       u16 vport_num, esw_owner_vhca_id;
+       int err;
 
-       priv = netdev_priv(dev);
-       rpriv = priv->ppriv;
-       vport_num = rpriv->rep->vport;
-       esw = priv->mdev->priv.eswitch;
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
+       if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+                                                      &esw_owner_vhca_id))
+               return 0;
+
+       port_obj_info->handled = true;
 
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
-               err = mlx5_esw_bridge_port_vlan_add(vlan->vid, vlan->flags, esw, vport, extack);
+               err = mlx5_esw_bridge_port_vlan_add(vport_num, esw_owner_vhca_id, vlan->vid,
+                                                   vlan->flags, br_offloads, extack);
                break;
        default:
                return -EOPNOTSUPP;
@@ -107,29 +210,25 @@ static int mlx5_esw_bridge_port_obj_add(struct net_device *dev,
        return err;
 }
 
-static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
-                                       const void *ctx,
-                                       const struct switchdev_obj *obj)
+static int
+mlx5_esw_bridge_port_obj_del(struct net_device *dev,
+                            struct switchdev_notifier_port_obj_info *port_obj_info,
+                            struct mlx5_esw_bridge_offloads *br_offloads)
 {
+       const struct switchdev_obj *obj = port_obj_info->obj;
        const struct switchdev_obj_port_vlan *vlan;
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5_eswitch *esw;
-       struct mlx5_vport *vport;
-       struct mlx5e_priv *priv;
-       u16 vport_num;
+       u16 vport_num, esw_owner_vhca_id;
 
-       priv = netdev_priv(dev);
-       rpriv = priv->ppriv;
-       vport_num = rpriv->rep->vport;
-       esw = priv->mdev->priv.eswitch;
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
+       if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+                                                      &esw_owner_vhca_id))
+               return 0;
+
+       port_obj_info->handled = true;
 
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
-               mlx5_esw_bridge_port_vlan_del(vlan->vid, esw, vport);
+               mlx5_esw_bridge_port_vlan_del(vport_num, esw_owner_vhca_id, vlan->vid, br_offloads);
                break;
        default:
                return -EOPNOTSUPP;
@@ -137,25 +236,21 @@ static int mlx5_esw_bridge_port_obj_del(struct net_device *dev,
        return 0;
 }
 
-static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
-                                            const void *ctx,
-                                            const struct switchdev_attr *attr,
-                                            struct netlink_ext_ack *extack)
+static int
+mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
+                                 struct switchdev_notifier_port_attr_info *port_attr_info,
+                                 struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5_eswitch *esw;
-       struct mlx5_vport *vport;
-       struct mlx5e_priv *priv;
-       u16 vport_num;
-       int err = 0;
+       struct netlink_ext_ack *extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+       const struct switchdev_attr *attr = port_attr_info->attr;
+       u16 vport_num, esw_owner_vhca_id;
+       int err;
 
-       priv = netdev_priv(dev);
-       rpriv = priv->ppriv;
-       vport_num = rpriv->rep->vport;
-       esw = priv->mdev->priv.eswitch;
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
+       if (!mlx5_esw_bridge_lower_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+                                                            &esw_owner_vhca_id))
+               return 0;
+
+       port_attr_info->handled = true;
 
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
@@ -167,10 +262,12 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
        case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
-               err = mlx5_esw_bridge_ageing_time_set(attr->u.ageing_time, esw, vport);
+               err = mlx5_esw_bridge_ageing_time_set(vport_num, esw_owner_vhca_id,
+                                                     attr->u.ageing_time, br_offloads);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-               err = mlx5_esw_bridge_vlan_filtering_set(attr->u.vlan_filtering, esw, vport);
+               err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
+                                                        attr->u.vlan_filtering, br_offloads);
                break;
        default:
                err = -EOPNOTSUPP;
@@ -179,27 +276,24 @@ static int mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
        return err;
 }
 
-static int mlx5_esw_bridge_event_blocking(struct notifier_block *unused,
+static int mlx5_esw_bridge_event_blocking(struct notifier_block *nb,
                                          unsigned long event, void *ptr)
 {
+       struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb,
+                                                                   struct mlx5_esw_bridge_offloads,
+                                                                   nb_blk);
        struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
        int err;
 
        switch (event) {
        case SWITCHDEV_PORT_OBJ_ADD:
-               err = switchdev_handle_port_obj_add(dev, ptr,
-                                                   mlx5e_eswitch_rep,
-                                                   mlx5_esw_bridge_port_obj_add);
+               err = mlx5_esw_bridge_port_obj_add(dev, ptr, br_offloads);
                break;
        case SWITCHDEV_PORT_OBJ_DEL:
-               err = switchdev_handle_port_obj_del(dev, ptr,
-                                                   mlx5e_eswitch_rep,
-                                                   mlx5_esw_bridge_port_obj_del);
+               err = mlx5_esw_bridge_port_obj_del(dev, ptr, br_offloads);
                break;
        case SWITCHDEV_PORT_ATTR_SET:
-               err = switchdev_handle_port_attr_set(dev, ptr,
-                                                    mlx5e_eswitch_rep,
-                                                    mlx5_esw_bridge_port_obj_attr_set);
+               err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
                break;
        default:
                err = 0;
@@ -222,27 +316,23 @@ static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work)
                container_of(work, struct mlx5_bridge_switchdev_fdb_work, work);
        struct switchdev_notifier_fdb_info *fdb_info =
                &fdb_work->fdb_info;
+       struct mlx5_esw_bridge_offloads *br_offloads =
+               fdb_work->br_offloads;
        struct net_device *dev = fdb_work->dev;
-       struct mlx5e_rep_priv *rpriv;
-       struct mlx5_eswitch *esw;
-       struct mlx5_vport *vport;
-       struct mlx5e_priv *priv;
-       u16 vport_num;
+       u16 vport_num, esw_owner_vhca_id;
 
        rtnl_lock();
 
-       priv = netdev_priv(dev);
-       rpriv = priv->ppriv;
-       vport_num = rpriv->rep->vport;
-       esw = priv->mdev->priv.eswitch;
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
+       if (!mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, br_offloads->esw, &vport_num,
+                                                      &esw_owner_vhca_id))
                goto out;
 
        if (fdb_work->add)
-               mlx5_esw_bridge_fdb_create(dev, esw, vport, fdb_info);
+               mlx5_esw_bridge_fdb_create(dev, vport_num, esw_owner_vhca_id, br_offloads,
+                                          fdb_info);
        else
-               mlx5_esw_bridge_fdb_remove(dev, esw, vport, fdb_info);
+               mlx5_esw_bridge_fdb_remove(dev, vport_num, esw_owner_vhca_id, br_offloads,
+                                          fdb_info);
 
 out:
        rtnl_unlock();
@@ -251,7 +341,8 @@ out:
 
 static struct mlx5_bridge_switchdev_fdb_work *
 mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
-                                       struct switchdev_notifier_fdb_info *fdb_info)
+                                       struct switchdev_notifier_fdb_info *fdb_info,
+                                       struct mlx5_esw_bridge_offloads *br_offloads)
 {
        struct mlx5_bridge_switchdev_fdb_work *work;
        u8 *addr;
@@ -273,6 +364,7 @@ mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add,
 
        dev_hold(dev);
        work->dev = dev;
+       work->br_offloads = br_offloads;
        work->add = add;
        return work;
 }
@@ -286,20 +378,14 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
        struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
        struct switchdev_notifier_fdb_info *fdb_info;
        struct mlx5_bridge_switchdev_fdb_work *work;
+       struct mlx5_eswitch *esw = br_offloads->esw;
        struct switchdev_notifier_info *info = ptr;
-       struct net_device *upper;
-       struct mlx5e_priv *priv;
-
-       if (!mlx5e_eswitch_rep(dev))
-               return NOTIFY_DONE;
-       priv = netdev_priv(dev);
-       if (priv->mdev->priv.eswitch != br_offloads->esw)
-               return NOTIFY_DONE;
+       u16 vport_num, esw_owner_vhca_id;
+       struct net_device *upper, *rep;
 
        if (event == SWITCHDEV_PORT_ATTR_SET) {
-               int err = switchdev_handle_port_attr_set(dev, ptr,
-                                                        mlx5e_eswitch_rep,
-                                                        mlx5_esw_bridge_port_obj_attr_set);
+               int err = mlx5_esw_bridge_port_obj_attr_set(dev, ptr, br_offloads);
+
                return notifier_from_errno(err);
        }
 
@@ -309,7 +395,27 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
        if (!netif_is_bridge_master(upper))
                return NOTIFY_DONE;
 
+       rep = mlx5_esw_bridge_rep_vport_num_vhca_id_get(dev, esw, &vport_num, &esw_owner_vhca_id);
+       if (!rep)
+               return NOTIFY_DONE;
+
        switch (event) {
+       case SWITCHDEV_FDB_ADD_TO_BRIDGE:
+               /* only handle the event on native eswtich of representor */
+               if (!mlx5_esw_bridge_is_local(dev, rep, esw))
+                       break;
+
+               fdb_info = container_of(info,
+                                       struct switchdev_notifier_fdb_info,
+                                       info);
+               mlx5_esw_bridge_fdb_update_used(dev, vport_num, esw_owner_vhca_id, br_offloads,
+                                               fdb_info);
+               break;
+       case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+               /* only handle the event on peers */
+               if (mlx5_esw_bridge_is_local(dev, rep, esw))
+                       break;
+               fallthrough;
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = container_of(info,
@@ -318,7 +424,8 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb,
 
                work = mlx5_esw_bridge_init_switchdev_fdb_work(dev,
                                                               event == SWITCHDEV_FDB_ADD_TO_DEVICE,
-                                                              fdb_info);
+                                                              fdb_info,
+                                                              br_offloads);
                if (IS_ERR(work)) {
                        WARN_ONCE(1, "Failed to init switchdev work, err=%ld",
                                  PTR_ERR(work));
index 059799e..51a4d80 100644 (file)
@@ -17,7 +17,7 @@
 #include "en/mapping.h"
 #include "en/tc_tun.h"
 #include "lib/port_tun.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
 
 struct mlx5e_rep_indr_block_priv {
        struct net_device *netdev;
@@ -516,7 +516,6 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
                                 mlx5e_rep_indr_block_unbind);
 }
 
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
                                 struct mlx5e_tc_update_priv *tc_priv,
                                 u32 tunnel_id)
@@ -609,12 +608,13 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
        return true;
 }
 
-static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
-                             struct mlx5e_tc_update_priv *tc_priv)
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+                                   struct mlx5e_tc_update_priv *tc_priv)
 {
        struct mlx5e_priv *priv = netdev_priv(skb->dev);
        u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
        if (chain) {
                struct mlx5_rep_uplink_priv *uplink_priv;
                struct mlx5e_rep_priv *uplink_rpriv;
@@ -636,9 +636,25 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
                                              zone_restore_id))
                        return false;
        }
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
        return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
 }
-#endif /* CONFIG_NET_TC_SKB_EXT */
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+                                    struct mlx5_mapped_obj *mapped_obj,
+                                    struct mlx5e_tc_update_priv *tc_priv)
+{
+       if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+               netdev_dbg(priv->netdev,
+                          "Failed to restore tunnel info for sampled packet\n");
+               return;
+       }
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+       mlx5e_tc_sample_skb(skb, mapped_obj);
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+       mlx5_rep_tc_post_napi_receive(tc_priv);
+}
 
 bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
                             struct sk_buff *skb,
@@ -647,7 +663,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
        struct mlx5_mapped_obj mapped_obj;
        struct mlx5_eswitch *esw;
        struct mlx5e_priv *priv;
-       u32 reg_c0, reg_c1;
+       u32 reg_c0;
        int err;
 
        reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -659,8 +675,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
         */
        skb->mark = 0;
 
-       reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
        priv = netdev_priv(skb->dev);
        esw = priv->mdev->priv.eswitch;
        err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
@@ -671,18 +685,14 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
                return false;
        }
 
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-       if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN)
-               return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv);
-#endif /* CONFIG_NET_TC_SKB_EXT */
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-       if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
-               mlx5_esw_sample_skb(skb, &mapped_obj);
+       if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+               u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
+               return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, tc_priv);
+       } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+               mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
                return false;
-       }
-#endif /* CONFIG_MLX5_TC_SAMPLE */
-       if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE &&
-           mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) {
+       } else {
                netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
                return false;
        }
index 9d361ef..bb682fd 100644 (file)
@@ -372,7 +372,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
        for (i = 0; i < priv->channels.num; i++) {
                struct mlx5e_channel *c = priv->channels.c[i];
 
-               for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+               for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
                        struct mlx5e_txqsq *sq = &c->sq[tc];
 
                        err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq, tc);
@@ -384,7 +384,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
        if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
                goto close_sqs_nest;
 
-       for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+       for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
                err = mlx5e_tx_reporter_build_diagnose_output_ptpsq(fmsg,
                                                                    &ptp_ch->ptpsq[tc],
                                                                    tc);
@@ -494,7 +494,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
        for (i = 0; i < priv->channels.num; i++) {
                struct mlx5e_channel *c = priv->channels.c[i];
 
-               for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+               for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
                        struct mlx5e_txqsq *sq = &c->sq[tc];
 
                        err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
@@ -504,7 +504,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
        }
 
        if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
-               for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+               for (tc = 0; tc < mlx5e_get_dcb_num_tc(&priv->channels.params); tc++) {
                        struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
 
                        err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "PTP SQ");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
new file mode 100644 (file)
index 0000000..b915fb2
--- /dev/null
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rqt.h"
+#include <linux/mlx5/transobj.h>
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels)
+{
+       unsigned int i;
+
+       for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+               indir->table[i] = i % num_channels;
+}
+
+static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+                         u16 max_size, u32 *init_rqns, u16 init_size)
+{
+       void *rqtc;
+       int inlen;
+       int err;
+       u32 *in;
+       int i;
+
+       rqt->mdev = mdev;
+       rqt->size = max_size;
+
+       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * init_size;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+
+       MLX5_SET(rqtc, rqtc, rqt_max_size, rqt->size);
+
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, init_size);
+       for (i = 0; i < init_size; i++)
+               MLX5_SET(rqtc, rqtc, rq_num[i], init_rqns[i]);
+
+       err = mlx5_core_create_rqt(rqt->mdev, in, inlen, &rqt->rqtn);
+
+       kvfree(in);
+       return err;
+}
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+                         bool indir_enabled, u32 init_rqn)
+{
+       u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
+
+       return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
+}
+
+static int mlx5e_bits_invert(unsigned long a, int size)
+{
+       int inv = 0;
+       int i;
+
+       for (i = 0; i < size; i++)
+               inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
+
+       return inv;
+}
+
+static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns,
+                                u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+       unsigned int i;
+
+       for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
+               unsigned int ix = i;
+
+               if (hfunc == ETH_RSS_HASH_XOR)
+                       ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
+
+               ix = indir->table[ix];
+
+               if (WARN_ON(ix >= num_rqns))
+                       /* Could be a bug in the driver or in the kernel part of
+                        * ethtool: indir table refers to non-existent RQs.
+                        */
+                       return -EINVAL;
+               rss_rqns[i] = rqns[ix];
+       }
+
+       return 0;
+}
+
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+                        u32 *rqns, unsigned int num_rqns,
+                        u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+       u32 *rss_rqns;
+       int err;
+
+       rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+       if (!rss_rqns)
+               return -ENOMEM;
+
+       err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+       if (err)
+               goto out;
+
+       err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+       kvfree(rss_rqns);
+       return err;
+}
+
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
+{
+       mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
+}
+
+static int mlx5e_rqt_redirect(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int size)
+{
+       unsigned int i;
+       void *rqtc;
+       int inlen;
+       u32 *in;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * size;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+
+       MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
+       MLX5_SET(rqtc, rqtc, rqt_actual_size, size);
+       for (i = 0; i < size; i++)
+               MLX5_SET(rqtc, rqtc, rq_num[i], rqns[i]);
+
+       err = mlx5_core_modify_rqt(rqt->mdev, rqt->rqtn, in, inlen);
+
+       kvfree(in);
+       return err;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn)
+{
+       return mlx5e_rqt_redirect(rqt, &rqn, 1);
+}
+
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+                            u8 hfunc, struct mlx5e_rss_params_indir *indir)
+{
+       u32 *rss_rqns;
+       int err;
+
+       if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
+               return -EINVAL;
+
+       rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
+       if (!rss_rqns)
+               return -ENOMEM;
+
+       err = mlx5e_calc_indir_rqns(rss_rqns, rqns, num_rqns, hfunc, indir);
+       if (err)
+               goto out;
+
+       err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
+
+out:
+       kvfree(rss_rqns);
+       return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
new file mode 100644 (file)
index 0000000..60c985a
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RQT_H__
+#define __MLX5_EN_RQT_H__
+
+#include <linux/kernel.h>
+
+#define MLX5E_INDIR_RQT_SIZE (1 << 8)
+
+struct mlx5_core_dev;
+
+struct mlx5e_rss_params_indir {
+       u32 table[MLX5E_INDIR_RQT_SIZE];
+};
+
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels);
+
+struct mlx5e_rqt {
+       struct mlx5_core_dev *mdev;
+       u32 rqtn;
+       u16 size;
+};
+
+int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+                         bool indir_enabled, u32 init_rqn);
+int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
+                        u32 *rqns, unsigned int num_rqns,
+                        u8 hfunc, struct mlx5e_rss_params_indir *indir);
+void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt);
+
+static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
+{
+       return rqt->rqtn;
+}
+
+int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn);
+int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_rqns,
+                            u8 hfunc, struct mlx5e_rss_params_indir *indir);
+
+#endif /* __MLX5_EN_RQT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
new file mode 100644 (file)
index 0000000..625cd49
--- /dev/null
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES.
+
+#include "rss.h"
+
+#define mlx5e_rss_warn(__dev, format, ...)                     \
+       dev_warn((__dev)->device, "%s:%d:(pid %d): " format,    \
+                __func__, __LINE__, current->pid,              \
+                ##__VA_ARGS__)
+
+static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
+       [MLX5_TT_IPV4_TCP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+               .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+       },
+       [MLX5_TT_IPV6_TCP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+               .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
+               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+       },
+       [MLX5_TT_IPV4_UDP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+               .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+       },
+       [MLX5_TT_IPV6_UDP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+               .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
+               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
+       },
+       [MLX5_TT_IPV4_IPSEC_AH] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+       },
+       [MLX5_TT_IPV6_IPSEC_AH] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+       },
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+       },
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
+       },
+       [MLX5_TT_IPV4] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP,
+       },
+       [MLX5_TT_IPV6] = {
+               .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
+               .l4_prot_type = 0,
+               .rx_hash_fields = MLX5_HASH_IP,
+       },
+};
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
+{
+       return rss_default_config[tt];
+}
+
+struct mlx5e_rss {
+       struct mlx5e_rss_params_hash hash;
+       struct mlx5e_rss_params_indir indir;
+       u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+       struct mlx5e_tir *tir[MLX5E_NUM_INDIR_TIRS];
+       struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS];
+       struct mlx5e_rqt rqt;
+       struct mlx5_core_dev *mdev;
+       u32 drop_rqn;
+       bool inner_ft_support;
+       bool enabled;
+       refcount_t refcnt;
+};
+
+struct mlx5e_rss *mlx5e_rss_alloc(void)
+{
+       return kvzalloc(sizeof(struct mlx5e_rss), GFP_KERNEL);
+}
+
+void mlx5e_rss_free(struct mlx5e_rss *rss)
+{
+       kvfree(rss);
+}
+
+static void mlx5e_rss_params_init(struct mlx5e_rss *rss)
+{
+       enum mlx5_traffic_types tt;
+
+       rss->hash.hfunc = ETH_RSS_HASH_TOP;
+       netdev_rss_key_fill(rss->hash.toeplitz_hash_key,
+                           sizeof(rss->hash.toeplitz_hash_key));
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               rss->rx_hash_fields[tt] =
+                       mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static struct mlx5e_tir **rss_get_tirp(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                                      bool inner)
+{
+       return inner ? &rss->inner_tir[tt] : &rss->tir[tt];
+}
+
+static struct mlx5e_tir *rss_get_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                                    bool inner)
+{
+       return *rss_get_tirp(rss, tt, inner);
+}
+
+static struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+       struct mlx5e_rss_params_traffic_type rss_tt;
+
+       rss_tt = mlx5e_rss_get_default_tt_config(tt);
+       rss_tt.rx_hash_fields = rss->rx_hash_fields[tt];
+       return rss_tt;
+}
+
+static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
+                               enum mlx5_traffic_types tt,
+                               const struct mlx5e_lro_param *init_lro_param,
+                               bool inner)
+{
+       struct mlx5e_rss_params_traffic_type rss_tt;
+       struct mlx5e_tir_builder *builder;
+       struct mlx5e_tir **tir_p;
+       struct mlx5e_tir *tir;
+       u32 rqtn;
+       int err;
+
+       if (inner && !rss->inner_ft_support) {
+               mlx5e_rss_warn(rss->mdev,
+                              "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n",
+                              tt);
+               return -EINVAL;
+       }
+
+       tir_p = rss_get_tirp(rss, tt, inner);
+       if (*tir_p)
+               return -EINVAL;
+
+       tir = kvzalloc(sizeof(*tir), GFP_KERNEL);
+       if (!tir)
+               return -ENOMEM;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder) {
+               err = -ENOMEM;
+               goto free_tir;
+       }
+
+       rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
+       mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   rqtn, rss->inner_ft_support);
+       mlx5e_tir_builder_build_lro(builder, init_lro_param);
+       rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+       mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+
+       err = mlx5e_tir_init(tir, builder, rss->mdev, true);
+       mlx5e_tir_builder_free(builder);
+       if (err) {
+               mlx5e_rss_warn(rss->mdev, "Failed to create %sindirect TIR: err = %d, tt = %d\n",
+                              inner ? "inner " : "", err, tt);
+               goto free_tir;
+       }
+
+       *tir_p = tir;
+       return 0;
+
+free_tir:
+       kvfree(tir);
+       return err;
+}
+
+static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                                 bool inner)
+{
+       struct mlx5e_tir **tir_p;
+       struct mlx5e_tir *tir;
+
+       tir_p = rss_get_tirp(rss, tt, inner);
+       if (!*tir_p)
+               return;
+
+       tir = *tir_p;
+       mlx5e_tir_destroy(tir);
+       kvfree(tir);
+       *tir_p = NULL;
+}
+
+static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
+                                const struct mlx5e_lro_param *init_lro_param,
+                                bool inner)
+{
+       enum mlx5_traffic_types tt, max_tt;
+       int err;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+               if (err)
+                       goto err_destroy_tirs;
+       }
+
+       return 0;
+
+err_destroy_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_rss_destroy_tir(rss, tt, inner);
+       return err;
+}
+
+static void mlx5e_rss_destroy_tirs(struct mlx5e_rss *rss, bool inner)
+{
+       enum mlx5_traffic_types tt;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               mlx5e_rss_destroy_tir(rss, tt, inner);
+}
+
+static int mlx5e_rss_update_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                               bool inner)
+{
+       struct mlx5e_rss_params_traffic_type rss_tt;
+       struct mlx5e_tir_builder *builder;
+       struct mlx5e_tir *tir;
+       int err;
+
+       tir = rss_get_tir(rss, tt, inner);
+       if (!tir)
+               return 0;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       rss_tt = mlx5e_rss_get_tt_config(rss, tt);
+
+       mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
+       err = mlx5e_tir_modify(tir, builder);
+
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss)
+{
+       enum mlx5_traffic_types tt;
+       int err, retval;
+
+       retval = 0;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               err = mlx5e_rss_update_tir(rss, tt, false);
+               if (err) {
+                       retval = retval ? : err;
+                       mlx5e_rss_warn(rss->mdev,
+                                      "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+                                      tt, err);
+               }
+
+               if (!rss->inner_ft_support)
+                       continue;
+
+               err = mlx5e_rss_update_tir(rss, tt, true);
+               if (err) {
+                       retval = retval ? : err;
+                       mlx5e_rss_warn(rss->mdev,
+                                      "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+                                      tt, err);
+               }
+       }
+       return retval;
+}
+
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                          bool inner_ft_support, u32 drop_rqn)
+{
+       rss->mdev = mdev;
+       rss->inner_ft_support = inner_ft_support;
+       rss->drop_rqn = drop_rqn;
+
+       mlx5e_rss_params_init(rss);
+       refcount_set(&rss->refcnt, 1);
+
+       return mlx5e_rqt_init_direct(&rss->rqt, mdev, true, drop_rqn);
+}
+
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                  bool inner_ft_support, u32 drop_rqn,
+                  const struct mlx5e_lro_param *init_lro_param)
+{
+       int err;
+
+       err = mlx5e_rss_init_no_tirs(rss, mdev, inner_ft_support, drop_rqn);
+       if (err)
+               goto err_out;
+
+       err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
+       if (err)
+               goto err_destroy_rqt;
+
+       if (inner_ft_support) {
+               err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
+               if (err)
+                       goto err_destroy_tirs;
+       }
+
+       return 0;
+
+err_destroy_tirs:
+       mlx5e_rss_destroy_tirs(rss, false);
+err_destroy_rqt:
+       mlx5e_rqt_destroy(&rss->rqt);
+err_out:
+       return err;
+}
+
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss)
+{
+       if (!refcount_dec_if_one(&rss->refcnt))
+               return -EBUSY;
+
+       mlx5e_rss_destroy_tirs(rss, false);
+
+       if (rss->inner_ft_support)
+               mlx5e_rss_destroy_tirs(rss, true);
+
+       mlx5e_rqt_destroy(&rss->rqt);
+
+       return 0;
+}
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss)
+{
+       refcount_inc(&rss->refcnt);
+}
+
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss)
+{
+       refcount_dec(&rss->refcnt);
+}
+
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss)
+{
+       return refcount_read(&rss->refcnt);
+}
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                      bool inner)
+{
+       struct mlx5e_tir *tir;
+
+       WARN_ON(inner && !rss->inner_ft_support);
+       tir = rss_get_tir(rss, tt, inner);
+       WARN_ON(!tir);
+
+       return mlx5e_tir_get_tirn(tir);
+}
+
+/* Fill the "tirn" output parameter.
+ * Create the requested TIR if it's its first usage.
+ */
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+                         enum mlx5_traffic_types tt,
+                         const struct mlx5e_lro_param *init_lro_param,
+                         bool inner, u32 *tirn)
+{
+       struct mlx5e_tir *tir;
+
+       tir = rss_get_tir(rss, tt, inner);
+       if (!tir) { /* TIR doesn't exist, create one */
+               int err;
+
+               err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+               if (err)
+                       return err;
+               tir = rss_get_tir(rss, tt, inner);
+       }
+
+       *tirn = mlx5e_tir_get_tirn(tir);
+       return 0;
+}
+
+static void mlx5e_rss_apply(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+       int err;
+
+       err = mlx5e_rqt_redirect_indir(&rss->rqt, rqns, num_rqns, rss->hash.hfunc, &rss->indir);
+       if (err)
+               mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to channels: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&rss->rqt), err);
+}
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns)
+{
+       rss->enabled = true;
+       mlx5e_rss_apply(rss, rqns, num_rqns);
+}
+
+void mlx5e_rss_disable(struct mlx5e_rss *rss)
+{
+       int err;
+
+       rss->enabled = false;
+       err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn);
+       if (err)
+               mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
+}
+
+int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
+{
+       struct mlx5e_tir_builder *builder;
+       enum mlx5_traffic_types tt;
+       int err, final_err;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       mlx5e_tir_builder_build_lro(builder, lro_param);
+
+       final_err = 0;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               struct mlx5e_tir *tir;
+
+               tir = rss_get_tir(rss, tt, false);
+               if (!tir)
+                       goto inner_tir;
+               err = mlx5e_tir_modify(tir, builder);
+               if (err) {
+                       mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+
+inner_tir:
+               if (!rss->inner_ft_support)
+                       continue;
+
+               tir = rss_get_tir(rss, tt, true);
+               if (!tir)
+                       continue;
+               err = mlx5e_tir_modify(tir, builder);
+               if (err) {
+                       mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       mlx5e_tir_builder_free(builder);
+       return final_err;
+}
+
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc)
+{
+       unsigned int i;
+
+       if (indir)
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       indir[i] = rss->indir.table[i];
+
+       if (key)
+               memcpy(key, rss->hash.toeplitz_hash_key,
+                      sizeof(rss->hash.toeplitz_hash_key));
+
+       if (hfunc)
+               *hfunc = rss->hash.hfunc;
+
+       return 0;
+}
+
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+                      const u8 *key, const u8 *hfunc,
+                      u32 *rqns, unsigned int num_rqns)
+{
+       bool changed_indir = false;
+       bool changed_hash = false;
+
+       if (hfunc && *hfunc != rss->hash.hfunc) {
+               switch (*hfunc) {
+               case ETH_RSS_HASH_XOR:
+               case ETH_RSS_HASH_TOP:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               changed_hash = true;
+               changed_indir = true;
+               rss->hash.hfunc = *hfunc;
+       }
+
+       if (key) {
+               if (rss->hash.hfunc == ETH_RSS_HASH_TOP)
+                       changed_hash = true;
+               memcpy(rss->hash.toeplitz_hash_key, key,
+                      sizeof(rss->hash.toeplitz_hash_key));
+       }
+
+       if (indir) {
+               unsigned int i;
+
+               changed_indir = true;
+
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       rss->indir.table[i] = indir[i];
+       }
+
+       if (changed_indir && rss->enabled)
+               mlx5e_rss_apply(rss, rqns, num_rqns);
+
+       if (changed_hash)
+               mlx5e_rss_update_tirs(rss);
+
+       return 0;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss)
+{
+       return rss->hash;
+}
+
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
+{
+       return rss->rx_hash_fields[tt];
+}
+
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                             u8 rx_hash_fields)
+{
+       u8 old_rx_hash_fields;
+       int err;
+
+       old_rx_hash_fields = rss->rx_hash_fields[tt];
+
+       if (old_rx_hash_fields == rx_hash_fields)
+               return 0;
+
+       rss->rx_hash_fields[tt] = rx_hash_fields;
+
+       err = mlx5e_rss_update_tir(rss, tt, false);
+       if (err) {
+               rss->rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5e_rss_warn(rss->mdev,
+                              "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               return err;
+       }
+
+       if (!(rss->inner_ft_support))
+               return 0;
+
+       err = mlx5e_rss_update_tir(rss, tt, true);
+       if (err) {
+               /* Partial update happened. Try to revert - it may fail too, but
+                * there is nothing more we can do.
+                */
+               rss->rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5e_rss_warn(rss->mdev,
+                              "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               if (mlx5e_rss_update_tir(rss, tt, false))
+                       mlx5e_rss_warn(rss->mdev,
+                                      "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+                                      tt);
+       }
+
+       return err;
+}
+
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch)
+{
+       mlx5e_rss_params_indir_init_uniform(&rss->indir, nch);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
new file mode 100644 (file)
index 0000000..d522a10
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_RSS_H__
+#define __MLX5_EN_RSS_H__
+
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+
+struct mlx5e_rss_params_traffic_type
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+struct mlx5e_rss;
+
+struct mlx5e_rss *mlx5e_rss_alloc(void);
+void mlx5e_rss_free(struct mlx5e_rss *rss);
+int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                  bool inner_ft_support, u32 drop_rqn,
+                  const struct mlx5e_lro_param *init_lro_param);
+int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
+                          bool inner_ft_support, u32 drop_rqn);
+int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
+
+void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss);
+void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss);
+unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss);
+
+u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                      bool inner);
+int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
+                         enum mlx5_traffic_types tt,
+                         const struct mlx5e_lro_param *init_lro_param,
+                         bool inner, u32 *tirn);
+
+void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
+void mlx5e_rss_disable(struct mlx5e_rss *rss);
+
+int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
+int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
+                      const u8 *key, const u8 *hfunc,
+                      u32 *rqns, unsigned int num_rqns);
+struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss);
+u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt);
+int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
+                             u8 rx_hash_fields);
+void mlx5e_rss_set_indir_uniform(struct mlx5e_rss *rss, unsigned int nch);
+#endif /* __MLX5_EN_RSS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
new file mode 100644 (file)
index 0000000..bf0313e
--- /dev/null
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
+
+#define MLX5E_MAX_NUM_RSS 16
+
+struct mlx5e_rx_res {
+       struct mlx5_core_dev *mdev;
+       enum mlx5e_rx_res_features features;
+       unsigned int max_nch;
+       u32 drop_rqn;
+
+       struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
+       bool rss_active;
+       u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+       unsigned int rss_nch;
+
+       struct {
+               struct mlx5e_rqt direct_rqt;
+               struct mlx5e_tir direct_tir;
+               struct mlx5e_rqt xsk_rqt;
+               struct mlx5e_tir xsk_tir;
+       } *channels;
+
+       struct {
+               struct mlx5e_rqt rqt;
+               struct mlx5e_tir tir;
+       } ptp;
+};
+
+/* API for rx_res_rss_* */
+
+static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
+                                    const struct mlx5e_lro_param *init_lro_param,
+                                    unsigned int init_nch)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_rss *rss;
+       int err;
+
+       if (WARN_ON(res->rss[0]))
+               return -EINVAL;
+
+       rss = mlx5e_rss_alloc();
+       if (!rss)
+               return -ENOMEM;
+
+       err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
+                            init_lro_param);
+       if (err)
+               goto err_rss_free;
+
+       mlx5e_rss_set_indir_uniform(rss, init_nch);
+
+       res->rss[0] = rss;
+
+       return 0;
+
+err_rss_free:
+       mlx5e_rss_free(rss);
+       return err;
+}
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_rss *rss;
+       int err, i;
+
+       for (i = 1; i < MLX5E_MAX_NUM_RSS; i++)
+               if (!res->rss[i])
+                       break;
+
+       if (i == MLX5E_MAX_NUM_RSS)
+               return -ENOSPC;
+
+       rss = mlx5e_rss_alloc();
+       if (!rss)
+               return -ENOMEM;
+
+       err = mlx5e_rss_init_no_tirs(rss, res->mdev, inner_ft_support, res->drop_rqn);
+       if (err)
+               goto err_rss_free;
+
+       mlx5e_rss_set_indir_uniform(rss, init_nch);
+       if (res->rss_active)
+               mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+
+       res->rss[i] = rss;
+       *rss_idx = i;
+
+       return 0;
+
+err_rss_free:
+       mlx5e_rss_free(rss);
+       return err;
+}
+
+static int __mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+       struct mlx5e_rss *rss = res->rss[rss_idx];
+       int err;
+
+       err = mlx5e_rss_cleanup(rss);
+       if (err)
+               return err;
+
+       mlx5e_rss_free(rss);
+       res->rss[rss_idx] = NULL;
+
+       return 0;
+}
+
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+       struct mlx5e_rss *rss;
+
+       if (rss_idx >= MLX5E_MAX_NUM_RSS)
+               return -EINVAL;
+
+       rss = res->rss[rss_idx];
+       if (!rss)
+               return -EINVAL;
+
+       return __mlx5e_rx_res_rss_destroy(res, rss_idx);
+}
+
+static void mlx5e_rx_res_rss_destroy_all(struct mlx5e_rx_res *res)
+{
+       int i;
+
+       for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+               struct mlx5e_rss *rss = res->rss[i];
+               int err;
+
+               if (!rss)
+                       continue;
+
+               err = __mlx5e_rx_res_rss_destroy(res, i);
+               if (err) {
+                       unsigned int refcount;
+
+                       refcount = mlx5e_rss_refcnt_read(rss);
+                       mlx5_core_warn(res->mdev,
+                                      "Failed to destroy RSS context %d, refcount = %u, err = %d\n",
+                                      i, refcount, err);
+               }
+       }
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+       int i;
+
+       res->rss_active = true;
+
+       for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+               struct mlx5e_rss *rss = res->rss[i];
+
+               if (!rss)
+                       continue;
+               mlx5e_rss_enable(rss, res->rss_rqns, res->rss_nch);
+       }
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+       int i;
+
+       res->rss_active = false;
+
+       for (i = 0; i < MLX5E_MAX_NUM_RSS; i++) {
+               struct mlx5e_rss *rss = res->rss[i];
+
+               if (!rss)
+                       continue;
+               mlx5e_rss_disable(rss);
+       }
+}
+
+/* Updates the indirection table SW shadow, does not update the HW resources yet */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+       WARN_ON_ONCE(res->rss_active);
+       mlx5e_rss_set_indir_uniform(res->rss[0], nch);
+}
+
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+                             u32 *indir, u8 *key, u8 *hfunc)
+{
+       struct mlx5e_rss *rss;
+
+       if (rss_idx >= MLX5E_MAX_NUM_RSS)
+               return -EINVAL;
+
+       rss = res->rss[rss_idx];
+       if (!rss)
+               return -ENOENT;
+
+       return mlx5e_rss_get_rxfh(rss, indir, key, hfunc);
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+                             const u32 *indir, const u8 *key, const u8 *hfunc)
+{
+       struct mlx5e_rss *rss;
+
+       if (rss_idx >= MLX5E_MAX_NUM_RSS)
+               return -EINVAL;
+
+       rss = res->rss[rss_idx];
+       if (!rss)
+               return -ENOENT;
+
+       return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, res->rss_nch);
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       struct mlx5e_rss *rss = res->rss[0];
+
+       return mlx5e_rss_get_hash_fields(rss, tt);
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields)
+{
+       struct mlx5e_rss *rss = res->rss[0];
+
+       return mlx5e_rss_set_hash_fields(rss, tt, rx_hash_fields);
+}
+
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res)
+{
+       int i, cnt;
+
+       cnt = 0;
+       for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+               if (res->rss[i])
+                       cnt++;
+
+       return cnt;
+}
+
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss)
+{
+       int i;
+
+       if (!rss)
+               return -EINVAL;
+
+       for (i = 0; i < MLX5E_MAX_NUM_RSS; i++)
+               if (rss == res->rss[i])
+                       return i;
+
+       return -ENOENT;
+}
+
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx)
+{
+       if (rss_idx >= MLX5E_MAX_NUM_RSS)
+               return NULL;
+
+       return res->rss[rss_idx];
+}
+
+/* End of API rx_res_rss_* */
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+       return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+                                     const struct mlx5e_lro_param *init_lro_param)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err = 0;
+       int ix;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+       if (!res->channels) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+               goto out;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       goto out;
+
+err_destroy_xsk_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+
+       ix = res->max_nch;
+err_destroy_xsk_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+
+       ix = res->max_nch;
+err_destroy_direct_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+       ix = res->max_nch;
+err_destroy_direct_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+       kvfree(res->channels);
+
+out:
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+       if (err)
+               goto out;
+
+       mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                   inner_ft_support);
+       mlx5e_tir_builder_build_direct(builder);
+
+       err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+       if (err)
+               goto err_destroy_ptp_rqt;
+
+       goto out;
+
+err_destroy_ptp_rqt:
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+       }
+
+       kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_tir_destroy(&res->ptp.tir);
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch)
+{
+       int err;
+
+       res->mdev = mdev;
+       res->features = features;
+       res->max_nch = max_nch;
+       res->drop_rqn = drop_rqn;
+
+       err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
+       if (err)
+               goto err_out;
+
+       err = mlx5e_rx_res_channels_init(res, init_lro_param);
+       if (err)
+               goto err_rss_destroy;
+
+       err = mlx5e_rx_res_ptp_init(res);
+       if (err)
+               goto err_channels_destroy;
+
+       return 0;
+
+err_channels_destroy:
+       mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+       __mlx5e_rx_res_rss_destroy(res, 0);
+err_out:
+       return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_rx_res_ptp_destroy(res);
+       mlx5e_rx_res_channels_destroy(res);
+       mlx5e_rx_res_rss_destroy_all(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+       kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
+
+       return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       struct mlx5e_rss *rss = res->rss[0];
+
+       return mlx5e_rss_get_tirn(rss, tt, false);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       struct mlx5e_rss *rss = res->rss[0];
+
+       return mlx5e_rss_get_tirn(rss, tt, true);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+       return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+       unsigned int nch, ix;
+       int err;
+
+       nch = mlx5e_channels_get_num(chs);
+
+       for (ix = 0; ix < chs->num; ix++)
+               mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+       res->rss_nch = chs->num;
+
+       mlx5e_rx_res_rss_enable(res);
+
+       for (ix = 0; ix < nch; ix++) {
+               u32 rqn;
+
+               mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+                       rqn = res->drop_rqn;
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      rqn, ix, err);
+       }
+       for (ix = nch; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               u32 rqn;
+
+               if (mlx5e_channels_get_ptp_rqn(chs, &rqn))
+                       rqn = res->drop_rqn;
+
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      rqn, err);
+       }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+       int err;
+
+       mlx5e_rx_res_rss_disable(res);
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      res->drop_rqn, err);
+       }
+}
+
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix)
+{
+       u32 rqn;
+       int err;
+
+       if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+               return -EINVAL;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              rqn, ix, err);
+       return err;
+}
+
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       int err;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              res->drop_rqn, ix, err);
+       return err;
+}
+
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+{
+       struct mlx5e_tir_builder *builder;
+       int err, final_err;
+       unsigned int ix;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       mlx5e_tir_builder_build_lro(builder, lro_param);
+
+       final_err = 0;
+
+       for (ix = 0; ix < MLX5E_MAX_NUM_RSS; ix++) {
+               struct mlx5e_rss *rss = res->rss[ix];
+
+               if (!rss)
+                       continue;
+
+               err = mlx5e_rss_lro_set_param(rss, lro_param);
+               if (err)
+                       final_err = final_err ? : err;
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       mlx5e_tir_builder_free(builder);
+       return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+       return mlx5e_rss_get_hash(res->rss[0]);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
new file mode 100644 (file)
index 0000000..4a15942
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_RX_RES_H__
+#define __MLX5_EN_RX_RES_H__
+
+#include <linux/kernel.h>
+#include "rqt.h"
+#include "tir.h"
+#include "fs.h"
+#include "rss.h"
+
+struct mlx5e_rx_res;
+
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
+
+enum mlx5e_rx_res_features {
+       MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+       MLX5E_RX_RES_FEATURE_XSK = BIT(1),
+       MLX5E_RX_RES_FEATURE_PTP = BIT(2),
+};
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* RQTN getters for modules that create their own TIRs */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix);
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Configuration API */
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+                             u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
+                             const u32 *indir, const u8 *key, const u8 *hfunc);
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields);
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+
+int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
+int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
+int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss);
+struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
+
+#endif /* __MLX5_EN_RX_RES_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644 (file)
index 0000000..a3e43e8
--- /dev/null
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+
+struct mlx5e_post_act {
+       enum mlx5_flow_namespace_type ns_type;
+       struct mlx5_fs_chains *chains;
+       struct mlx5_flow_table *ft;
+       struct mlx5e_priv *priv;
+       struct xarray ids;
+};
+
+struct mlx5e_post_act_handle {
+       enum mlx5_flow_namespace_type ns_type;
+       struct mlx5_flow_attr *attr;
+       struct mlx5_flow_handle *rule;
+       u32 id;
+};
+
+#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
+#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0)
+#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+                      enum mlx5_flow_namespace_type ns_type)
+{
+       struct mlx5e_post_act *post_act;
+       int err;
+
+       if (ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+           !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level)) {
+               mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+               err = -EOPNOTSUPP;
+               goto err_check;
+       } else if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+               mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+               err = -EOPNOTSUPP;
+               goto err_check;
+       }
+
+       post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+       if (!post_act) {
+               err = -ENOMEM;
+               goto err_check;
+       }
+       post_act->ft = mlx5_chains_create_global_table(chains);
+       if (IS_ERR(post_act->ft)) {
+               err = PTR_ERR(post_act->ft);
+               mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+               goto err_ft;
+       }
+       post_act->chains = chains;
+       post_act->ns_type = ns_type;
+       post_act->priv = priv;
+       xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+       return post_act;
+
+err_ft:
+       kfree(post_act);
+err_check:
+       return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+       if (IS_ERR_OR_NULL(post_act))
+               return;
+
+       xa_destroy(&post_act->ids);
+       mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+       kfree(post_act);
+}
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
+{
+       u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
+       struct mlx5e_post_act_handle *handle = NULL;
+       struct mlx5_flow_attr *post_attr = NULL;
+       struct mlx5_flow_spec *spec = NULL;
+       int err;
+
+       handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
+       if (!handle || !spec || !post_attr) {
+               kfree(post_attr);
+               kvfree(spec);
+               kfree(handle);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       memcpy(post_attr, attr, attr_sz);
+       post_attr->chain = 0;
+       post_attr->prio = 0;
+       post_attr->ft = post_act->ft;
+       post_attr->inner_match_level = MLX5_MATCH_NONE;
+       post_attr->outer_match_level = MLX5_MATCH_NONE;
+       post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+
+       handle->ns_type = post_act->ns_type;
+       /* Splits were handled before post action */
+       if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+               post_attr->esw_attr->split_count = 0;
+
+       err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+                      XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+       if (err)
+               goto err_xarray;
+
+       /* Post action rule matches on fte_id and executes original rule's
+        * tc rule action
+        */
+       mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG,
+                                   handle->id, MLX5_POST_ACTION_MASK);
+
+       handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr);
+       if (IS_ERR(handle->rule)) {
+               err = PTR_ERR(handle->rule);
+               netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+               goto err_rule;
+       }
+       handle->attr = post_attr;
+
+       kvfree(spec);
+       return handle;
+
+err_rule:
+       xa_erase(&post_act->ids, handle->id);
+err_xarray:
+       kfree(post_attr);
+       kvfree(spec);
+       kfree(handle);
+       return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+       mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr);
+       xa_erase(&post_act->ids, handle->id);
+       kfree(handle->attr);
+       kfree(handle);
+}
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+       return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register. */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+                            struct mlx5e_post_act_handle *handle,
+                            struct mlx5e_tc_mod_hdr_acts *acts)
+{
+       return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644 (file)
index 0000000..b530ec1
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+                      enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+                            struct mlx5e_post_act_handle *handle,
+                            struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
new file mode 100644 (file)
index 0000000..6552ece
--- /dev/null
@@ -0,0 +1,714 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/skbuff.h>
+#include <net/psample.h>
+#include "en/mapping.h"
+#include "en/tc/post_act.h"
+#include "sample.h"
+#include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
+
+#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
+
+static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+       .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
+       .max_num_groups = 0,    /* default num of groups */
+       .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
+};
+
+struct mlx5e_tc_psample {
+       struct mlx5_eswitch *esw;
+       struct mlx5_flow_table *termtbl;
+       struct mlx5_flow_handle *termtbl_rule;
+       DECLARE_HASHTABLE(hashtbl, 8);
+       struct mutex ht_lock; /* protect hashtbl */
+       DECLARE_HASHTABLE(restore_hashtbl, 8);
+       struct mutex restore_lock; /* protect restore_hashtbl */
+       struct mlx5e_post_act *post_act;
+};
+
+struct mlx5e_sampler {
+       struct hlist_node hlist;
+       u32 sampler_id;
+       u32 sample_ratio;
+       u32 sample_table_id;
+       u32 default_table_id;
+       int count;
+};
+
+struct mlx5e_sample_flow {
+       struct mlx5e_sampler *sampler;
+       struct mlx5e_sample_restore *restore;
+       struct mlx5_flow_attr *pre_attr;
+       struct mlx5_flow_handle *pre_rule;
+       struct mlx5_flow_attr *post_attr;
+       struct mlx5_flow_handle *post_rule;
+       struct mlx5e_post_act_handle *post_act_handle;
+};
+
+struct mlx5e_sample_restore {
+       struct hlist_node hlist;
+       struct mlx5_modify_hdr *modify_hdr;
+       struct mlx5_flow_handle *rule;
+       struct mlx5e_post_act_handle *post_act_handle;
+       u32 obj_id;
+       int count;
+};
+
+static int
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
+{
+       struct mlx5_eswitch *esw = tc_psample->esw;
+       struct mlx5_flow_table_attr ft_attr = {};
+       struct mlx5_flow_destination dest = {};
+       struct mlx5_core_dev *dev = esw->dev;
+       struct mlx5_flow_namespace *root_ns;
+       struct mlx5_flow_act act = {};
+       int err;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table))  {
+               mlx5_core_warn(dev, "termination table is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+       if (!root_ns) {
+               mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
+               return -EOPNOTSUPP;
+       }
+
+       ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
+       ft_attr.autogroup.max_num_groups = 1;
+       ft_attr.prio = FDB_SLOW_PATH;
+       ft_attr.max_fte = 1;
+       ft_attr.level = 1;
+       tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+       if (IS_ERR(tc_psample->termtbl)) {
+               err = PTR_ERR(tc_psample->termtbl);
+               mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
+               return err;
+       }
+
+       act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       dest.vport.num = esw->manager_vport;
+       tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+       if (IS_ERR(tc_psample->termtbl_rule)) {
+               err = PTR_ERR(tc_psample->termtbl_rule);
+               mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
+               mlx5_destroy_flow_table(tc_psample->termtbl);
+               return err;
+       }
+
+       return 0;
+}
+
+static void
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
+{
+       mlx5_del_flow_rules(tc_psample->termtbl_rule);
+       mlx5_destroy_flow_table(tc_psample->termtbl);
+}
+
+static int
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
+{
+       u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+       u64 general_obj_types;
+       void *obj;
+       int err;
+
+       general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+       if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
+               return -EOPNOTSUPP;
+       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
+               return -EOPNOTSUPP;
+
+       obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
+       MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
+       MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
+       MLX5_SET(sampler_obj, obj, level, 1);
+       MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
+       MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
+       MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
+       MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+
+       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+       return err;
+}
+
+static void
+sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
+{
+       u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+       MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+       MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+       mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static u32
+sampler_hash(u32 sample_ratio, u32 default_table_id)
+{
+       return jhash_2words(sample_ratio, default_table_id, 0);
+}
+
+static int
+sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
+{
+       return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
+}
+
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
+{
+       struct mlx5e_sampler *sampler;
+       u32 hash_key;
+       int err;
+
+       mutex_lock(&tc_psample->ht_lock);
+       hash_key = sampler_hash(sample_ratio, default_table_id);
+       hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
+               if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
+                                sample_ratio, default_table_id))
+                       goto add_ref;
+
+       sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+       if (!sampler) {
+               err = -ENOMEM;
+               goto err_alloc;
+       }
+
+       sampler->sample_table_id = tc_psample->termtbl->id;
+       sampler->default_table_id = default_table_id;
+       sampler->sample_ratio = sample_ratio;
+
+       err = sampler_obj_create(tc_psample->esw->dev, sampler);
+       if (err)
+               goto err_create;
+
+       hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
+
+add_ref:
+       sampler->count++;
+       mutex_unlock(&tc_psample->ht_lock);
+       return sampler;
+
+err_create:
+       kfree(sampler);
+err_alloc:
+       mutex_unlock(&tc_psample->ht_lock);
+       return ERR_PTR(err);
+}
+
+static void
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
+{
+       mutex_lock(&tc_psample->ht_lock);
+       if (--sampler->count == 0) {
+               hash_del(&sampler->hlist);
+               sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
+               kfree(sampler);
+       }
+       mutex_unlock(&tc_psample->ht_lock);
+}
+
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs can preserve reg_c or decap action. For other cases,
+ * use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
+static struct mlx5_modify_hdr *
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+                     struct mlx5e_post_act_handle *handle)
+{
+       struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+       struct mlx5_modify_hdr *modify_hdr;
+       int err;
+
+       err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+                                       CHAIN_TO_REG, obj_id);
+       if (err)
+               goto err_set_regc0;
+
+       if (handle) {
+               err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
+               if (err)
+                       goto err_post_act;
+       }
+
+       modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
+                                             mod_acts.num_actions,
+                                             mod_acts.actions);
+       if (IS_ERR(modify_hdr)) {
+               err = PTR_ERR(modify_hdr);
+               goto err_modify_hdr;
+       }
+
+       dealloc_mod_hdr_actions(&mod_acts);
+       return modify_hdr;
+
+err_modify_hdr:
+err_post_act:
+       dealloc_mod_hdr_actions(&mod_acts);
+err_set_regc0:
+       return ERR_PTR(err);
+}
+
+static u32
+restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle)
+{
+       return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0);
+}
+
+static bool
+restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id,
+             struct mlx5e_post_act_handle *post_act_handle)
+{
+       return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle;
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+                  struct mlx5e_post_act_handle *post_act_handle)
+{
+       struct mlx5_eswitch *esw = tc_psample->esw;
+       struct mlx5_core_dev *mdev = esw->dev;
+       struct mlx5e_sample_restore *restore;
+       struct mlx5_modify_hdr *modify_hdr;
+       u32 hash_key;
+       int err;
+
+       mutex_lock(&tc_psample->restore_lock);
+       hash_key = restore_hash(obj_id, post_act_handle);
+       hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key)
+               if (restore_equal(restore, obj_id, post_act_handle))
+                       goto add_ref;
+
+       restore = kzalloc(sizeof(*restore), GFP_KERNEL);
+       if (!restore) {
+               err = -ENOMEM;
+               goto err_alloc;
+       }
+       restore->obj_id = obj_id;
+       restore->post_act_handle = post_act_handle;
+
+       modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle);
+       if (IS_ERR(modify_hdr)) {
+               err = PTR_ERR(modify_hdr);
+               goto err_modify_hdr;
+       }
+       restore->modify_hdr = modify_hdr;
+
+       restore->rule = esw_add_restore_rule(esw, obj_id);
+       if (IS_ERR(restore->rule)) {
+               err = PTR_ERR(restore->rule);
+               goto err_restore;
+       }
+
+       hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key);
+add_ref:
+       restore->count++;
+       mutex_unlock(&tc_psample->restore_lock);
+       return restore;
+
+err_restore:
+       mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
+err_modify_hdr:
+       kfree(restore);
+err_alloc:
+       mutex_unlock(&tc_psample->restore_lock);
+       return ERR_PTR(err);
+}
+
+static void
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
+{
+       mutex_lock(&tc_psample->restore_lock);
+       if (--restore->count == 0)
+               hash_del(&restore->hlist);
+       mutex_unlock(&tc_psample->restore_lock);
+
+       if (!restore->count) {
+               mlx5_del_flow_rules(restore->rule);
+               mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
+               kfree(restore);
+       }
+}
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+{
+       u32 trunc_size = mapped_obj->sample.trunc_size;
+       struct psample_group psample_group = {};
+       struct psample_metadata md = {};
+
+       md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
+       md.in_ifindex = skb->dev->ifindex;
+       psample_group.group_num = mapped_obj->sample.group_id;
+       psample_group.net = &init_net;
+       skb_push(skb, skb->mac_len);
+
+       psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
+}
+
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+             struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+             u32 *default_tbl_id)
+{
+       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+       struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+       struct mlx5_flow_table *default_tbl;
+       struct mlx5_flow_attr *post_attr;
+       int err;
+
+       /* Allocate default table per vport, chain and prio. Otherwise, there is
+        * only one default table for the same sampler object. Rules with different
+        * prio and chain may overlap. For CT sample action, per vport default
+        * table is needed to resotre the metadata.
+        */
+       per_vport_tbl_attr.chain = attr->chain;
+       per_vport_tbl_attr.prio = attr->prio;
+       per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+       per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+       default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+       if (IS_ERR(default_tbl)) {
+               err = PTR_ERR(default_tbl);
+               goto err_default_tbl;
+       }
+       *default_tbl_id = default_tbl->id;
+
+       post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+       if (!post_attr) {
+               err = -ENOMEM;
+               goto err_attr;
+       }
+       sample_flow->post_attr = post_attr;
+       memcpy(post_attr, attr, attr_sz);
+       /* Perform the original matches on the default table.
+        * Offload all actions except the sample action.
+        */
+       post_attr->chain = 0;
+       post_attr->prio = 0;
+       post_attr->ft = default_tbl;
+       post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+
+       /* When offloading sample and encap action, if there is no valid
+        * neigh data struct, a slow path rule is offloaded first. Source
+        * port metadata match is set at that time. A per vport table is
+        * already allocated. No need to match it again. So clear the source
+        * port metadata match.
+        */
+       mlx5_eswitch_clear_rule_source_port(esw, spec);
+       sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+       if (IS_ERR(sample_flow->post_rule)) {
+               err = PTR_ERR(sample_flow->post_rule);
+               goto err_rule;
+       }
+       return 0;
+
+err_rule:
+       kfree(post_attr);
+err_attr:
+       mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+       return err;
+}
+
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+             struct mlx5_flow_attr *attr)
+{
+       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       struct mlx5_vport_tbl_attr tbl_attr;
+
+       mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+       kfree(sample_flow->post_attr);
+       tbl_attr.chain = attr->chain;
+       tbl_attr.prio = attr->prio;
+       tbl_attr.vport = esw_attr->in_rep->vport;
+       tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+       mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
+/* For the following typical flow table:
+ *
+ * +-------------------------------+
+ * +       original flow table     +
+ * +-------------------------------+
+ * +         original match        +
+ * +-------------------------------+
+ * + sample action + other actions +
+ * +-------------------------------+
+ *
+ * We translate the tc filter with sample action to the following HW model:
+ *
+ *         +---------------------+
+ *         + original flow table +
+ *         +---------------------+
+ *         +   original match    +
+ *         +---------------------+
+ *               | set fte_id (if reg_c preserve cap)
+ *               | do decap (if required)
+ *               v
+ * +------------------------------------------------+
+ * +                Flow Sampler Object             +
+ * +------------------------------------------------+
+ * +                    sample ratio                +
+ * +------------------------------------------------+
+ * +    sample table id    |    default table id    +
+ * +------------------------------------------------+
+ *            |                            |
+ *            v                            v
+ * +-----------------------------+  +-------------------+
+ * +        sample table         +  +   default table   +
+ * +-----------------------------+  +-------------------+
+ * + forward to management vport +             |
+ * +-----------------------------+             |
+ *                                     +-------+------+
+ *                                     |              |reg_c preserve cap
+ *                                     |              |or decap action
+ *                                     v              v
+ *                        +-----------------+   +-------------+
+ *                        + per vport table +   + post action +
+ *                        +-----------------+   +-------------+
+ *                        + original match  +
+ *                        +-----------------+
+ *                        + other actions   +
+ *                        +-----------------+
+ */
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
+                       struct mlx5_flow_spec *spec,
+                       struct mlx5_flow_attr *attr,
+                       u32 tunnel_id)
+{
+       struct mlx5e_post_act_handle *post_act_handle = NULL;
+       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       struct mlx5_esw_flow_attr *pre_esw_attr;
+       struct mlx5_mapped_obj restore_obj = {};
+       struct mlx5e_sample_flow *sample_flow;
+       struct mlx5e_sample_attr *sample_attr;
+       struct mlx5_flow_attr *pre_attr;
+       struct mlx5_eswitch *esw;
+       u32 default_tbl_id;
+       u32 obj_id;
+       int err;
+
+       if (IS_ERR_OR_NULL(tc_psample))
+               return ERR_PTR(-EOPNOTSUPP);
+
+       /* If slow path flag is set, eg. when the neigh is invalid for encap,
+        * don't offload sample action.
+        */
+       esw = tc_psample->esw;
+       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+               return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+       sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
+       if (!sample_flow)
+               return ERR_PTR(-ENOMEM);
+       sample_attr = attr->sample_attr;
+       sample_attr->sample_flow = sample_flow;
+
+       /* For NICs with reg_c_preserve support or decap action, use
+        * post action instead of the per vport, chain and prio table.
+        * Only match the fte id instead of the same match in the
+        * original flow table.
+        */
+       if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) ||
+           attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+               struct mlx5_flow_table *ft;
+
+               ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+               default_tbl_id = ft->id;
+               post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr);
+               if (IS_ERR(post_act_handle)) {
+                       err = PTR_ERR(post_act_handle);
+                       goto err_post_act;
+               }
+               sample_flow->post_act_handle = post_act_handle;
+       } else {
+               err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+               if (err)
+                       goto err_post_rule;
+       }
+
+       /* Create sampler object. */
+       sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
+       if (IS_ERR(sample_flow->sampler)) {
+               err = PTR_ERR(sample_flow->sampler);
+               goto err_sampler;
+       }
+
+       /* Create an id mapping reg_c0 value to sample object. */
+       restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
+       restore_obj.sample.group_id = sample_attr->group_num;
+       restore_obj.sample.rate = sample_attr->rate;
+       restore_obj.sample.trunc_size = sample_attr->trunc_size;
+       restore_obj.sample.tunnel_id = tunnel_id;
+       err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
+       if (err)
+               goto err_obj_id;
+       sample_attr->restore_obj_id = obj_id;
+
+       /* Create sample restore context. */
+       sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle);
+       if (IS_ERR(sample_flow->restore)) {
+               err = PTR_ERR(sample_flow->restore);
+               goto err_sample_restore;
+       }
+
+       /* Perform the original matches on the original table. Offload the
+        * sample action. The destination is the sampler object.
+        */
+       pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+       if (!pre_attr) {
+               err = -ENOMEM;
+               goto err_alloc_pre_flow_attr;
+       }
+       pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+       /* For decap action, do decap in the original flow table instead of the
+        * default flow table.
+        */
+       if (tunnel_id)
+               pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+       pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
+       pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+       pre_attr->inner_match_level = attr->inner_match_level;
+       pre_attr->outer_match_level = attr->outer_match_level;
+       pre_attr->chain = attr->chain;
+       pre_attr->prio = attr->prio;
+       pre_attr->sample_attr = attr->sample_attr;
+       sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+       pre_esw_attr = pre_attr->esw_attr;
+       pre_esw_attr->in_mdev = esw_attr->in_mdev;
+       pre_esw_attr->in_rep = esw_attr->in_rep;
+       sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
+       if (IS_ERR(sample_flow->pre_rule)) {
+               err = PTR_ERR(sample_flow->pre_rule);
+               goto err_pre_offload_rule;
+       }
+       sample_flow->pre_attr = pre_attr;
+
+       return sample_flow->post_rule;
+
+err_pre_offload_rule:
+       kfree(pre_attr);
+err_alloc_pre_flow_attr:
+       sample_restore_put(tc_psample, sample_flow->restore);
+err_sample_restore:
+       mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
+err_obj_id:
+       sampler_put(tc_psample, sample_flow->sampler);
+err_sampler:
+       if (!post_act_handle)
+               del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+       if (post_act_handle)
+               mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle);
+err_post_act:
+       kfree(sample_flow);
+       return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
+                         struct mlx5_flow_handle *rule,
+                         struct mlx5_flow_attr *attr)
+{
+       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       struct mlx5e_sample_flow *sample_flow;
+       struct mlx5_vport_tbl_attr tbl_attr;
+       struct mlx5_eswitch *esw;
+
+       if (IS_ERR_OR_NULL(tc_psample))
+               return;
+
+       /* If slow path flag is set, sample action is not offloaded.
+        * No need to delete sample rule.
+        */
+       esw = tc_psample->esw;
+       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
+               mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+               return;
+       }
+
+       /* The following delete order can't be changed, otherwise,
+        * will hit fw syndromes.
+        */
+       sample_flow = attr->sample_attr->sample_flow;
+       mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+       if (!sample_flow->post_act_handle)
+               mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule,
+                                               sample_flow->post_attr);
+
+       sample_restore_put(tc_psample, sample_flow->restore);
+       mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+       sampler_put(tc_psample, sample_flow->sampler);
+       if (sample_flow->post_act_handle) {
+               mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
+       } else {
+               tbl_attr.chain = attr->chain;
+               tbl_attr.prio = attr->prio;
+               tbl_attr.vport = esw_attr->in_rep->vport;
+               tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+               mlx5_esw_vporttbl_put(esw, &tbl_attr);
+               kfree(sample_flow->post_attr);
+       }
+
+       kfree(sample_flow->pre_attr);
+       kfree(sample_flow);
+}
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
+{
+       struct mlx5e_tc_psample *tc_psample;
+       int err;
+
+       tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+       if (!tc_psample)
+               return ERR_PTR(-ENOMEM);
+       if (IS_ERR_OR_NULL(post_act)) {
+               err = PTR_ERR(post_act);
+               goto err_post_act;
+       }
+       tc_psample->post_act = post_act;
+       tc_psample->esw = esw;
+       err = sampler_termtbl_create(tc_psample);
+       if (err)
+               goto err_post_act;
+
+       mutex_init(&tc_psample->ht_lock);
+       mutex_init(&tc_psample->restore_lock);
+
+       return tc_psample;
+
+err_post_act:
+       kfree(tc_psample);
+       return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
+{
+       if (IS_ERR_OR_NULL(tc_psample))
+               return;
+
+       mutex_destroy(&tc_psample->restore_lock);
+       mutex_destroy(&tc_psample->ht_lock);
+       sampler_termtbl_destroy(tc_psample);
+       kfree(tc_psample);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644 (file)
index 0000000..db0146d
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+struct mlx5e_sample_attr {
+       u32 group_num;
+       u32 rate;
+       u32 trunc_size;
+       u32 restore_obj_id;
+       u32 sampler_id;
+       struct mlx5e_sample_flow *sample_flow;
+};
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+                       struct mlx5_flow_spec *spec,
+                       struct mlx5_flow_attr *attr,
+                       u32 tunnel_id);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+                         struct mlx5_flow_handle *rule,
+                         struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
index 91e7a01..6c949ab 100644 (file)
@@ -19,6 +19,7 @@
 #include "en/tc_ct.h"
 #include "en/mod_hdr.h"
 #include "en/mapping.h"
+#include "en/tc/post_act.h"
 #include "en.h"
 #include "en_tc.h"
 #include "en_rep.h"
 #define MLX5_CT_STATE_RELATED_BIT BIT(5)
 #define MLX5_CT_STATE_INVALID_BIT BIT(6)
 
-#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
-#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
-#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
-
 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
 
@@ -46,14 +43,13 @@ struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
-       struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
-       struct mlx5_flow_table *post_ct;
+       struct mlx5e_post_act *post_act;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
@@ -64,11 +60,9 @@ struct mlx5_tc_ct_priv {
 
 struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
-       struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
-       struct mlx5_flow_handle *post_ct_rule;
+       struct mlx5e_post_act_handle *post_act_handle;
        struct mlx5_ct_ft *ft;
-       u32 fte_id;
        u32 chain_mapping;
 };
 
@@ -768,7 +762,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
-       attr->dest_ft = ct_priv->post_ct;
+       attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
@@ -1432,7 +1426,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
                ctstate |= MLX5_CT_STATE_NAT_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
 
-       dest.ft = ct_priv->post_ct;
+       dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -1716,9 +1710,9 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
  *      | do decap
  *      v
  * +---------------------+
- * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
- * + zone+nat match      +---------------->+ post_ct (see below) +
- * +---------------------+  set zone       +---------------------+
+ * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
+ * + zone+nat match      +---------------->+ post_act (see below) +
+ * +---------------------+  set zone       +-------------------------+
  *      | set zone
  *      v
  * +--------------------+
@@ -1732,7 +1726,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
  *      | do nat (if needed)
  *      v
  * +--------------+
- * + post_ct      + original filter actions
+ * + post_act  + original filter actions
  * + fte_id match +------------------------>
  * +--------------+
  */
@@ -1746,19 +1740,15 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
-       struct mlx5_flow_spec *post_ct_spec = NULL;
+       struct mlx5e_post_act_handle *handle;
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_modify_hdr *mod_hdr;
-       struct mlx5_flow_handle *rule;
        struct mlx5_ct_flow *ct_flow;
        int chain_mapping = 0, err;
        struct mlx5_ct_ft *ft;
-       u32 fte_id = 1;
 
-       post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
-       if (!post_ct_spec || !ct_flow) {
-               kvfree(post_ct_spec);
+       if (!ct_flow) {
                kfree(ct_flow);
                return ERR_PTR(-ENOMEM);
        }
@@ -1773,14 +1763,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
        }
        ct_flow->ft = ft;
 
-       err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
-                           MLX5_FTE_ID_MAX, GFP_KERNEL);
-       if (err) {
-               netdev_warn(priv->netdev,
-                           "Failed to allocate fte id, err: %d\n", err);
-               goto err_idr;
+       handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
+       if (IS_ERR(handle)) {
+               err = PTR_ERR(handle);
+               ct_dbg("Failed to allocate post action handle");
+               goto err_post_act_handle;
        }
-       ct_flow->fte_id = fte_id;
+       ct_flow->post_act_handle = handle;
 
        /* Base flow attributes of both rules on original rule attribute */
        ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
@@ -1789,15 +1778,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
                goto err_alloc_pre;
        }
 
-       ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
-       if (!ct_flow->post_ct_attr) {
-               err = -ENOMEM;
-               goto err_alloc_post;
-       }
-
        pre_ct_attr = ct_flow->pre_ct_attr;
        memcpy(pre_ct_attr, attr, attr_sz);
-       memcpy(ct_flow->post_ct_attr, attr, attr_sz);
 
        /* Modify the original rule's action to fwd and modify, leave decap */
        pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1823,10 +1805,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
                goto err_mapping;
        }
 
-       err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
-                                       FTEID_TO_REG, fte_id);
+       err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
        if (err) {
-               ct_dbg("Failed to set fte_id register mapping");
+               ct_dbg("Failed to set post action handle");
                goto err_mapping;
        }
 
@@ -1857,33 +1838,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
        }
        pre_ct_attr->modify_hdr = mod_hdr;
 
-       /* Post ct rule matches on fte_id and executes original rule's
-        * tc rule action
-        */
-       mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
-                                   fte_id, MLX5_FTE_ID_MASK);
-
-       /* Put post_ct rule on post_ct flow table */
-       ct_flow->post_ct_attr->chain = 0;
-       ct_flow->post_ct_attr->prio = 0;
-       ct_flow->post_ct_attr->ft = ct_priv->post_ct;
-
-       /* Splits were handled before CT */
-       if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
-               ct_flow->post_ct_attr->esw_attr->split_count = 0;
-
-       ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
-       ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
-       ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
-       rule = mlx5_tc_rule_insert(priv, post_ct_spec,
-                                  ct_flow->post_ct_attr);
-       ct_flow->post_ct_rule = rule;
-       if (IS_ERR(ct_flow->post_ct_rule)) {
-               err = PTR_ERR(ct_flow->post_ct_rule);
-               ct_dbg("Failed to add post ct rule");
-               goto err_insert_post_ct;
-       }
-
        /* Change original rule point to ct table */
        pre_ct_attr->dest_chain = 0;
        pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
@@ -1897,28 +1851,21 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 
        attr->ct_attr.ct_flow = ct_flow;
        dealloc_mod_hdr_actions(&pre_mod_acts);
-       kvfree(post_ct_spec);
 
-       return rule;
+       return ct_flow->pre_ct_rule;
 
 err_insert_orig:
-       mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
-                           ct_flow->post_ct_attr);
-err_insert_post_ct:
        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
 err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
 err_get_chain:
-       kfree(ct_flow->post_ct_attr);
-err_alloc_post:
        kfree(ct_flow->pre_ct_attr);
 err_alloc_pre:
-       idr_remove(&ct_priv->fte_ids, fte_id);
-err_idr:
+       mlx5e_tc_post_act_del(ct_priv->post_act, handle);
+err_post_act_handle:
        mlx5_tc_ct_del_ft_cb(ct_priv, ft);
 err_ft:
-       kvfree(post_ct_spec);
        kfree(ct_flow);
        netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
        return ERR_PTR(err);
@@ -2029,16 +1976,13 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
                            pre_ct_attr);
        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
 
-       if (ct_flow->post_ct_rule) {
-               mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
-                                   ct_flow->post_ct_attr);
+       if (ct_flow->post_act_handle) {
                mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
-               idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
+               mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
                mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
        }
 
        kfree(ct_flow->pre_ct_attr);
-       kfree(ct_flow->post_ct_attr);
        kfree(ct_flow);
 }
 
@@ -2064,11 +2008,6 @@ static int
 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
                                  const char **err_msg)
 {
-       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
-               *err_msg = "firmware level support is missing";
-               return -EOPNOTSUPP;
-       }
-
        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
                /* vlan workaround should be avoided for multi chain rules.
                 * This is just a sanity check as pop vlan action should
@@ -2097,21 +2036,10 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
        return 0;
 }
 
-static int
-mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
-                                 const char **err_msg)
-{
-       if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
-               *err_msg = "firmware level support is missing";
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
 static int
 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
                              enum mlx5_flow_namespace_type ns_type,
+                             struct mlx5e_post_act *post_act,
                              const char **err_msg)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -2122,10 +2050,14 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
        *err_msg = "tc skb extension missing";
        return -EOPNOTSUPP;
 #endif
+       if (IS_ERR_OR_NULL(post_act)) {
+               *err_msg = "tc ct offload not supported, post action is missing";
+               return -EOPNOTSUPP;
+       }
+
        if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
                return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
-       else
-               return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
+       return 0;
 }
 
 #define INIT_ERR_PREFIX "tc ct offload init failed"
@@ -2133,19 +2065,19 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
 struct mlx5_tc_ct_priv *
 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
                struct mod_hdr_tbl *mod_hdr,
-               enum mlx5_flow_namespace_type ns_type)
+               enum mlx5_flow_namespace_type ns_type,
+               struct mlx5e_post_act *post_act)
 {
        struct mlx5_tc_ct_priv *ct_priv;
        struct mlx5_core_dev *dev;
        const char *msg;
+       u64 mapping_id;
        int err;
 
        dev = priv->mdev;
-       err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
+       err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg);
        if (err) {
-               mlx5_core_warn(dev,
-                              "tc ct offload not supported, %s\n",
-                              msg);
+               mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg);
                goto err_support;
        }
 
@@ -2153,13 +2085,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
        if (!ct_priv)
                goto err_alloc;
 
-       ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+       ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+                                                     sizeof(u16), 0, true);
        if (IS_ERR(ct_priv->zone_mapping)) {
                err = PTR_ERR(ct_priv->zone_mapping);
                goto err_mapping_zone;
        }
 
-       ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+       ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+                                                       sizeof(u32) * 4, 0, true);
        if (IS_ERR(ct_priv->labels_mapping)) {
                err = PTR_ERR(ct_priv->labels_mapping);
                goto err_mapping_labels;
@@ -2189,16 +2125,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
                goto err_ct_nat_tbl;
        }
 
-       ct_priv->post_ct = mlx5_chains_create_global_table(chains);
-       if (IS_ERR(ct_priv->post_ct)) {
-               err = PTR_ERR(ct_priv->post_ct);
-               mlx5_core_warn(dev,
-                              "%s, failed to create post ct table err: %d\n",
-                              INIT_ERR_PREFIX, err);
-               goto err_post_ct_tbl;
-       }
-
-       idr_init(&ct_priv->fte_ids);
+       ct_priv->post_act = post_act;
        mutex_init(&ct_priv->control_lock);
        rhashtable_init(&ct_priv->zone_ht, &zone_params);
        rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
@@ -2206,8 +2133,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
 
        return ct_priv;
 
-err_post_ct_tbl:
-       mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
 err_ct_nat_tbl:
        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
 err_ct_tbl:
@@ -2232,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
 
        chains = ct_priv->chains;
 
-       mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
        mapping_destroy(ct_priv->zone_mapping);
@@ -2242,7 +2166,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
        rhashtable_destroy(&ct_priv->zone_ht);
        mutex_destroy(&ct_priv->control_lock);
-       idr_destroy(&ct_priv->fte_ids);
        kfree(ct_priv);
 }
 
index 644cf16..363329f 100644 (file)
@@ -92,7 +92,8 @@ struct mlx5_ct_attr {
 struct mlx5_tc_ct_priv *
 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
                struct mod_hdr_tbl *mod_hdr,
-               enum mlx5_flow_namespace_type ns_type);
+               enum mlx5_flow_namespace_type ns_type,
+               struct mlx5e_post_act *post_act);
 void
 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
 
@@ -132,7 +133,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
 static inline struct mlx5_tc_ct_priv *
 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
                struct mod_hdr_tbl *mod_hdr,
-               enum mlx5_flow_namespace_type ns_type)
+               enum mlx5_flow_namespace_type ns_type,
+               struct mlx5e_post_act *post_act)
 {
        return NULL;
 }
index 1e2d117..b4e9868 100644 (file)
@@ -525,7 +525,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        e->out_dev = attr.out_dev;
        e->route_dev_ifindex = attr.route_dev->ifindex;
 
-       /* It's importent to add the neigh to the hash table before checking
+       /* It's important to add the neigh to the hash table before checking
         * the neigh validity state. So if we'll get a notification, in case the
         * neigh changes it's validity state, we would find the relevant neigh
         * in the hash.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
new file mode 100644 (file)
index 0000000..de936dc
--- /dev/null
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "tir.h"
+#include "params.h"
+#include <linux/mlx5/transobj.h>
+
+#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
+
+/* max() doesn't work inside square brackets. */
+#define MLX5E_TIR_CMD_IN_SZ_DW ( \
+       MLX5_ST_SZ_DW(create_tir_in) > MLX5_ST_SZ_DW(modify_tir_in) ? \
+       MLX5_ST_SZ_DW(create_tir_in) : MLX5_ST_SZ_DW(modify_tir_in) \
+)
+
+struct mlx5e_tir_builder {
+       u32 in[MLX5E_TIR_CMD_IN_SZ_DW];
+       bool modify;
+};
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify)
+{
+       struct mlx5e_tir_builder *builder;
+
+       builder = kvzalloc(sizeof(*builder), GFP_KERNEL);
+       builder->modify = modify;
+
+       return builder;
+}
+
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder)
+{
+       kvfree(builder);
+}
+
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder)
+{
+       memset(builder->in, 0, sizeof(builder->in));
+}
+
+static void *mlx5e_tir_builder_get_tirc(struct mlx5e_tir_builder *builder)
+{
+       if (builder->modify)
+               return MLX5_ADDR_OF(modify_tir_in, builder->in, ctx);
+       return MLX5_ADDR_OF(create_tir_in, builder->in, ctx);
+}
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+       WARN_ON(builder->modify);
+
+       MLX5_SET(tirc, tirc, transport_domain, tdn);
+       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+       MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
+       MLX5_SET(tirc, tirc, inline_rqn, rqn);
+}
+
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+                                u32 rqtn, bool inner_ft_support)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+       WARN_ON(builder->modify);
+
+       MLX5_SET(tirc, tirc, transport_domain, tdn);
+       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
+       MLX5_SET(tirc, tirc, indirect_table, rqtn);
+       MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
+}
+
+void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+                                const struct mlx5e_lro_param *lro_param)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+       const unsigned int rough_max_l2_l3_hdr_sz = 256;
+
+       if (builder->modify)
+               MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
+
+       if (!lro_param->enabled)
+               return;
+
+       MLX5_SET(tirc, tirc, lro_enable_mask,
+                MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
+                MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+       MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
+                (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
+       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
+}
+
+static int mlx5e_hfunc_to_hw(u8 hfunc)
+{
+       switch (hfunc) {
+       case ETH_RSS_HASH_TOP:
+               return MLX5_RX_HASH_FN_TOEPLITZ;
+       case ETH_RSS_HASH_XOR:
+               return MLX5_RX_HASH_FN_INVERTED_XOR8;
+       default:
+               return MLX5_RX_HASH_FN_NONE;
+       }
+}
+
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+                                const struct mlx5e_rss_params_hash *rss_hash,
+                                const struct mlx5e_rss_params_traffic_type *rss_tt,
+                                bool inner)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+       void *hfso;
+
+       if (builder->modify)
+               MLX5_SET(modify_tir_in, builder->in, bitmask.hash, 1);
+
+       MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_hfunc_to_hw(rss_hash->hfunc));
+       if (rss_hash->hfunc == ETH_RSS_HASH_TOP) {
+               const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
+               void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+
+               MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+               memcpy(rss_key, rss_hash->toeplitz_hash_key, len);
+       }
+
+       if (inner)
+               hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
+       else
+               hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+       MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, rss_tt->l3_prot_type);
+       MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, rss_tt->l4_prot_type);
+       MLX5_SET(rx_hash_field_select, hfso, selected_fields, rss_tt->rx_hash_fields);
+}
+
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+       WARN_ON(builder->modify);
+
+       MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
+}
+
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder)
+{
+       void *tirc = mlx5e_tir_builder_get_tirc(builder);
+
+       WARN_ON(builder->modify);
+
+       MLX5_SET(tirc, tirc, tls_en, 1);
+       MLX5_SET(tirc, tirc, self_lb_block,
+                MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
+                MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+}
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+                  struct mlx5_core_dev *mdev, bool reg)
+{
+       int err;
+
+       tir->mdev = mdev;
+
+       err = mlx5_core_create_tir(tir->mdev, builder->in, &tir->tirn);
+       if (err)
+               return err;
+
+       if (reg) {
+               struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+               mutex_lock(&res->td.list_lock);
+               list_add(&tir->list, &res->td.tirs_list);
+               mutex_unlock(&res->td.list_lock);
+       } else {
+               INIT_LIST_HEAD(&tir->list);
+       }
+
+       return 0;
+}
+
+void mlx5e_tir_destroy(struct mlx5e_tir *tir)
+{
+       struct mlx5e_hw_objs *res = &tir->mdev->mlx5e_res.hw_objs;
+
+       /* Skip mutex if list_del is no-op (the TIR wasn't registered in the
+        * list). list_empty will never return true for an item of tirs_list,
+        * and READ_ONCE/WRITE_ONCE in list_empty/list_del guarantee consistency
+        * of the list->next value.
+        */
+       if (!list_empty(&tir->list)) {
+               mutex_lock(&res->td.list_lock);
+               list_del(&tir->list);
+               mutex_unlock(&res->td.list_lock);
+       }
+
+       mlx5_core_destroy_tir(tir->mdev, tir->tirn);
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder)
+{
+       return mlx5_core_modify_tir(tir->mdev, tir->tirn, builder->in);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
new file mode 100644 (file)
index 0000000..e45149a
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_TIR_H__
+#define __MLX5_EN_TIR_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_rss_params_hash {
+       u8 hfunc;
+       u8 toeplitz_hash_key[40];
+};
+
+struct mlx5e_rss_params_traffic_type {
+       u8 l3_prot_type;
+       u8 l4_prot_type;
+       u32 rx_hash_fields;
+};
+
+struct mlx5e_tir_builder;
+struct mlx5e_lro_param;
+
+struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
+void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
+
+void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
+void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
+                                u32 rqtn, bool inner_ft_support);
+void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
+                                const struct mlx5e_lro_param *lro_param);
+void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
+                                const struct mlx5e_rss_params_hash *rss_hash,
+                                const struct mlx5e_rss_params_traffic_type *rss_tt,
+                                bool inner);
+void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder);
+void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder);
+
+struct mlx5_core_dev;
+
+struct mlx5e_tir {
+       struct mlx5_core_dev *mdev;
+       u32 tirn;
+       struct list_head list;
+};
+
+int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+                  struct mlx5_core_dev *mdev, bool reg);
+void mlx5e_tir_destroy(struct mlx5e_tir *tir);
+
+static inline u32 mlx5e_tir_get_tirn(struct mlx5e_tir *tir)
+{
+       return tir->tirn;
+}
+
+int mlx5e_tir_modify(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder);
+
+#endif /* __MLX5_EN_TIR_H__ */
index 7f94508..d54607a 100644 (file)
@@ -92,30 +92,19 @@ static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
 static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
                                           u32 rqn)
 {
-       void *tirc;
-       int inlen;
-       u32 *in;
+       struct mlx5e_tir_builder *builder;
        int err;
 
-       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
                return -ENOMEM;
 
-       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-       MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
-       MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
-       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
-       MLX5_SET(tirc, tirc, inline_rqn, rqn);
-       err = mlx5e_create_tir(mdev, tir, in);
-       kvfree(in);
+       mlx5e_tir_builder_build_inline(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqn);
+       err = mlx5e_tir_init(tir, builder, mdev, true);
 
-       return err;
-}
+       mlx5e_tir_builder_free(builder);
 
-static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir)
-{
-       mlx5e_destroy_tir(mdev, tir);
+       return err;
 }
 
 static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
@@ -173,7 +162,7 @@ err_napi_del:
 
 void mlx5e_close_trap(struct mlx5e_trap *trap)
 {
-       mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir);
+       mlx5e_tir_destroy(&trap->tir);
        mlx5e_close_trap_rq(&trap->rq);
        netif_napi_del(&trap->napi);
        kvfree(trap);
index 71e8d66..7b562d2 100644 (file)
@@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
         * any Fill Ring entries at the setup stage.
         */
 
-       err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+       err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
        if (unlikely(err))
                goto err_deactivate;
 
@@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
                goto remove_pool;
 
        c = priv->channels.c[ix];
-       mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+       mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
        mlx5e_deactivate_xsk(c);
        mlx5e_close_xsk(c);
 
index a8315f1..538bc24 100644 (file)
@@ -126,7 +126,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
        /* Create a separate SQ, so that when the buff pool is disabled, we could
         * close this SQ safely and stop receiving CQEs. In other case, e.g., if
         * the XDPSQ was used instead, we might run into trouble when the buff pool
-        * is disabled and then reenabled, but the SQ continues receiving CQEs
+        * is disabled and then re-enabled, but the SQ continues receiving CQEs
         * from the old buff pool.
         */
        err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true);
@@ -183,73 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
        mlx5e_deactivate_rq(&c->xskrq);
        /* TX queue is disabled on close. */
 }
-
-static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
-{
-       struct mlx5e_redirect_rqt_param direct_rrp = {
-               .is_rss = false,
-               {
-                       .rqn = rqn,
-               },
-       };
-
-       u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
-
-       return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
-}
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
-       return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
-       return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int err, i;
-
-       if (!priv->xsk.refcnt)
-               return 0;
-
-       for (i = 0; i < chs->num; i++) {
-               struct mlx5e_channel *c = chs->c[i];
-
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
-                       continue;
-
-               err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
-               if (unlikely(err))
-                       goto err_stop;
-       }
-
-       return 0;
-
-err_stop:
-       for (i--; i >= 0; i--) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-
-       return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int i;
-
-       if (!priv->xsk.refcnt)
-               return;
-
-       for (i = 0; i < chs->num; i++) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-}
index ca20f1f..50e111b 100644 (file)
@@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 void mlx5e_close_xsk(struct mlx5e_channel *c);
 void mlx5e_activate_xsk(struct mlx5e_channel *c);
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
 
 #endif /* __MLX5_EN_XSK_SETUP_H__ */
index e51f60b..4c4ee52 100644 (file)
@@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp {
        struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
 };
 
-static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
 {
        switch (i) {
        case ACCEL_FS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        default: /* ACCEL_FS_IPV6_TCP */
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        }
 }
 
@@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
        fs_tcp = priv->fs.accel_tcp;
        accel_fs_t = &fs_tcp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
        rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.accel_tcp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
index 34119ce..17da23d 100644 (file)
@@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx {
 };
 
 /* IPsec RX flow steering */
-static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
 {
        if (i == ACCEL_FS_ESP4)
-               return MLX5E_TT_IPV4_IPSEC_ESP;
-       return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
+       return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
 static int rx_err_add_rule(struct mlx5e_priv *priv,
@@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        accel_esp = priv->ipsec->rx_fs;
        fs_prot = &accel_esp->fs_prot[type];
 
-       fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type));
+       fs_prot->default_dest =
+               mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
        if (err)
@@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        /* connect */
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest.ft = fs_prot->ft;
-       mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest);
+       mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
 
 out:
        mutex_unlock(&fs_prot->prot_mutex);
@@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
                goto out;
 
        /* disconnect */
-       mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type));
+       mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        /* remove FT */
        rx_destroy(priv, type);
index 4e58fad..62abce0 100644 (file)
@@ -49,7 +49,7 @@ struct mlx5e_ktls_offload_context_rx {
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_tls_sw_stats *sw_stats;
        struct completion add_ctx;
-       u32 tirn;
+       struct mlx5e_tir tir;
        u32 key_id;
        u32 rxq;
        DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
@@ -99,31 +99,22 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
        return resp_list;
 }
 
-static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
+static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn)
 {
-       int err, inlen;
-       void *tirc;
-       u32 *in;
+       struct mlx5e_tir_builder *builder;
+       int err;
 
-       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
                return -ENOMEM;
 
-       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-
-       MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
-       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-       MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
-       MLX5_SET(tirc, tirc, indirect_table, rqtn);
-       MLX5_SET(tirc, tirc, tls_en, 1);
-       MLX5_SET(tirc, tirc, self_lb_block,
-                MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST |
-                MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST);
+       mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false);
+       mlx5e_tir_builder_build_direct(builder);
+       mlx5e_tir_builder_build_tls(builder);
+       err = mlx5e_tir_init(tir, builder, mdev, false);
 
-       err = mlx5_core_create_tir(mdev, in, tirn);
+       mlx5e_tir_builder_free(builder);
 
-       kvfree(in);
        return err;
 }
 
@@ -139,7 +130,8 @@ static void accel_rule_handle_work(struct work_struct *work)
                goto out;
 
        rule = mlx5e_accel_fs_add_sk(accel_rule->priv, priv_rx->sk,
-                                    priv_rx->tirn, MLX5_FS_DEFAULT_FLOW_TAG);
+                                    mlx5e_tir_get_tirn(&priv_rx->tir),
+                                    MLX5_FS_DEFAULT_FLOW_TAG);
        if (!IS_ERR_OR_NULL(rule))
                accel_rule->rule = rule;
 out:
@@ -173,8 +165,8 @@ post_static_params(struct mlx5e_icosq *sq,
        pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
        wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi);
        mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info,
-                                      priv_rx->tirn, priv_rx->key_id,
-                                      priv_rx->resync.seq, false,
+                                      mlx5e_tir_get_tirn(&priv_rx->tir),
+                                      priv_rx->key_id, priv_rx->resync.seq, false,
                                       TLS_OFFLOAD_CTX_DIR_RX);
        wi = (struct mlx5e_icosq_wqe_info) {
                .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS,
@@ -202,8 +194,9 @@ post_progress_params(struct mlx5e_icosq *sq,
 
        pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
        wqe = MLX5E_TLS_FETCH_SET_PROGRESS_PARAMS_WQE(sq, pi);
-       mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn, priv_rx->tirn, false,
-                                        next_record_tcp_sn,
+       mlx5e_ktls_build_progress_params(wqe, sq->pc, sq->sqn,
+                                        mlx5e_tir_get_tirn(&priv_rx->tir),
+                                        false, next_record_tcp_sn,
                                         TLS_OFFLOAD_CTX_DIR_RX);
        wi = (struct mlx5e_icosq_wqe_info) {
                .wqe_type = MLX5E_ICOSQ_WQE_SET_PSV_TLS,
@@ -325,7 +318,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
        psv = &wqe->psv;
        psv->num_psv      = 1 << 4;
        psv->l_key        = sq->channel->mkey_be;
-       psv->psv_index[0] = cpu_to_be32(priv_rx->tirn);
+       psv->psv_index[0] = cpu_to_be32(mlx5e_tir_get_tirn(&priv_rx->tir));
        psv->va           = cpu_to_be64(buf->dma_addr);
 
        wi = (struct mlx5e_icosq_wqe_info) {
@@ -635,9 +628,9 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        priv_rx->sw_stats = &priv->tls->sw_stats;
        mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
 
-       rqtn = priv->direct_tir[rxq].rqt.rqtn;
+       rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
 
-       err = mlx5e_ktls_create_tir(mdev, &priv_rx->tirn, rqtn);
+       err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
        if (err)
                goto err_create_tir;
 
@@ -658,7 +651,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        return 0;
 
 err_post_wqes:
-       mlx5_core_destroy_tir(mdev, priv_rx->tirn);
+       mlx5e_tir_destroy(&priv_rx->tir);
 err_create_tir:
        mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
 err_create_key:
@@ -693,7 +686,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
        if (priv_rx->rule.rule)
                mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
 
-       mlx5_core_destroy_tir(mdev, priv_rx->tirn);
+       mlx5e_tir_destroy(&priv_rx->tir);
        mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
        /* priv_rx should normally be freed here, but if there is an outstanding
         * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
index 25403af..fe5d82f 100644 (file)
@@ -98,17 +98,17 @@ struct arfs_rule {
        for (j = 0; j < ARFS_HASH_SIZE; j++) \
                hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
 
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
 {
        switch (type) {
        case ARFS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case ARFS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case ARFS_IPV6_TCP:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case ARFS_IPV6_UDP:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        default:
                return -EINVAL;
        }
@@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                /* Modify ttc rules destination back to their default */
-               err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
                /* Modify ttc rules destination to point on the aRFS FTs */
-               err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
@@ -192,10 +192,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                                 enum arfs_type type)
 {
        struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
-       struct mlx5e_tir *tir = priv->indir_tir;
        struct mlx5_flow_destination dest = {};
        MLX5_DECLARE_FLOW_ACT(flow_act);
-       enum mlx5e_traffic_types tt;
+       enum mlx5_traffic_types tt;
        int err = 0;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
@@ -206,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                return -EINVAL;
        }
 
-       /* FIXME: Must use mlx5e_ttc_get_default_dest(),
+       /* FIXME: Must use mlx5_ttc_get_default_dest(),
         * but can't since TTC default is not setup yet !
         */
-       dest.tir_num = tir[tt].tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
                                                   &flow_act,
                                                   &dest, 1);
@@ -553,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
                       16);
        }
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -576,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
        int err = 0;
 
        dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst.tir_num = priv->direct_tir[rxq].tirn;
+       dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
        err =  mlx5_modify_rule_destination(rule, &dst, NULL);
        if (err)
                netdev_warn(priv->netdev,
index 8c166ee..84eb720 100644 (file)
 #include "en.h"
 
 /* mlx5e global resources should be placed in this file.
- * Global resources are common to all the netdevices crated on the same nic.
+ * Global resources are common to all the netdevices created on the same nic.
  */
 
-int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
-{
-       struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
-       int err;
-
-       err = mlx5_core_create_tir(mdev, in, &tir->tirn);
-       if (err)
-               return err;
-
-       mutex_lock(&res->td.list_lock);
-       list_add(&tir->list, &res->td.tirs_list);
-       mutex_unlock(&res->td.list_lock);
-
-       return 0;
-}
-
-void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
-                      struct mlx5e_tir *tir)
-{
-       struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
-
-       mutex_lock(&res->td.list_lock);
-       mlx5_core_destroy_tir(mdev, tir->tirn);
-       list_del(&tir->list);
-       mutex_unlock(&res->td.list_lock);
-}
-
 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
 {
        bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
index bd72572..2cfd129 100644 (file)
@@ -420,6 +420,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
        unsigned int count = ch->combined_count;
        struct mlx5e_params new_params;
        bool arfs_enabled;
+       int rss_cnt;
        bool opened;
        int err = 0;
 
@@ -455,6 +456,27 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
                goto out;
        }
 
+       /* Don't allow changing the number of channels if non-default RSS contexts exist,
+        * the kernel doesn't protect against set_channels operations that break them.
+        */
+       rss_cnt = mlx5e_rx_res_rss_cnt(priv->rx_res) - 1;
+       if (rss_cnt) {
+               err = -EINVAL;
+               netdev_err(priv->netdev, "%s: Non-default RSS contexts exist (%d), cannot change the number of channels\n",
+                          __func__, rss_cnt);
+               goto out;
+       }
+
+       /* Don't allow changing the number of channels if MQPRIO mode channel offload is active,
+        * because it defines a partition over the channels queues.
+        */
+       if (cur_params->mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
+               err = -EINVAL;
+               netdev_err(priv->netdev, "%s: MQPRIO mode channel offload is active, cannot change the number of channels\n",
+                          __func__);
+               goto out;
+       }
+
        new_params = *cur_params;
        new_params.num_channels = count;
 
@@ -512,7 +534,9 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
 }
 
 static int mlx5e_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -630,7 +654,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 }
 
 static int mlx5e_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv    = netdev_priv(netdev);
 
@@ -1172,7 +1198,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
 
 u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
 {
-       return sizeof(priv->rss_params.toeplitz_hash_key);
+       return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
 }
 
 static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -1194,88 +1220,64 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
        return mlx5e_ethtool_get_rxfh_indir_size(priv);
 }
 
-int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
-                  u8 *hfunc)
+static int mlx5e_get_rxfh_context(struct net_device *dev, u32 *indir,
+                                 u8 *key, u8 *hfunc, u32 rss_context)
 {
-       struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5e_rss_params *rss = &priv->rss_params;
-
-       if (indir)
-               memcpy(indir, rss->indirection_rqt,
-                      sizeof(rss->indirection_rqt));
-
-       if (key)
-               memcpy(key, rss->toeplitz_hash_key,
-                      sizeof(rss->toeplitz_hash_key));
-
-       if (hfunc)
-               *hfunc = rss->hfunc;
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       int err;
 
-       return 0;
+       mutex_lock(&priv->state_lock);
+       err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, indir, key, hfunc);
+       mutex_unlock(&priv->state_lock);
+       return err;
 }
 
-int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
-                  const u8 *key, const u8 hfunc)
+static int mlx5e_set_rxfh_context(struct net_device *dev, const u32 *indir,
+                                 const u8 *key, const u8 hfunc,
+                                 u32 *rss_context, bool delete)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct mlx5e_rss_params *rss = &priv->rss_params;
-       int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-       bool refresh_tirs = false;
-       bool refresh_rqt = false;
-       void *in;
-
-       if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
-           (hfunc != ETH_RSS_HASH_XOR) &&
-           (hfunc != ETH_RSS_HASH_TOP))
-               return -EINVAL;
-
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
+       int err;
 
        mutex_lock(&priv->state_lock);
-
-       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) {
-               rss->hfunc = hfunc;
-               refresh_rqt = true;
-               refresh_tirs = true;
-       }
-
-       if (indir) {
-               memcpy(rss->indirection_rqt, indir,
-                      sizeof(rss->indirection_rqt));
-               refresh_rqt = true;
+       if (delete) {
+               err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
+               goto unlock;
        }
 
-       if (key) {
-               memcpy(rss->toeplitz_hash_key, key,
-                      sizeof(rss->toeplitz_hash_key));
-               refresh_tirs = refresh_tirs || rss->hfunc == ETH_RSS_HASH_TOP;
-       }
+       if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+               unsigned int count = priv->channels.params.num_channels;
 
-       if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               struct mlx5e_redirect_rqt_param rrp = {
-                       .is_rss = true,
-                       {
-                               .rss = {
-                                       .hfunc = rss->hfunc,
-                                       .channels  = &priv->channels,
-                               },
-                       },
-               };
-               u32 rqtn = priv->indir_rqt.rqtn;
-
-               mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
+               err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
+               if (err)
+                       goto unlock;
        }
 
-       if (refresh_tirs)
-               mlx5e_modify_tirs_hash(priv, in);
+       err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, indir, key,
+                                       hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
 
+unlock:
        mutex_unlock(&priv->state_lock);
+       return err;
+}
 
-       kvfree(in);
+int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+                  u8 *hfunc)
+{
+       return mlx5e_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
 
-       return 0;
+int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
+                  const u8 *key, const u8 hfunc)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       int err;
+
+       mutex_lock(&priv->state_lock);
+       err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, 0, indir, key,
+                                       hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
+       mutex_unlock(&priv->state_lock);
+       return err;
 }
 
 #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC                100
@@ -2358,6 +2360,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
        .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size,
        .get_rxfh          = mlx5e_get_rxfh,
        .set_rxfh          = mlx5e_set_rxfh,
+       .get_rxfh_context  = mlx5e_get_rxfh_context,
+       .set_rxfh_context  = mlx5e_set_rxfh_context,
        .get_rxnfc         = mlx5e_get_rxnfc,
        .set_rxnfc         = mlx5e_set_rxnfc,
        .get_tunable       = mlx5e_get_tunable,
index 0b75fab..c06b4b9 100644 (file)
@@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
        if (!spec)
                return -ENOMEM;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        rule_p = &priv->fs.promisc.rule;
        *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
@@ -854,593 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
        ft->t = NULL;
 }
 
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
-       int i;
-
-       for (i = 0; i < MLX5E_NUM_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
-                       mlx5_del_flow_rules(ttc->rules[i].rule);
-                       ttc->rules[i].rule = NULL;
-               }
-       }
-
-       for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
-                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
-                       ttc->tunnel_rules[i] = NULL;
-               }
-       }
-}
-
-struct mlx5e_etype_proto {
-       u16 etype;
-       u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
-       [MLX5E_TT_IPV4_TCP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV6_TCP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV4_UDP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV6_UDP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV4_IPSEC_AH] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV6_IPSEC_AH] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV4] = {
-               .etype = ETH_P_IP,
-               .proto = 0,
-       },
-       [MLX5E_TT_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = 0,
-       },
-       [MLX5E_TT_ANY] = {
-               .etype = 0,
-               .proto = 0,
-       },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
-       [MLX5E_TT_IPV4_GRE] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV6_GRE] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV4_IPIP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV6_IPIP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV4_IPV6] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPV6,
-       },
-       [MLX5E_TT_IPV6_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPV6,
-       },
-
-};
-
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt)
-{
-       return ttc_tunnel_rules[tt].proto;
-}
-
-static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type)
-{
-       switch (proto_type) {
-       case IPPROTO_GRE:
-               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
-       case IPPROTO_IPIP:
-       case IPPROTO_IPV6:
-               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
-                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
-       default:
-               return false;
-       }
-}
-
-static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
-{
-       int tt;
-
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto))
-                       return true;
-       }
-       return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
-       return (mlx5e_tunnel_any_rx_proto_supported(mdev) &&
-               MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
-       if (ethertype == ETH_P_IP)
-               return 4;
-
-       if (ethertype == ETH_P_IPV6)
-               return 6;
-
-       return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-                       struct mlx5_flow_table *ft,
-                       struct mlx5_flow_destination *dest,
-                       u16 etype,
-                       u8 proto)
-{
-       int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
-       }
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (match_ipv_outer && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
-       } else if (etype) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
-                                         struct ttc_params *params,
-                                         struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5_flow_handle **trules;
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int tt;
-       int err;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_rules[tt].etype,
-                                                    ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               return 0;
-
-       trules    = ttc->tunnel_rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft   = params->inner_ttc->ft.t;
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (!mlx5e_tunnel_proto_supported_rx(priv->mdev,
-                                                    ttc_tunnel_rules[tt].proto))
-                       continue;
-               trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_tunnel_rules[tt].etype,
-                                                    ttc_tunnel_rules[tt].proto);
-               if (IS_ERR(trules[tt])) {
-                       err = PTR_ERR(trules[tt]);
-                       trules[tt] = NULL;
-                       goto del_rules;
-               }
-       }
-
-       return 0;
-
-del_rules:
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
-                                        bool use_ipv)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
-                       sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
-       if (use_ipv)
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
-       else
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
-                             struct mlx5_flow_table *ft,
-                             struct mlx5_flow_destination *dest,
-                             u16 etype, u8 proto)
-{
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (etype && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
-       }
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
-                                               struct ttc_params *params,
-                                               struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int err;
-       int tt;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
-                                                          ttc_rules[tt].etype,
-                                                          ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       return 0;
-
-del_rules:
-
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
-                               struct ttc_params *ttc_params)
-{
-       ttc_params->any_tt_tirn = priv->direct_tir[0].tirn;
-       ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+                                      struct ttc_params *ttc_params)
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
+
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+                                                               tt);
+       }
 }
 
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel)
 
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
-}
-
-int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                                struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
 
-       if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               return 0;
-
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        }
 
-       err = mlx5e_create_inner_ttc_table_groups(ttc);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
-                                  struct mlx5e_ttc_table *ttc)
-{
-       if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+       ttc_params->inner_ttc = tunnel;
+       if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
                return;
 
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc)
-{
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc)
-{
-       bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
-
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               ttc_params->tunnel_dests[tt].type =
+                       MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+               ttc_params->tunnel_dests[tt].ft =
+                       mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
        }
-
-       err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest)
-{
-       return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL);
-}
-
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest;
-
-       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
-                 "TTC[%d] default dest is not setup yet", type);
-
-       return *dest;
-}
-
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type);
-
-       return mlx5e_ttc_fwd_dest(priv, type, &dest);
 }
 
 static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
@@ -1473,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                               outer_headers.dmac_47_16);
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        switch (type) {
        case MLX5E_FULLMATCH:
@@ -1769,10 +1235,47 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
        kvfree(priv->fs.vlan);
 }
 
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return;
+       mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+{
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
 {
        struct ttc_params ttc_params = {};
-       int tt, err;
+
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return 0;
+
+       mlx5e_set_inner_ttc_params(priv, &ttc_params);
+       priv->fs.inner_ttc = mlx5_create_inner_ttc_table(priv->mdev,
+                                                        &ttc_params);
+       if (IS_ERR(priv->fs.inner_ttc))
+               return PTR_ERR(priv->fs.inner_ttc);
+       return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+{
+       struct ttc_params ttc_params = {};
+
+       mlx5e_set_ttc_params(priv, &ttc_params, true);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc))
+               return PTR_ERR(priv->fs.ttc);
+       return 0;
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -1787,23 +1290,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-       mlx5e_set_inner_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
-       err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
+       err = mlx5e_create_inner_ttc_table(priv);
        if (err) {
-               netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
+               netdev_err(priv->netdev,
+                          "Failed to create inner ttc table, err=%d\n",
                           err);
                goto err_destroy_arfs_tables;
        }
 
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
@@ -1837,9 +1332,9 @@ err_destory_vlan_table:
 err_destroy_l2_table:
        mlx5e_destroy_l2_table(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+       mlx5e_destroy_ttc_table(priv);
 err_destroy_inner_ttc_table:
-       mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_inner_ttc_table(priv);
 err_destroy_arfs_tables:
        mlx5e_arfs_destroy_tables(priv);
 
@@ -1851,8 +1346,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
        mlx5e_ptp_free_rx_fs(priv);
        mlx5e_destroy_vlan_table(priv);
        mlx5e_destroy_l2_table(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_ttc_table(priv);
+       mlx5e_destroy_inner_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
        mlx5e_ethtool_cleanup_steering(priv);
 }
index b416a8e..03693fa 100644 (file)
 #include "en/params.h"
 #include "en/xsk/pool.h"
 
+static int flow_type_to_traffic_type(u32 flow_type);
+
+static u32 flow_type_mask(u32 flow_type)
+{
+       return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+}
+
 struct mlx5e_ethtool_rule {
        struct list_head             list;
        struct ethtool_rx_flow_spec  flow_spec;
        struct mlx5_flow_handle      *rule;
        struct mlx5e_ethtool_table   *eth_ft;
+       struct mlx5e_rss             *rss;
 };
 
 static void put_flow_table(struct mlx5e_ethtool_table *eth_ft)
@@ -66,7 +74,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
        int table_size;
        int prio;
 
-       switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+       switch (flow_type_mask(fs->flow_type)) {
        case TCP_V4_FLOW:
        case UDP_V4_FLOW:
        case TCP_V6_FLOW:
@@ -329,7 +337,7 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v,
                                             outer_headers);
        void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                             outer_headers);
-       u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+       u32 flow_type = flow_type_mask(fs->flow_type);
 
        switch (flow_type) {
        case TCP_V4_FLOW:
@@ -397,10 +405,53 @@ static bool outer_header_zero(u32 *match_criteria)
                                                  size - 1);
 }
 
+static int flow_get_tirn(struct mlx5e_priv *priv,
+                        struct mlx5e_ethtool_rule *eth_rule,
+                        struct ethtool_rx_flow_spec *fs,
+                        u32 rss_context, u32 *tirn)
+{
+       if (fs->flow_type & FLOW_RSS) {
+               struct mlx5e_lro_param lro_param;
+               struct mlx5e_rss *rss;
+               u32 flow_type;
+               int err;
+               int tt;
+
+               rss = mlx5e_rx_res_rss_get(priv->rx_res, rss_context);
+               if (!rss)
+                       return -ENOENT;
+
+               flow_type = flow_type_mask(fs->flow_type);
+               tt = flow_type_to_traffic_type(flow_type);
+               if (tt < 0)
+                       return -EINVAL;
+
+               lro_param = mlx5e_get_lro_param(&priv->channels.params);
+               err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
+               if (err)
+                       return err;
+               eth_rule->rss = rss;
+               mlx5e_rss_refcnt_inc(eth_rule->rss);
+       } else {
+               struct mlx5e_params *params = &priv->channels.params;
+               enum mlx5e_rq_group group;
+               u16 ix;
+
+               mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
+
+               *tirn = group == MLX5E_RQ_GROUP_XSK ?
+                       mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix) :
+                       mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
+       }
+
+       return 0;
+}
+
 static struct mlx5_flow_handle *
 add_ethtool_flow_rule(struct mlx5e_priv *priv,
+                     struct mlx5e_ethtool_rule *eth_rule,
                      struct mlx5_flow_table *ft,
-                     struct ethtool_rx_flow_spec *fs)
+                     struct ethtool_rx_flow_spec *fs, u32 rss_context)
 {
        struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND };
        struct mlx5_flow_destination *dst = NULL;
@@ -419,22 +470,17 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
        if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
        } else {
-               struct mlx5e_params *params = &priv->channels.params;
-               enum mlx5e_rq_group group;
-               struct mlx5e_tir *tir;
-               u16 ix;
-
-               mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group);
-               tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir;
-
                dst = kzalloc(sizeof(*dst), GFP_KERNEL);
                if (!dst) {
                        err = -ENOMEM;
                        goto free;
                }
 
+               err = flow_get_tirn(priv, eth_rule, fs, rss_context, &dst->tir_num);
+               if (err)
+                       goto free;
+
                dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-               dst->tir_num = tir[ix].tirn;
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }
 
@@ -458,6 +504,8 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
 {
        if (eth_rule->rule)
                mlx5_del_flow_rules(eth_rule->rule);
+       if (eth_rule->rss)
+               mlx5e_rss_refcnt_dec(eth_rule->rss);
        list_del(&eth_rule->list);
        priv->fs.ethtool.tot_num_rules--;
        put_flow_table(eth_rule->eth_ft);
@@ -618,7 +666,7 @@ static int validate_flow(struct mlx5e_priv *priv,
                                        fs->ring_cookie))
                        return -EINVAL;
 
-       switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+       switch (flow_type_mask(fs->flow_type)) {
        case ETHER_FLOW:
                num_tuples += validate_ethter(fs);
                break;
@@ -667,7 +715,7 @@ static int validate_flow(struct mlx5e_priv *priv,
 
 static int
 mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
-                          struct ethtool_rx_flow_spec *fs)
+                          struct ethtool_rx_flow_spec *fs, u32 rss_context)
 {
        struct mlx5e_ethtool_table *eth_ft;
        struct mlx5e_ethtool_rule *eth_rule;
@@ -698,7 +746,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
                err = -EINVAL;
                goto del_ethtool_rule;
        }
-       rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs);
+       rule = add_ethtool_flow_rule(priv, eth_rule, eth_ft->ft, fs, rss_context);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                goto del_ethtool_rule;
@@ -744,10 +792,20 @@ mlx5e_ethtool_get_flow(struct mlx5e_priv *priv,
                return -EINVAL;
 
        list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) {
-               if (eth_rule->flow_spec.location == location) {
-                       info->fs = eth_rule->flow_spec;
+               int index;
+
+               if (eth_rule->flow_spec.location != location)
+                       continue;
+               if (!info)
                        return 0;
-               }
+               info->fs = eth_rule->flow_spec;
+               if (!eth_rule->rss)
+                       return 0;
+               index = mlx5e_rx_res_rss_index(priv->rx_res, eth_rule->rss);
+               if (index < 0)
+                       return index;
+               info->rss_context = index;
+               return 0;
        }
 
        return -ENOENT;
@@ -763,7 +821,7 @@ mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
 
        info->data = MAX_NUM_OF_ETHTOOL_RULES;
        while ((!err || err == -ENOENT) && idx < info->rule_cnt) {
-               err = mlx5e_ethtool_get_flow(priv, info, location);
+               err = mlx5e_ethtool_get_flow(priv, NULL, location);
                if (!err)
                        rule_locs[idx++] = location;
                location++;
@@ -785,45 +843,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
        INIT_LIST_HEAD(&priv->fs.ethtool.rules);
 }
 
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
 {
        switch (flow_type) {
        case TCP_V4_FLOW:
-               return  MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case TCP_V6_FLOW:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case UDP_V4_FLOW:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case UDP_V6_FLOW:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        case AH_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_AH;
+               return MLX5_TT_IPV4_IPSEC_AH;
        case AH_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_AH;
+               return MLX5_TT_IPV6_IPSEC_AH;
        case ESP_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
        case ESP_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV6_IPSEC_ESP;
        case IPV4_FLOW:
-               return MLX5E_TT_IPV4;
+               return MLX5_TT_IPV4;
        case IPV6_FLOW:
-               return MLX5E_TT_IPV6;
+               return MLX5_TT_IPV6;
        default:
-               return MLX5E_NUM_INDIR_TIRS;
+               return -EINVAL;
        }
 }
 
 static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-       enum mlx5e_traffic_types tt;
        u8 rx_hash_field = 0;
-       void *in;
+       int err;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
        /*  RSS does not support anything other than hashing to queues
         *  on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
@@ -848,35 +905,24 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
        if (nfc->data & RXH_L4_B_2_3)
                rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
 
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
        mutex_lock(&priv->state_lock);
-
-       if (rx_hash_field == priv->rss_params.rx_hash_fields[tt])
-               goto out;
-
-       priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
-       mlx5e_modify_tirs_hash(priv, in);
-
-out:
+       err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
        mutex_unlock(&priv->state_lock);
-       kvfree(in);
-       return 0;
+
+       return err;
 }
 
 static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       enum mlx5e_traffic_types tt;
        u32 hash_field = 0;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
-       hash_field = priv->rss_params.rx_hash_fields[tt];
+       hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
        nfc->data = 0;
 
        if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
@@ -898,7 +944,7 @@ int mlx5e_ethtool_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 
        switch (cmd->cmd) {
        case ETHTOOL_SRXCLSRLINS:
-               err = mlx5e_ethtool_flow_replace(priv, &cmd->fs);
+               err = mlx5e_ethtool_flow_replace(priv, &cmd->fs, cmd->rss_context);
                break;
        case ETHTOOL_SRXCLSRLDEL:
                err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location);
index 24f919e..47efd85 100644 (file)
@@ -1619,7 +1619,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
        MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
-       MLX5_SET(cqc,   cqc, c_eqn,         eqn);
+       MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
@@ -1711,7 +1711,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
 {
        int err, tc;
 
-       for (tc = 0; tc < params->num_tc; tc++) {
+       for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
                int txq_ix = c->ix + tc * params->num_channels;
 
                err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
@@ -1992,7 +1992,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->pdev     = mlx5_core_dma_dev(priv->mdev);
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
-       c->num_tc   = params->num_tc;
+       c->num_tc   = mlx5e_get_dcb_num_tc(params);
        c->xdp      = !!params->xdp_prog;
        c->stats    = &priv->channel_stats[ix].ch;
        c->aff_mask = irq_get_effective_affinity_mask(irq);
@@ -2185,400 +2185,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
        chs->num = 0;
 }
 
-static int
-mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt)
-{
-       struct mlx5_core_dev *mdev = priv->mdev;
-       void *rqtc;
-       int inlen;
-       int err;
-       u32 *in;
-       int i;
-
-       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
-
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
-       MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
-       for (i = 0; i < sz; i++)
-               MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn);
-
-       err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn);
-       if (!err)
-               rqt->enabled = true;
-
-       kvfree(in);
-       return err;
-}
-
-void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt)
-{
-       rqt->enabled = false;
-       mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn);
-}
-
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rqt *rqt = &priv->indir_rqt;
-       int err;
-
-       err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt);
-       if (err)
-               mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
-       return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
-       int err;
-       int ix;
-
-       for (ix = 0; ix < n; ix++) {
-               err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
-               if (unlikely(err))
-                       goto err_destroy_rqts;
-       }
-
-       return 0;
-
-err_destroy_rqts:
-       mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err);
-       for (ix--; ix >= 0; ix--)
-               mlx5e_destroy_rqt(priv, &tirs[ix].rqt);
-
-       return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
-       int i;
-
-       for (i = 0; i < n; i++)
-               mlx5e_destroy_rqt(priv, &tirs[i].rqt);
-}
-
-static int mlx5e_rx_hash_fn(int hfunc)
-{
-       return (hfunc == ETH_RSS_HASH_TOP) ?
-              MLX5_RX_HASH_FN_TOEPLITZ :
-              MLX5_RX_HASH_FN_INVERTED_XOR8;
-}
-
-int mlx5e_bits_invert(unsigned long a, int size)
-{
-       int inv = 0;
-       int i;
-
-       for (i = 0; i < size; i++)
-               inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i;
-
-       return inv;
-}
-
-static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz,
-                               struct mlx5e_redirect_rqt_param rrp, void *rqtc)
-{
-       int i;
-
-       for (i = 0; i < sz; i++) {
-               u32 rqn;
-
-               if (rrp.is_rss) {
-                       int ix = i;
-
-                       if (rrp.rss.hfunc == ETH_RSS_HASH_XOR)
-                               ix = mlx5e_bits_invert(i, ilog2(sz));
-
-                       ix = priv->rss_params.indirection_rqt[ix];
-                       rqn = rrp.rss.channels->c[ix]->rq.rqn;
-               } else {
-                       rqn = rrp.rqn;
-               }
-               MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
-       }
-}
-
-int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
-                      struct mlx5e_redirect_rqt_param rrp)
-{
-       struct mlx5_core_dev *mdev = priv->mdev;
-       void *rqtc;
-       int inlen;
-       u32 *in;
-       int err;
-
-       inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
-
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
-       MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1);
-       mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc);
-       err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen);
-
-       kvfree(in);
-       return err;
-}
-
-static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
-                               struct mlx5e_redirect_rqt_param rrp)
-{
-       if (!rrp.is_rss)
-               return rrp.rqn;
-
-       if (ix >= rrp.rss.channels->num)
-               return priv->drop_rq.rqn;
-
-       return rrp.rss.channels->c[ix]->rq.rqn;
-}
-
-static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
-                               struct mlx5e_redirect_rqt_param rrp,
-                               struct mlx5e_redirect_rqt_param *ptp_rrp)
-{
-       u32 rqtn;
-       int ix;
-
-       if (priv->indir_rqt.enabled) {
-               /* RSS RQ table */
-               rqtn = priv->indir_rqt.rqtn;
-               mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               struct mlx5e_redirect_rqt_param direct_rrp = {
-                       .is_rss = false,
-                       {
-                               .rqn    = mlx5e_get_direct_rqn(priv, ix, rrp)
-                       },
-               };
-
-               /* Direct RQ Tables */
-               if (!priv->direct_tir[ix].rqt.enabled)
-                       continue;
-
-               rqtn = priv->direct_tir[ix].rqt.rqtn;
-               mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
-       }
-       if (ptp_rrp) {
-               rqtn = priv->ptp_tir.rqt.rqtn;
-               mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp);
-       }
-}
-
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
-                                           struct mlx5e_channels *chs)
-{
-       bool rx_ptp_support = priv->profile->rx_ptp_support;
-       struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL;
-       struct mlx5e_redirect_rqt_param rrp = {
-               .is_rss        = true,
-               {
-                       .rss = {
-                               .channels  = chs,
-                               .hfunc     = priv->rss_params.hfunc,
-                       }
-               },
-       };
-       struct mlx5e_redirect_rqt_param ptp_rrp;
-
-       if (rx_ptp_support) {
-               u32 ptp_rqn;
-
-               ptp_rrp.is_rss = false;
-               ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ?
-                             priv->drop_rq.rqn : ptp_rqn;
-               ptp_rrp_p = &ptp_rrp;
-       }
-       mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p);
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
-       bool rx_ptp_support = priv->profile->rx_ptp_support;
-       struct mlx5e_redirect_rqt_param drop_rrp = {
-               .is_rss = false,
-               {
-                       .rqn = priv->drop_rq.rqn,
-               },
-       };
-
-       mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL);
-}
-
-static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
-       [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
-                               .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
-                               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
-       },
-       [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
-                               .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
-                               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
-       },
-       [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
-                               .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
-                               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
-       },
-       [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
-                               .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
-                               .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
-       },
-       [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
-                                    .l4_prot_type = 0,
-                                    .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
-       },
-       [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
-                                    .l4_prot_type = 0,
-                                    .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
-       },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
-                                     .l4_prot_type = 0,
-                                     .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
-       },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
-                                     .l4_prot_type = 0,
-                                     .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
-       },
-       [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
-                           .l4_prot_type = 0,
-                           .rx_hash_fields = MLX5_HASH_IP,
-       },
-       [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
-                           .l4_prot_type = 0,
-                           .rx_hash_fields = MLX5_HASH_IP,
-       },
-};
-
-struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt)
-{
-       return tirc_default_config[tt];
-}
-
-static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc)
-{
-       if (!params->lro_en)
-               return;
-
-#define ROUGH_MAX_L2_L3_HDR_SZ 256
-
-       MLX5_SET(tirc, tirc, lro_enable_mask,
-                MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
-                MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
-       MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
-                (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
-       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout);
-}
-
-void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
-                                   const struct mlx5e_tirc_config *ttconfig,
-                                   void *tirc, bool inner)
-{
-       void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) :
-                            MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
-
-       MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc));
-       if (rss_params->hfunc == ETH_RSS_HASH_TOP) {
-               void *rss_key = MLX5_ADDR_OF(tirc, tirc,
-                                            rx_hash_toeplitz_key);
-               size_t len = MLX5_FLD_SZ_BYTES(tirc,
-                                              rx_hash_toeplitz_key);
-
-               MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
-               memcpy(rss_key, rss_params->toeplitz_hash_key, len);
-       }
-       MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
-                ttconfig->l3_prot_type);
-       MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
-                ttconfig->l4_prot_type);
-       MLX5_SET(rx_hash_field_select, hfso, selected_fields,
-                ttconfig->rx_hash_fields);
-}
-
-static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
-                                       enum mlx5e_traffic_types tt,
-                                       u32 rx_hash_fields)
-{
-       *ttconfig                = tirc_default_config[tt];
-       ttconfig->rx_hash_fields = rx_hash_fields;
-}
-
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in)
-{
-       void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
-       struct mlx5e_rss_params *rss = &priv->rss_params;
-       struct mlx5_core_dev *mdev = priv->mdev;
-       int ctxlen = MLX5_ST_SZ_BYTES(tirc);
-       struct mlx5e_tirc_config ttconfig;
-       int tt;
-
-       MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               memset(tirc, 0, ctxlen);
-               mlx5e_update_rx_hash_fields(&ttconfig, tt,
-                                           rss->rx_hash_fields[tt]);
-               mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
-               mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
-       }
-
-       /* Verify inner tirs resources allocated */
-       if (!priv->inner_indir_tir[0].tirn)
-               return;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               memset(tirc, 0, ctxlen);
-               mlx5e_update_rx_hash_fields(&ttconfig, tt,
-                                           rss->rx_hash_fields[tt]);
-               mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
-               mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in);
-       }
-}
-
 static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
-
-       void *in;
-       void *tirc;
-       int inlen;
-       int err;
-       int tt;
-       int ix;
-
-       inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       MLX5_SET(modify_tir_in, in, bitmask.lro, 1);
-       tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+       struct mlx5e_rx_res *res = priv->rx_res;
+       struct mlx5e_lro_param lro_param;
 
-       mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
 
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
-               if (err)
-                       goto free_in;
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in);
-               if (err)
-                       goto free_in;
-       }
-
-free_in:
-       kvfree(in);
-
-       return err;
+       return mlx5e_rx_res_lro_set_param(res, &lro_param);
 }
 
 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
@@ -2649,22 +2263,34 @@ void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
                                ETH_MAX_MTU);
 }
 
-static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc)
+static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
+                               struct tc_mqprio_qopt_offload *mqprio)
 {
-       int tc;
+       int tc, err;
 
        netdev_reset_tc(netdev);
 
        if (ntc == 1)
-               return;
+               return 0;
 
-       netdev_set_num_tc(netdev, ntc);
+       err = netdev_set_num_tc(netdev, ntc);
+       if (err) {
+               netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
+               return err;
+       }
 
-       /* Map netdev TCs to offset 0
-        * We have our own UP to TXQ mapping for QoS
-        */
-       for (tc = 0; tc < ntc; tc++)
-               netdev_set_tc_queue(netdev, tc, nch, 0);
+       for (tc = 0; tc < ntc; tc++) {
+               u16 count, offset;
+
+               /* For DCB mode, map netdev TCs to offset 0
+                * We have our own UP to TXQ mapping for QoS
+                */
+               count = mqprio ? mqprio->qopt.count[tc] : nch;
+               offset = mqprio ? mqprio->qopt.offset[tc] : 0;
+               netdev_set_tc_queue(netdev, tc, count, offset);
+       }
+
+       return 0;
 }
 
 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
@@ -2674,7 +2300,7 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
        qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
 
        nch = priv->channels.params.num_channels;
-       ntc = priv->channels.params.num_tc;
+       ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
        num_txqs = nch * ntc + qos_queues;
        if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
                num_txqs += ntc;
@@ -2698,11 +2324,12 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
        old_ntc = netdev->num_tc ? : 1;
 
        nch = priv->channels.params.num_channels;
-       ntc = priv->channels.params.num_tc;
+       ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
        num_rxqs = nch * priv->profile->rq_groups;
 
-       mlx5e_netdev_set_tcs(netdev, nch, ntc);
-
+       err = mlx5e_netdev_set_tcs(netdev, nch, ntc, NULL);
+       if (err)
+               goto err_out;
        err = mlx5e_update_tx_netdev_queues(priv);
        if (err)
                goto err_tcs;
@@ -2723,7 +2350,8 @@ err_txqs:
        WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
 
 err_tcs:
-       mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc);
+       mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc, NULL);
+err_out:
        return err;
 }
 
@@ -2759,9 +2387,9 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
 
        mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
 
-       if (!netif_is_rxfh_configured(priv->netdev))
-               mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt,
-                                             MLX5E_INDIR_RQT_SIZE, count);
+       /* This function may be called on attach, before priv->rx_res is created. */
+       if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
+               mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
 
        return 0;
 }
@@ -2773,7 +2401,7 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
        int i, ch, tc, num_tc;
 
        ch = priv->channels.num;
-       num_tc = priv->channels.params.num_tc;
+       num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
 
        for (i = 0; i < ch; i++) {
                for (tc = 0; tc < num_tc; tc++) {
@@ -2804,7 +2432,7 @@ static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
 {
        /* Sync with mlx5e_select_queue. */
        WRITE_ONCE(priv->num_tc_x_num_ch,
-                  priv->channels.params.num_tc * priv->channels.num);
+                  mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
 }
 
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
@@ -2820,16 +2448,15 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
                mlx5e_add_sqs_fwd_rules(priv);
 
        mlx5e_wait_channels_min_rx_wqes(&priv->channels);
-       mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
 
-       mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
 }
 
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 {
-       mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
-
-       mlx5e_redirect_rqts_to_drop(priv);
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_deactivate(priv->rx_res);
 
        if (mlx5e_is_vport_rep(priv))
                mlx5e_remove_sqs_fwd_rules(priv);
@@ -3204,224 +2831,152 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
        mlx5e_destroy_tises(priv);
 }
 
-static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
-                                            u32 rqtn, u32 *tirc)
-{
-       MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn);
-       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-       MLX5_SET(tirc, tirc, indirect_table, rqtn);
-       MLX5_SET(tirc, tirc, tunneled_offload_en,
-                priv->channels.params.tunneled_offload_en);
-
-       mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
-}
-
-static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv,
-                                     enum mlx5e_traffic_types tt,
-                                     u32 *tirc)
+static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
 {
-       mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
-       mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
-                                      &tirc_default_config[tt], tirc, false);
-}
+       int err = 0;
+       int i;
 
-static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc)
-{
-       mlx5e_build_indir_tir_ctx_common(priv, rqtn, tirc);
-       MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
-}
+       for (i = 0; i < chs->num; i++) {
+               err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
+               if (err)
+                       return err;
+       }
 
-static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv,
-                                           enum mlx5e_traffic_types tt,
-                                           u32 *tirc)
-{
-       mlx5e_build_indir_tir_ctx_common(priv, priv->indir_rqt.rqtn, tirc);
-       mlx5e_build_indir_tir_ctx_hash(&priv->rss_params,
-                                      &tirc_default_config[tt], tirc, true);
+       return 0;
 }
 
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
+static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 {
-       struct mlx5e_tir *tir;
-       void *tirc;
-       int inlen;
-       int i = 0;
        int err;
-       u32 *in;
-       int tt;
-
-       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               memset(in, 0, inlen);
-               tir = &priv->indir_tir[tt];
-               tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-               mlx5e_build_indir_tir_ctx(priv, tt, tirc);
-               err = mlx5e_create_tir(priv->mdev, tir, in);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
-                       goto err_destroy_inner_tirs;
-               }
-       }
-
-       if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               goto out;
+       int i;
 
-       for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
-               memset(in, 0, inlen);
-               tir = &priv->inner_indir_tir[i];
-               tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-               mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
-               err = mlx5e_create_tir(priv->mdev, tir, in);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
-                       goto err_destroy_inner_tirs;
-               }
+       for (i = 0; i < chs->num; i++) {
+               err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
+               if (err)
+                       return err;
        }
-
-out:
-       kvfree(in);
+       if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+               return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
 
        return 0;
-
-err_destroy_inner_tirs:
-       for (i--; i >= 0; i--)
-               mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
-
-       for (tt--; tt >= 0; tt--)
-               mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]);
-
-       kvfree(in);
-
-       return err;
 }
 
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
+static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
+                                    struct tc_mqprio_qopt *mqprio)
 {
-       struct mlx5e_tir *tir;
-       void *tirc;
-       int inlen;
-       int err = 0;
-       u32 *in;
-       int ix;
-
-       inlen = MLX5_ST_SZ_BYTES(create_tir_in);
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
-               return -ENOMEM;
+       struct mlx5e_params new_params;
+       u8 tc = mqprio->num_tc;
+       int err;
 
-       for (ix = 0; ix < n; ix++) {
-               memset(in, 0, inlen);
-               tir = &tirs[ix];
-               tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-               mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
-               err = mlx5e_create_tir(priv->mdev, tir, in);
-               if (unlikely(err))
-                       goto err_destroy_ch_tirs;
-       }
+       mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 
-       goto out;
+       if (tc && tc != MLX5E_MAX_NUM_TC)
+               return -EINVAL;
 
-err_destroy_ch_tirs:
-       mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
-       for (ix--; ix >= 0; ix--)
-               mlx5e_destroy_tir(priv->mdev, &tirs[ix]);
+       new_params = priv->channels.params;
+       new_params.mqprio.mode = TC_MQPRIO_MODE_DCB;
+       new_params.mqprio.num_tc = tc ? tc : 1;
 
-out:
-       kvfree(in);
+       err = mlx5e_safe_switch_params(priv, &new_params,
+                                      mlx5e_num_channels_changed_ctx, NULL, true);
 
+       priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
+                                   mlx5e_get_dcb_num_tc(&priv->channels.params));
        return err;
 }
 
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
+                                        struct tc_mqprio_qopt_offload *mqprio)
 {
+       struct net_device *netdev = priv->netdev;
+       int agg_count = 0;
        int i;
 
-       for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
-               mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
-
-       /* Verify inner tirs resources allocated */
-       if (!priv->inner_indir_tir[0].tirn)
-               return;
-
-       for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
-               mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
-}
-
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
-{
-       int i;
+       if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
+           mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
+               return -EINVAL;
 
-       for (i = 0; i < n; i++)
-               mlx5e_destroy_tir(priv->mdev, &tirs[i]);
-}
+       for (i = 0; i < mqprio->qopt.num_tc; i++) {
+               if (!mqprio->qopt.count[i]) {
+                       netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
+                       return -EINVAL;
+               }
+               if (mqprio->min_rate[i]) {
+                       netdev_err(netdev, "Min tx rate is not supported\n");
+                       return -EINVAL;
+               }
+               if (mqprio->max_rate[i]) {
+                       netdev_err(netdev, "Max tx rate is not supported\n");
+                       return -EINVAL;
+               }
 
-static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
-{
-       int err = 0;
-       int i;
+               if (mqprio->qopt.offset[i] != agg_count) {
+                       netdev_err(netdev, "Discontinuous queues config is not supported\n");
+                       return -EINVAL;
+               }
+               agg_count += mqprio->qopt.count[i];
+       }
 
-       for (i = 0; i < chs->num; i++) {
-               err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
-               if (err)
-                       return err;
+       if (priv->channels.params.num_channels < agg_count) {
+               netdev_err(netdev, "Num of queues (%d) exceeds available (%d)\n",
+                          agg_count, priv->channels.params.num_channels);
+               return -EINVAL;
        }
 
        return 0;
 }
 
-static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
+static int mlx5e_mqprio_channel_set_tcs_ctx(struct mlx5e_priv *priv, void *ctx)
 {
-       int err;
-       int i;
+       struct tc_mqprio_qopt_offload *mqprio = (struct tc_mqprio_qopt_offload *)ctx;
+       struct net_device *netdev = priv->netdev;
+       u8 num_tc;
 
-       for (i = 0; i < chs->num; i++) {
-               err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
-               if (err)
-                       return err;
-       }
-       if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
-               return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
+       if (priv->channels.params.mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
+               return -EINVAL;
+
+       num_tc = priv->channels.params.mqprio.num_tc;
+       mlx5e_netdev_set_tcs(netdev, 0, num_tc, mqprio);
 
        return 0;
 }
 
-static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
-                                struct tc_mqprio_qopt *mqprio)
+static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
+                                        struct tc_mqprio_qopt_offload *mqprio)
 {
        struct mlx5e_params new_params;
-       u8 tc = mqprio->num_tc;
-       int err = 0;
+       int err;
 
-       mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+       err = mlx5e_mqprio_channel_validate(priv, mqprio);
+       if (err)
+               return err;
 
-       if (tc && tc != MLX5E_MAX_NUM_TC)
-               return -EINVAL;
+       new_params = priv->channels.params;
+       new_params.mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
+       new_params.mqprio.num_tc = mqprio->qopt.num_tc;
+       err = mlx5e_safe_switch_params(priv, &new_params,
+                                      mlx5e_mqprio_channel_set_tcs_ctx, mqprio, true);
 
-       mutex_lock(&priv->state_lock);
+       return err;
+}
 
+static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
+                                struct tc_mqprio_qopt_offload *mqprio)
+{
        /* MQPRIO is another toplevel qdisc that can't be attached
         * simultaneously with the offloaded HTB.
         */
-       if (WARN_ON(priv->htb.maj_id)) {
-               err = -EINVAL;
-               goto out;
-       }
-
-       new_params = priv->channels.params;
-       new_params.num_tc = tc ? tc : 1;
-
-       err = mlx5e_safe_switch_params(priv, &new_params,
-                                      mlx5e_num_channels_changed_ctx, NULL, true);
+       if (WARN_ON(priv->htb.maj_id))
+               return -EINVAL;
 
-out:
-       priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
-                                   priv->channels.params.num_tc);
-       mutex_unlock(&priv->state_lock);
-       return err;
+       switch (mqprio->mode) {
+       case TC_MQPRIO_MODE_DCB:
+               return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
+       case TC_MQPRIO_MODE_CHANNEL:
+               return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
+       default:
+               return -EOPNOTSUPP;
+       }
 }
 
 static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
@@ -3445,8 +3000,7 @@ static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offloa
                return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
                                               htb->rate, htb->ceil, htb->extack);
        case TC_HTB_LEAF_DEL:
-               return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid,
-                                         htb->extack);
+               return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack);
        case TC_HTB_LEAF_DEL_LAST:
        case TC_HTB_LEAF_DEL_LAST_FORCE:
                return mlx5e_htb_leaf_del_last(priv, htb->classid,
@@ -3493,7 +3047,10 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
                                                  priv, priv, true);
        }
        case TC_SETUP_QDISC_MQPRIO:
-               return mlx5e_setup_tc_mqprio(priv, type_data);
+               mutex_lock(&priv->state_lock);
+               err = mlx5e_setup_tc_mqprio(priv, type_data);
+               mutex_unlock(&priv->state_lock);
+               return err;
        case TC_SETUP_QDISC_HTB:
                mutex_lock(&priv->state_lock);
                err = mlx5e_setup_tc_htb(priv, type_data);
@@ -4582,7 +4139,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_set_features        = mlx5e_set_features,
        .ndo_fix_features        = mlx5e_fix_features,
        .ndo_change_mtu          = mlx5e_change_nic_mtu,
-       .ndo_do_ioctl            = mlx5e_ioctl,
+       .ndo_eth_ioctl            = mlx5e_ioctl,
        .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
        .ndo_features_check      = mlx5e_features_check,
        .ndo_tx_timeout          = mlx5e_tx_timeout,
@@ -4611,15 +4168,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_get_devlink_port    = mlx5e_get_devlink_port,
 };
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels)
-{
-       int i;
-
-       for (i = 0; i < len; i++)
-               indirection_rqt[i] = i % num_channels;
-}
-
 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
 {
        int i;
@@ -4632,24 +4180,8 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
        return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels)
-{
-       enum mlx5e_traffic_types tt;
-
-       rss_params->hfunc = ETH_RSS_HASH_TOP;
-       netdev_rss_key_fill(rss_params->toeplitz_hash_key,
-                           sizeof(rss_params->toeplitz_hash_key));
-       mlx5e_build_default_indir_rqt(rss_params->indirection_rqt,
-                                     MLX5E_INDIR_RQT_SIZE, num_channels);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               rss_params->rx_hash_fields[tt] =
-                       tirc_default_config[tt].rx_hash_fields;
-}
-
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
 {
-       struct mlx5e_rss_params *rss_params = &priv->rss_params;
        struct mlx5e_params *params = &priv->channels.params;
        struct mlx5_core_dev *mdev = priv->mdev;
        u8 rx_cq_period_mode;
@@ -4660,12 +4192,12 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
        params->hard_mtu = MLX5E_ETH_HARD_MTU;
        params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
                                     priv->max_nch);
-       params->num_tc       = 1;
+       params->mqprio.num_tc = 1;
 
        /* Set an initial non-zero value, so that mlx5e_select_queue won't
         * divide by zero if called before first activating channels.
         */
-       priv->num_tc_x_num_ch = params->num_channels * params->num_tc;
+       priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
 
        /* SQ */
        params->log_sq_size = is_kdump_kernel() ?
@@ -4709,10 +4241,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
        /* TX inline */
        mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 
-       /* RSS */
-       mlx5e_build_rss_params(rss_params, params->num_channels);
-       params->tunneled_offload_en =
-               mlx5e_tunnel_inner_ft_supported(mdev);
+       params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
 
        /* AF_XDP */
        params->xsk = xsk;
@@ -4772,8 +4301,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
 {
        int tt;
 
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt)))
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
                        return true;
        }
        return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
@@ -4812,7 +4341,14 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
        netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
 
+       /* Tunneled LRO is not supported in the driver, and the same RQs are
+        * shared between inner and outer TIRs, so the driver can't disable LRO
+        * for inner TIRs while having it enabled for outer TIRs. Due to this,
+        * block LRO altogether if the firmware declares tunneled LRO support.
+        */
        if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
+           !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
+           !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
            mlx5e_check_fragmented_striding_rq_cap(mdev))
                netdev->vlan_features    |= NETIF_F_LRO;
 
@@ -4939,7 +4475,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                          struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct devlink_port *dl_port;
        int err;
 
        mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
@@ -4955,19 +4490,13 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
        if (err)
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
-       dl_port = mlx5e_devlink_get_dl_port(priv);
-       if (dl_port->registered)
-               mlx5e_health_create_reporters(priv);
-
+       mlx5e_health_create_reporters(priv);
        return 0;
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
-       struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
-
-       if (dl_port->registered)
-               mlx5e_health_destroy_reporters(priv);
+       mlx5e_health_destroy_reporters(priv);
        mlx5e_tls_cleanup(priv);
        mlx5e_ipsec_cleanup(priv);
 }
@@ -4975,9 +4504,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       u16 max_nch = priv->max_nch;
+       enum mlx5e_rx_res_features features;
+       struct mlx5e_lro_param lro_param;
        int err;
 
+       priv->rx_res = mlx5e_rx_res_alloc();
+       if (!priv->rx_res)
+               return -ENOMEM;
+
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -4986,42 +4520,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+       if (priv->channels.params.tunneled_offload_en)
+               features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, true);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
-       err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch);
-       if (unlikely(err))
-               goto err_destroy_direct_tirs;
-
-       err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch);
-       if (unlikely(err))
-               goto err_destroy_xsk_rqts;
-
-       err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1);
-       if (err)
-               goto err_destroy_xsk_tirs;
-
-       err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1);
-       if (err)
-               goto err_destroy_ptp_rqt;
-
        err = mlx5e_create_flow_steering(priv);
        if (err) {
                mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-               goto err_destroy_ptp_direct_tir;
+               goto err_destroy_rx_res;
        }
 
        err = mlx5e_tc_nic_init(priv);
@@ -5042,46 +4554,27 @@ err_tc_nic_cleanup:
        mlx5e_tc_nic_cleanup(priv);
 err_destroy_flow_steering:
        mlx5e_destroy_flow_steering(priv);
-err_destroy_ptp_direct_tir:
-       mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
-err_destroy_ptp_rqt:
-       mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
-err_destroy_xsk_tirs:
-       mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
-err_destroy_xsk_rqts:
-       mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
        return err;
 }
 
 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
 {
-       u16 max_nch = priv->max_nch;
-
        mlx5e_accel_cleanup_rx(priv);
        mlx5e_tc_nic_cleanup(priv);
        mlx5e_destroy_flow_steering(priv);
-       mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
-       mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
-       mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
-       mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
 }
 
 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
index bf94bcb..ae71a17 100644 (file)
@@ -49,6 +49,7 @@
 #include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
+#include "lib/devcom.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
 #include "en_accel/ipsec.h"
@@ -250,7 +251,9 @@ static int mlx5e_rep_set_channels(struct net_device *dev,
 }
 
 static int mlx5e_rep_get_coalesce(struct net_device *netdev,
-                                 struct ethtool_coalesce *coal)
+                                 struct ethtool_coalesce *coal,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -258,7 +261,9 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev,
 }
 
 static int mlx5e_rep_set_coalesce(struct net_device *netdev,
-                                 struct ethtool_coalesce *coal)
+                                 struct ethtool_coalesce *coal,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -310,6 +315,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+               if (rep_sq->send_to_vport_rule_peer)
+                       mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
                list_del(&rep_sq->list);
                kfree(rep_sq);
        }
@@ -319,6 +326,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u32 *sqns_array, int sqns_num)
 {
+       struct mlx5_eswitch *peer_esw = NULL;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
@@ -329,6 +337,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                return 0;
 
        rpriv = mlx5e_rep_to_rep_priv(rep);
+       if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+               peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+                                                    MLX5_DEVCOM_ESW_OFFLOADS);
+
        for (i = 0; i < sqns_num; i++) {
                rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
                if (!rep_sq) {
@@ -337,7 +349,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                }
 
                /* Add re-inject rule to the PF/representor sqs */
-               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
                                                                sqns_array[i]);
                if (IS_ERR(flow_rule)) {
                        err = PTR_ERR(flow_rule);
@@ -345,12 +357,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                        goto out_err;
                }
                rep_sq->send_to_vport_rule = flow_rule;
+               rep_sq->sqn = sqns_array[i];
+
+               if (peer_esw) {
+                       flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+                                                                       rep, sqns_array[i]);
+                       if (IS_ERR(flow_rule)) {
+                               err = PTR_ERR(flow_rule);
+                               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+                               kfree(rep_sq);
+                               goto out_err;
+                       }
+                       rep_sq->send_to_vport_rule_peer = flow_rule;
+               }
+
                list_add(&rep_sq->list, &rpriv->vport_sqs_list);
        }
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return 0;
 
 out_err:
        mlx5e_sqs2vport_stop(esw, rep);
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return err;
 }
 
@@ -364,7 +398,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
        int err = -ENOMEM;
        u32 *sqs;
 
-       sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(*sqs), GFP_KERNEL);
+       sqs = kcalloc(priv->channels.num * mlx5e_get_dcb_num_tc(&priv->channels.params),
+                     sizeof(*sqs), GFP_KERNEL);
        if (!sqs)
                goto out;
 
@@ -581,13 +616,10 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
        params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
        mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
 
-       params->num_tc                = 1;
+       params->mqprio.num_tc       = 1;
        params->tunneled_offload_en = false;
 
        mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
-
-       /* RSS */
-       mlx5e_build_rss_params(&priv->rss_params, params->num_channels);
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev,
@@ -651,25 +683,23 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
        struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                              MLX5_FLOW_NAMESPACE_KERNEL);
 
        /* The inner_ttc in the ttc params is intentionally not set */
-       ttc_params.any_tt_tirn = priv->direct_tir[0].tirn;
-       mlx5e_set_ttc_ft_params(&ttc_params);
+       mlx5e_set_ttc_params(priv, &ttc_params, false);
 
        if (rep->vport != MLX5_VPORT_UPLINK)
                /* To give uplik rep TTC a lower level for chaining from root ft */
                ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
 
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
-       if (err) {
-               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc)) {
+               err = PTR_ERR(priv->fs.ttc);
+               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+                          err);
                return err;
        }
        return 0;
@@ -687,7 +717,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
                /* non uplik reps will skip any bypass tables and go directly to
                 * their own ttc
                 */
-               rpriv->root_ft = priv->fs.ttc.ft.t;
+               rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
                return 0;
        }
 
@@ -760,9 +790,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       u16 max_nch = priv->max_nch;
+       struct mlx5e_lro_param lro_param;
        int err;
 
+       priv->rx_res = mlx5e_rx_res_alloc();
+       if (!priv->rx_res)
+               return -ENOMEM;
+
        mlx5e_init_l2_addr(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -771,25 +805,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
                return err;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, false);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5e_create_rep_ttc_table(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        err = mlx5e_create_rep_root_ft(priv);
        if (err)
@@ -806,33 +831,26 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 err_destroy_root_ft:
        mlx5e_destroy_rep_root_ft(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
        return err;
 }
 
 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
-       u16 max_nch = priv->max_nch;
-
        mlx5e_ethtool_cleanup_steering(priv);
        rep_vport_rx_rule_destroy(priv);
        mlx5e_destroy_rep_root_ft(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
 }
 
 static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
@@ -1264,10 +1282,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return rpriv->netdev;
 }
 
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (!rep_sq->send_to_vport_rule_peer)
+                       continue;
+               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+               rep_sq->send_to_vport_rule_peer = NULL;
+       }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep *rep,
+                                     struct mlx5_eswitch *peer_esw)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (rep_sq->send_to_vport_rule_peer)
+                       continue;
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+               if (IS_ERR(flow_rule))
+                       goto err_out;
+               rep_sq->send_to_vport_rule_peer = flow_rule;
+       }
+
+       return 0;
+err_out:
+       mlx5e_vport_rep_event_unpair(rep);
+       return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+                                struct mlx5_eswitch_rep *rep,
+                                enum mlx5_switchdev_event event,
+                                void *data)
+{
+       int err = 0;
+
+       if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+               err = mlx5e_vport_rep_event_pair(esw, rep, data);
+       else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+               mlx5e_vport_rep_event_unpair(rep);
+
+       return err;
+}
+
 static const struct mlx5_eswitch_rep_ops rep_ops = {
        .load = mlx5e_vport_rep_load,
        .unload = mlx5e_vport_rep_unload,
-       .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+       .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+       .event = mlx5e_vport_rep_event,
 };
 
 static int mlx5e_rep_probe(struct auxiliary_device *adev,
index 47a2dfb..48a203a 100644 (file)
@@ -60,6 +60,7 @@ struct mlx5e_neigh_update_table {
 struct mlx5_tc_ct_priv;
 struct mlx5e_rep_bond;
 struct mlx5e_tc_tun_encap;
+struct mlx5e_post_act;
 
 struct mlx5_rep_uplink_priv {
        /* Filters DB - instantiated by the uplink representor and shared by
@@ -88,8 +89,9 @@ struct mlx5_rep_uplink_priv {
        /* maps tun_enc_opts to a unique id*/
        struct mapping_ctx *tunnel_enc_opts_mapping;
 
+       struct mlx5e_post_act *post_act;
        struct mlx5_tc_ct_priv *ct_priv;
-       struct mlx5_esw_psample *esw_psample;
+       struct mlx5e_tc_psample *tc_psample;
 
        /* support eswitch vports bonding */
        struct mlx5e_rep_bond *bond;
@@ -146,7 +148,7 @@ struct mlx5e_neigh_hash_entry {
         */
        refcount_t refcnt;
 
-       /* Save the last reported time offloaded trafic pass over one of the
+       /* Save the last reported time offloaded traffic pass over one of the
         * neigh hash entry flows. Use it to periodically update the neigh
         * 'used' value and avoid neigh deleting by the kernel.
         */
@@ -207,6 +209,8 @@ struct mlx5e_encap_entry {
 
 struct mlx5e_rep_sq {
        struct mlx5_flow_handle *send_to_vport_rule;
+       struct mlx5_flow_handle *send_to_vport_rule_peer;
+       u32 sqn;
        struct list_head         list;
 };
 
index 6eba574..ba81647 100644 (file)
 #include <net/flow_offload.h>
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
 #include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
-#include <net/tc_act/tc_mpls.h>
 #include <net/psample.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
 #include <net/bareudp.h>
 #include <net/bonding.h>
 #include "en.h"
+#include "en/tc/post_act.h"
 #include "en_rep.h"
 #include "en/rep/tc.h"
 #include "en/rep/neigh.h"
@@ -66,7 +61,7 @@
 #include "en/mod_hdr.h"
 #include "en/tc_priv.h"
 #include "en/tc_tun_encap.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "lib/fs_chains.h"
@@ -103,7 +98,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [MARK_TO_REG] = mark_to_reg_ct,
        [LABELS_TO_REG] = labels_to_reg_ct,
        [FTEID_TO_REG] = fteid_to_reg_ct,
-       /* For NIC rules we store the retore metadata directly
+       /* For NIC rules we store the restore metadata directly
         * into reg_b that is passed to SW since we don't
         * jump between steering domains.
         */
@@ -252,7 +247,7 @@ get_ct_priv(struct mlx5e_priv *priv)
 }
 
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-static struct mlx5_esw_psample *
+static struct mlx5e_tc_psample *
 get_sample_priv(struct mlx5e_priv *priv)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -263,7 +258,7 @@ get_sample_priv(struct mlx5e_priv *priv)
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;
 
-               return uplink_priv->esw_psample;
+               return uplink_priv->tc_psample;
        }
 
        return NULL;
@@ -340,12 +335,12 @@ struct mlx5e_hairpin {
        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
-       u32 tirn;
+       struct mlx5e_tir direct_tir;
 
        int num_channels;
        struct mlx5e_rqt indir_rqt;
-       u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table ttc;
+       struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
+       struct mlx5_ttc_table *ttc;
 };
 
 struct mlx5e_hairpin_entry {
@@ -482,126 +477,101 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
 
 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
 {
-       u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
-       void *tirc;
+       struct mlx5e_tir_builder *builder;
        int err;
 
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
-               goto alloc_tdn_err;
-
-       tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-
-       MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
-       MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
-       MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
+               goto out;
 
-       err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
+       mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
+       err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
        if (err)
                goto create_tir_err;
 
-       return 0;
+out:
+       mlx5e_tir_builder_free(builder);
+       return err;
 
 create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
-alloc_tdn_err:
-       return err;
+
+       goto out;
 }
 
 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
 {
-       mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
+       mlx5e_tir_destroy(&hp->direct_tir);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
 }
 
-static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
-{
-       struct mlx5e_priv *priv = hp->func_priv;
-       int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
-       u32 *indirection_rqt, rqn;
-
-       indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL);
-       if (!indirection_rqt)
-               return -ENOMEM;
-
-       mlx5e_build_default_indir_rqt(indirection_rqt, sz,
-                                     hp->num_channels);
-
-       for (i = 0; i < sz; i++) {
-               ix = i;
-               if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
-                       ix = mlx5e_bits_invert(i, ilog2(sz));
-               ix = indirection_rqt[ix];
-               rqn = hp->pair->rqn[ix];
-               MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
-       }
-
-       kfree(indirection_rqt);
-       return 0;
-}
-
 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 {
-       int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
-       void *rqtc;
-       u32 *in;
+       struct mlx5e_rss_params_indir *indir;
+       int err;
 
-       inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in)
+       indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
+       if (!indir)
                return -ENOMEM;
 
-       rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
+       mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
+       err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
+                                  mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+                                  indir);
 
-       MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
-       MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
-
-       err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
-       if (err)
-               goto out;
-
-       err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
-       if (!err)
-               hp->indir_rqt.enabled = true;
-
-out:
-       kvfree(in);
+       kvfree(indir);
        return err;
 }
 
 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
 {
        struct mlx5e_priv *priv = hp->func_priv;
-       u32 in[MLX5_ST_SZ_DW(create_tir_in)];
-       int tt, i, err;
-       void *tirc;
+       struct mlx5e_rss_params_hash rss_hash;
+       enum mlx5_traffic_types tt, max_tt;
+       struct mlx5e_tir_builder *builder;
+       int err = 0;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);
+               struct mlx5e_rss_params_traffic_type rss_tt;
 
-               memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
-               tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+               rss_tt = mlx5e_rss_get_default_tt_config(tt);
 
-               MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
-               MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
-               MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
-               mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
+               mlx5e_tir_builder_build_rqt(builder, hp->tdn,
+                                           mlx5e_rqt_get_rqtn(&hp->indir_rqt),
+                                           false);
+               mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
 
-               err = mlx5_core_create_tir(hp->func_mdev, in,
-                                          &hp->indir_tirn[tt]);
+               err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
+
+               mlx5e_tir_builder_clear(builder);
        }
-       return 0;
 
-err_destroy_tirs:
-       for (i = 0; i < tt; i++)
-               mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
+out:
+       mlx5e_tir_builder_free(builder);
        return err;
+
+err_destroy_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_tir_destroy(&hp->indir_tir[tt]);
+
+       goto out;
 }
 
 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
@@ -609,7 +579,7 @@ static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
        int tt;
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
+               mlx5e_tir_destroy(&hp->indir_tir[tt]);
 }
 
 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
@@ -620,12 +590,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
 
        memset(ttc_params, 0, sizeof(*ttc_params));
 
-       ttc_params->any_tt_tirn = hp->tirn;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
+       ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_tir_get_tirn(&hp->direct_tir) :
+                               mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+       }
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
 }
@@ -645,30 +619,31 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
                goto err_create_indirect_tirs;
 
        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
-       if (err)
+       hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(hp->ttc)) {
+               err = PTR_ERR(hp->ttc);
                goto err_create_ttc_table;
+       }
 
        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
-                  hp->num_channels, hp->ttc.ft.t->id);
+                  hp->num_channels,
+                  mlx5_get_ttc_flow_table(priv->fs.ttc)->id);
 
        return 0;
 
 err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
 err_create_indirect_tirs:
-       mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+       mlx5e_rqt_destroy(&hp->indir_rqt);
 
        return err;
 }
 
 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
 {
-       struct mlx5e_priv *priv = hp->func_priv;
-
-       mlx5e_destroy_ttc_table(priv, &hp->ttc);
+       mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
-       mlx5e_destroy_rqt(priv, &hp->indir_rqt);
+       mlx5e_rqt_destroy(&hp->indir_rqt);
 }
 
 static struct mlx5e_hairpin *
@@ -903,16 +878,17 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
        }
 
        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
-                  hp->tirn, hp->pair->rqn[0],
+                  mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
 
 attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
-               flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+               flow->attr->nic_attr->hairpin_ft =
+                       mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
-               flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
+               flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }
 
        flow->hpe = hpe;
@@ -1056,15 +1032,17 @@ err_ft_get:
 
 static int
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-                     struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_fc *counter = NULL;
+       struct mlx5_fc *counter;
        int err;
 
+       parse_attr = attr->parse_attr;
+
        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
@@ -1170,7 +1148,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                                               mod_hdr_acts);
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
        } else if (flow_flag_test(flow, SAMPLE)) {
-               rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr);
+               rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
+                                              mlx5e_tc_get_flow_tun_id(flow));
 #endif
        } else {
                rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
@@ -1209,7 +1188,7 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
 
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
        if (flow_flag_test(flow, SAMPLE)) {
-               mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+               mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
                return;
        }
 #endif
@@ -1402,9 +1381,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        bool vf_tun = false, encap_valid = true;
        struct net_device *encap_dev = NULL;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
+       struct mlx5_fc *counter;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;
@@ -1591,6 +1570,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                else
                        mlx5e_detach_mod_hdr(priv, flow);
        }
+       kfree(attr->sample_attr);
        kvfree(attr->parse_attr);
        kvfree(attr->esw_attr->rx_tun_attr);
 
@@ -1600,7 +1580,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);
 
-       kfree(flow->attr->esw_attr->sample);
        kfree(flow->attr);
 }
 
@@ -1665,17 +1644,22 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
        }
 }
 
-static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
+static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
 {
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_action *flow_action = &rule->action;
        const struct flow_action_entry *act;
        int i;
 
+       if (chain)
+               return false;
+
        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_GOTO:
                        return true;
+               case FLOW_ACTION_SAMPLE:
+                       return true;
                default:
                        continue;
                }
@@ -1916,7 +1900,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
 
        needs_mapping = !!flow->attr->chain;
-       sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
+       sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
        *match_inner = !needs_mapping;
 
        if ((needs_mapping || sets_mapping) &&
@@ -2489,7 +2473,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
                }
        }
-       /* Currenlty supported only for MPLS over UDP */
+       /* Currently supported only for MPLS over UDP */
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
            !netif_is_bareudp(filter_dev)) {
                NL_SET_ERR_MSG_MOD(extack,
@@ -2743,7 +2727,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                if (s_mask && a_mask) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "can't set and add to the same HW field");
-                       printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
+                       netdev_warn(priv->netdev,
+                                   "mlx5: can't set and add to the same HW field (%x)\n",
+                                   f->field);
                        return -EOPNOTSUPP;
                }
 
@@ -2782,8 +2768,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                if (first < next_z && next_z < last) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "rewrite of few sub-fields isn't supported");
-                       printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
-                              mask);
+                       netdev_warn(priv->netdev,
+                                   "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
+                                   mask);
                        return -EOPNOTSUPP;
                }
 
@@ -3370,10 +3357,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv,
 
 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
-                               struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct pedit_headers_action hdrs[2] = {};
        const struct flow_action_entry *act;
@@ -3389,8 +3376,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
 
        nic_attr = attr->nic_attr;
-
        nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       parse_attr = attr->parse_attr;
 
        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
@@ -3399,10 +3386,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_DROP:
-                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-                       if (MLX5_CAP_FLOWTABLE(priv->mdev,
-                                              flow_table_properties_nic_receive.flow_counter))
-                               action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+                                 MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_MANGLE:
                case FLOW_ACTION_ADD:
@@ -3443,7 +3428,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                                   "device is not on same HW, can't offload");
                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
                                            peer_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -3453,7 +3438,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Bad flow mark - only 16 bit is supported");
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
 
                        nic_attr->flow_tag = mark;
@@ -3750,20 +3735,19 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
-                               struct netlink_ext_ack *extack,
-                               struct net_device *filter_dev)
+                               struct netlink_ext_ack *extack)
 {
        struct pedit_headers_action hdrs[2] = {};
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5e_sample_attr sample_attr = {};
        const struct ip_tunnel_info *info = NULL;
        struct mlx5_flow_attr *attr = flow->attr;
        int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
        bool ft_flow = mlx5e_is_ft_flow(flow);
        const struct flow_action_entry *act;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_sample_attr sample = {};
        bool encap = false, decap = false;
        u32 action = attr->action;
        int err, i, if_count = 0;
@@ -3816,7 +3800,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                                   "mpls pop supported only as first action");
                                return -EOPNOTSUPP;
                        }
-                       if (!netif_is_bareudp(filter_dev)) {
+                       if (!netif_is_bareudp(parse_attr->filter_dev)) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "mpls pop supported only on bareudp devices");
                                return -EOPNOTSUPP;
@@ -3965,7 +3949,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                            "devices %s %s not on same switch HW, can't offload forwarding\n",
                                            priv->netdev->name,
                                            out_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -4034,10 +4018,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
                                return -EOPNOTSUPP;
                        }
-                       sample.rate = act->sample.rate;
-                       sample.group_num = act->sample.psample_group->group_num;
+                       sample_attr.rate = act->sample.rate;
+                       sample_attr.group_num = act->sample.psample_group->group_num;
                        if (act->sample.truncate)
-                               sample.trunc_size = act->sample.trunc_size;
+                               sample_attr.trunc_size = act->sample.trunc_size;
                        flow_flag_set(flow, SAMPLE);
                        break;
                default:
@@ -4122,10 +4106,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
         * no errors after parsing.
         */
        if (flow_flag_test(flow, SAMPLE)) {
-               esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL);
-               if (!esw_attr->sample)
+               attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
+               if (!attr->sample_attr)
                        return -ENOMEM;
-               *esw_attr->sample = sample;
+               *attr->sample_attr = sample_attr;
        }
 
        return 0;
@@ -4318,7 +4302,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
+       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
@@ -4464,11 +4448,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+       err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
-       err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+       err = mlx5e_tc_add_nic_flow(priv, flow, extack);
        if (err)
                goto err_free;
 
@@ -4723,7 +4707,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
                rate_mbps = max_t(u32, rate, 1);
        }
 
-       err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+       err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
 
@@ -4895,6 +4879,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
        struct mlx5_core_dev *dev = priv->mdev;
        struct mapping_ctx *chains_mapping;
        struct mlx5_chains_attr attr = {};
+       u64 mapping_id;
        int err;
 
        mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4908,8 +4893,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
        lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
 
-       chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+       chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                              sizeof(struct mlx5_mapped_obj),
+                                              MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
        if (IS_ERR(chains_mapping)) {
                err = PTR_ERR(chains_mapping);
                goto err_mapping;
@@ -4931,8 +4920,9 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
                goto err_chains;
        }
 
+       tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
        tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
-                                MLX5_FLOW_NAMESPACE_KERNEL);
+                                MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
 
        tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
        err = register_netdevice_notifier_dev_net(priv->netdev,
@@ -4948,6 +4938,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 err_reg:
        mlx5_tc_ct_clean(tc->ct);
+       mlx5e_tc_post_act_destroy(tc->post_act);
        mlx5_chains_destroy(tc->chains);
 err_chains:
        mapping_destroy(chains_mapping);
@@ -4986,6 +4977,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
        mutex_destroy(&tc->t_lock);
 
        mlx5_tc_ct_clean(tc->ct);
+       mlx5e_tc_post_act_destroy(tc->post_act);
        mapping_destroy(tc->mapping);
        mlx5_chains_destroy(tc->chains);
 }
@@ -4998,6 +4990,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        struct mapping_ctx *mapping;
        struct mlx5_eswitch *esw;
        struct mlx5e_priv *priv;
+       u64 mapping_id;
        int err = 0;
 
        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
@@ -5005,17 +4998,24 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        priv = netdev_priv(rpriv->netdev);
        esw = priv->mdev->priv.eswitch;
 
+       uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
+                                                      MLX5_FLOW_NAMESPACE_FDB);
        uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
                                               esw_chains(esw),
                                               &esw->offloads.mod_hdr,
-                                              MLX5_FLOW_NAMESPACE_FDB);
+                                              MLX5_FLOW_NAMESPACE_FDB,
+                                              uplink_priv->post_act);
 
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-       uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
+       uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
 #endif
 
-       mapping = mapping_create(sizeof(struct tunnel_match_key),
-                                TUNNEL_INFO_BITS_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+                                       sizeof(struct tunnel_match_key),
+                                       TUNNEL_INFO_BITS_MASK, true);
+
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_tun_mapping;
@@ -5023,7 +5023,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv->tunnel_mapping = mapping;
 
        /* 0xFFF is reserved for stack devices slow path table mark */
-       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+                                       sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_enc_opts_mapping;
@@ -5052,11 +5053,12 @@ err_enc_opts_mapping:
        mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-       mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+       mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
 #endif
        mlx5_tc_ct_clean(uplink_priv->ct_priv);
        netdev_warn(priv->netdev,
                    "Failed to initialize tc (eswitch), err: %d", err);
+       mlx5e_tc_post_act_destroy(uplink_priv->post_act);
        return err;
 }
 
@@ -5073,9 +5075,10 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
        mapping_destroy(uplink_priv->tunnel_mapping);
 
 #if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-       mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+       mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
 #endif
        mlx5_tc_ct_clean(uplink_priv->ct_priv);
+       mlx5e_tc_post_act_destroy(uplink_priv->post_act);
 }
 
 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
index f7cbeb0..1a4cd88 100644 (file)
@@ -70,6 +70,7 @@ struct mlx5_flow_attr {
        struct mlx5_fc *counter;
        struct mlx5_modify_hdr *modify_hdr;
        struct mlx5_ct_attr ct_attr;
+       struct mlx5e_sample_attr *sample_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        u32 chain;
        u16 prio;
index 505bf81..2e504c7 100644 (file)
@@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
        vport->egress.offloads.fwd_rule = NULL;
 }
 
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+       if (!vport->egress.offloads.bounce_rule)
+               return;
+
+       mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+       vport->egress.offloads.bounce_rule = NULL;
+}
+
 static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
                                                struct mlx5_vport *vport,
                                                struct mlx5_flow_destination *fwd_dest)
@@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
        esw_acl_egress_vlan_destroy(vport);
        esw_acl_egress_ofld_fwd2vport_destroy(vport);
+       esw_acl_egress_ofld_bounce_rule_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
                mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
                vport->egress.offloads.fwd_grp = NULL;
        }
+
+       if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+               mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+               vport->egress.offloads.bounce_grp = NULL;
+       }
+
        esw_acl_egress_vlan_grp_destroy(vport);
 }
 
index 69a3630..7e22103 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/notifier.h>
 #include <net/netevent.h>
 #include <net/switchdev.h>
+#include "lib/devcom.h"
 #include "bridge.h"
 #include "eswitch.h"
 #include "bridge_priv.h"
@@ -56,7 +57,6 @@ struct mlx5_esw_bridge {
 
        struct list_head fdb_list;
        struct rhashtable fdb_ht;
-       struct xarray vports;
 
        struct mlx5_flow_table *egress_ft;
        struct mlx5_flow_group *egress_vlan_fg;
@@ -77,6 +77,15 @@ mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *
        call_switchdev_notifiers(val, dev, &send_info.info, NULL);
 }
 
+static void
+mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry)
+{
+       if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER)))
+               mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
+                                                  entry->key.vid,
+                                                  SWITCHDEV_FDB_DEL_TO_BRIDGE);
+}
+
 static struct mlx5_flow_table *
 mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
 {
@@ -400,9 +409,10 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge)
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
-                                   struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
-                                   struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr,
+                                            struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+                                            struct mlx5_esw_bridge *bridge,
+                                            struct mlx5_eswitch *esw)
 {
        struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
        struct mlx5_flow_act flow_act = {
@@ -430,7 +440,7 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
        MLX5_SET(fte_match_param, rule_spec->match_criteria,
                 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
        MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
-                mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num));
+                mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
 
        if (vlan && vlan->pkt_reformat_push) {
                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -458,6 +468,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
        return handle;
 }
 
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr,
+                                   struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+                                   struct mlx5_esw_bridge *bridge)
+{
+       return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+                                                           bridge, bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr,
+                                        struct mlx5_esw_bridge_vlan *vlan, u32 counter_id,
+                                        struct mlx5_esw_bridge *bridge)
+{
+       struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom;
+       static struct mlx5_flow_handle *handle;
+       struct mlx5_eswitch *peer_esw;
+
+       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (!peer_esw)
+               return ERR_PTR(-ENODEV);
+
+       handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id,
+                                                             bridge, peer_esw);
+
+       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       return handle;
+}
+
 static struct mlx5_flow_handle *
 mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr,
                                           struct mlx5_esw_bridge *bridge)
@@ -505,7 +544,7 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a
 }
 
 static struct mlx5_flow_handle *
-mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
+mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr,
                                   struct mlx5_esw_bridge_vlan *vlan,
                                   struct mlx5_esw_bridge *bridge)
 {
@@ -550,6 +589,10 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr,
                         vlan->vid);
        }
 
+       if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+               dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+               dest.vport.vhca_id = esw_owner_vhca_id;
+       }
        handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1);
 
        kvfree(rule_spec);
@@ -576,7 +619,6 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex,
                goto err_fdb_ht;
 
        INIT_LIST_HEAD(&bridge->fdb_list);
-       xa_init(&bridge->vports);
        bridge->ifindex = ifindex;
        bridge->refcnt = 1;
        bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
@@ -603,7 +645,6 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads,
                return;
 
        mlx5_esw_bridge_egress_table_cleanup(bridge);
-       WARN_ON(!xa_empty(&bridge->vports));
        list_del(&bridge->list);
        rhashtable_destroy(&bridge->fdb_ht);
        kvfree(bridge);
@@ -639,30 +680,40 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads
        return bridge;
 }
 
+static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id)
+{
+       return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE;
+}
+
+static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port)
+{
+       return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id);
+}
+
 static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port,
-                                      struct mlx5_esw_bridge *bridge)
+                                      struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       return xa_insert(&bridge->vports, port->vport_num, port, GFP_KERNEL);
+       return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL);
 }
 
 static struct mlx5_esw_bridge_port *
-mlx5_esw_bridge_port_lookup(u16 vport_num, struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id,
+                           struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       return xa_load(&bridge->vports, vport_num);
+       return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num,
+                                                                              esw_owner_vhca_id));
 }
 
 static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port,
-                                      struct mlx5_esw_bridge *bridge)
+                                      struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       xa_erase(&bridge->vports, port->vport_num);
+       xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port));
 }
 
-static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse,
-                                             struct mlx5_esw_bridge_fdb_entry *entry)
+static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry)
 {
        trace_mlx5_esw_bridge_fdb_entry_refresh(entry);
 
-       entry->lastuse = lastuse;
        mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
                                           entry->key.vid,
                                           SWITCHDEV_FDB_ADD_TO_BRIDGE);
@@ -690,10 +741,7 @@ static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge)
        struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
 
        list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
-               if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
-                       mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
-                                                          entry->key.vid,
-                                                          SWITCHDEV_FDB_DEL_TO_BRIDGE);
+               mlx5_esw_bridge_fdb_del_notify(entry);
                mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
        }
 }
@@ -841,10 +889,7 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
        struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
 
        list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) {
-               if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
-                       mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
-                                                          entry->key.vid,
-                                                          SWITCHDEV_FDB_DEL_TO_BRIDGE);
+               mlx5_esw_bridge_fdb_del_notify(entry);
                mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
        }
 
@@ -875,13 +920,13 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port,
 }
 
 static struct mlx5_esw_bridge_vlan *
-mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge,
-                                struct mlx5_eswitch *esw)
+mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id,
+                                struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw)
 {
        struct mlx5_esw_bridge_port *port;
        struct mlx5_esw_bridge_vlan *vlan;
 
-       port = mlx5_esw_bridge_port_lookup(vport_num, bridge);
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads);
        if (!port) {
                /* FDB is added asynchronously on wq while port might have been deleted
                 * concurrently. Report on 'info' logging level and skip the FDB offload.
@@ -904,24 +949,23 @@ mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge
 }
 
 static struct mlx5_esw_bridge_fdb_entry *
-mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr,
-                              u16 vid, bool added_by_user, struct mlx5_eswitch *esw,
-                              struct mlx5_esw_bridge *bridge)
+mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                              const unsigned char *addr, u16 vid, bool added_by_user, bool peer,
+                              struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge)
 {
        struct mlx5_esw_bridge_vlan *vlan = NULL;
        struct mlx5_esw_bridge_fdb_entry *entry;
        struct mlx5_flow_handle *handle;
        struct mlx5_fc *counter;
-       struct mlx5e_priv *priv;
        int err;
 
        if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) {
-               vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw);
+               vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge,
+                                                       esw);
                if (IS_ERR(vlan))
                        return ERR_CAST(vlan);
        }
 
-       priv = netdev_priv(dev);
        entry = kvzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return ERR_PTR(-ENOMEM);
@@ -930,19 +974,25 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
        entry->key.vid = vid;
        entry->dev = dev;
        entry->vport_num = vport_num;
+       entry->esw_owner_vhca_id = esw_owner_vhca_id;
        entry->lastuse = jiffies;
        if (added_by_user)
                entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER;
+       if (peer)
+               entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER;
 
-       counter = mlx5_fc_create(priv->mdev, true);
+       counter = mlx5_fc_create(esw->dev, true);
        if (IS_ERR(counter)) {
                err = PTR_ERR(counter);
                goto err_ingress_fc_create;
        }
        entry->ingress_counter = counter;
 
-       handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter),
-                                                    bridge);
+       handle = peer ?
+               mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan,
+                                                        mlx5_fc_id(counter), bridge) :
+               mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan,
+                                                   mlx5_fc_id(counter), bridge);
        if (IS_ERR(handle)) {
                err = PTR_ERR(handle);
                esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n",
@@ -962,7 +1012,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi
                entry->filter_handle = handle;
        }
 
-       handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge);
+       handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan,
+                                                   bridge);
        if (IS_ERR(handle)) {
                err = PTR_ERR(handle);
                esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n",
@@ -994,32 +1045,37 @@ err_egress_flow_create:
 err_ingress_filter_flow_create:
        mlx5_del_flow_rules(entry->ingress_handle);
 err_ingress_flow_create:
-       mlx5_fc_destroy(priv->mdev, entry->ingress_counter);
+       mlx5_fc_destroy(esw->dev, entry->ingress_counter);
 err_ingress_fc_create:
        kvfree(entry);
        return ERR_PTR(err);
 }
 
-int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
-                                   struct mlx5_vport *vport)
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+                                   struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       if (!vport->bridge)
+       struct mlx5_esw_bridge_port *port;
+
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port)
                return -EINVAL;
 
-       vport->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
+       port->bridge->ageing_time = clock_t_to_jiffies(ageing_time);
        return 0;
 }
 
-int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
-                                      struct mlx5_vport *vport)
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+                                      struct mlx5_esw_bridge_offloads *br_offloads)
 {
+       struct mlx5_esw_bridge_port *port;
        struct mlx5_esw_bridge *bridge;
        bool filtering;
 
-       if (!vport->bridge)
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port)
                return -EINVAL;
 
-       bridge = vport->bridge;
+       bridge = port->bridge;
        filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG;
        if (filtering == enable)
                return 0;
@@ -1033,114 +1089,143 @@ int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
        return 0;
 }
 
-static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloads,
-                                     struct mlx5_esw_bridge *bridge,
-                                     struct mlx5_vport *vport)
+static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags,
+                                     struct mlx5_esw_bridge_offloads *br_offloads,
+                                     struct mlx5_esw_bridge *bridge)
 {
        struct mlx5_eswitch *esw = br_offloads->esw;
        struct mlx5_esw_bridge_port *port;
        int err;
 
        port = kvzalloc(sizeof(*port), GFP_KERNEL);
-       if (!port) {
-               err = -ENOMEM;
-               goto err_port_alloc;
-       }
+       if (!port)
+               return -ENOMEM;
 
-       port->vport_num = vport->vport;
+       port->vport_num = vport_num;
+       port->esw_owner_vhca_id = esw_owner_vhca_id;
+       port->bridge = bridge;
+       port->flags |= flags;
        xa_init(&port->vlans);
-       err = mlx5_esw_bridge_port_insert(port, bridge);
+       err = mlx5_esw_bridge_port_insert(port, br_offloads);
        if (err) {
-               esw_warn(esw->dev, "Failed to insert port metadata (vport=%u,err=%d)\n",
-                        vport->vport, err);
+               esw_warn(esw->dev,
+                        "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+                        port->vport_num, port->esw_owner_vhca_id, err);
                goto err_port_insert;
        }
        trace_mlx5_esw_bridge_vport_init(port);
 
-       vport->bridge = bridge;
        return 0;
 
 err_port_insert:
        kvfree(port);
-err_port_alloc:
-       mlx5_esw_bridge_put(br_offloads, bridge);
        return err;
 }
 
 static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads,
-                                        struct mlx5_vport *vport)
+                                        struct mlx5_esw_bridge_port *port)
 {
-       struct mlx5_esw_bridge *bridge = vport->bridge;
+       u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id;
+       struct mlx5_esw_bridge *bridge = port->bridge;
        struct mlx5_esw_bridge_fdb_entry *entry, *tmp;
-       struct mlx5_esw_bridge_port *port;
 
        list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
-               if (entry->vport_num == vport->vport)
+               if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id)
                        mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
 
-       port = mlx5_esw_bridge_port_lookup(vport->vport, bridge);
-       if (!port) {
-               WARN(1, "Vport %u metadata not found on bridge", vport->vport);
-               return -EINVAL;
-       }
-
        trace_mlx5_esw_bridge_vport_cleanup(port);
        mlx5_esw_bridge_port_vlans_flush(port, bridge);
-       mlx5_esw_bridge_port_erase(port, bridge);
+       mlx5_esw_bridge_port_erase(port, br_offloads);
        kvfree(port);
        mlx5_esw_bridge_put(br_offloads, bridge);
-       vport->bridge = NULL;
        return 0;
 }
 
-int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
-                              struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                                u16 flags,
+                                                struct mlx5_esw_bridge_offloads *br_offloads,
+                                                struct netlink_ext_ack *extack)
 {
        struct mlx5_esw_bridge *bridge;
        int err;
 
-       WARN_ON(vport->bridge);
-
        bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads);
        if (IS_ERR(bridge)) {
                NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex");
                return PTR_ERR(bridge);
        }
 
-       err = mlx5_esw_bridge_vport_init(br_offloads, bridge, vport);
-       if (err)
+       err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge);
+       if (err) {
                NL_SET_ERR_MSG_MOD(extack, "Error initializing port");
+               goto err_vport;
+       }
+       return 0;
+
+err_vport:
+       mlx5_esw_bridge_put(br_offloads, bridge);
        return err;
 }
 
-int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
-                                struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                              struct mlx5_esw_bridge_offloads *br_offloads,
+                              struct netlink_ext_ack *extack)
 {
-       struct mlx5_esw_bridge *bridge = vport->bridge;
+       return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0,
+                                                    br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                struct mlx5_esw_bridge_offloads *br_offloads,
+                                struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_bridge_port *port;
        int err;
 
-       if (!bridge) {
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port) {
                NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge");
                return -EINVAL;
        }
-       if (bridge->ifindex != ifindex) {
+       if (port->bridge->ifindex != ifindex) {
                NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge");
                return -EINVAL;
        }
 
-       err = mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+       err = mlx5_esw_bridge_vport_cleanup(br_offloads, port);
        if (err)
                NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed");
        return err;
 }
 
-int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
-                                 struct mlx5_vport *vport, struct netlink_ext_ack *extack)
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                   struct mlx5_esw_bridge_offloads *br_offloads,
+                                   struct netlink_ext_ack *extack)
+{
+       if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch))
+               return 0;
+
+       return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id,
+                                                    MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+                                                    br_offloads, extack);
+}
+
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                     struct mlx5_esw_bridge_offloads *br_offloads,
+                                     struct netlink_ext_ack *extack)
+{
+       return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads,
+                                           extack);
+}
+
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+                                 struct mlx5_esw_bridge_offloads *br_offloads,
+                                 struct netlink_ext_ack *extack)
 {
        struct mlx5_esw_bridge_port *port;
        struct mlx5_esw_bridge_vlan *vlan;
 
-       port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
        if (!port)
                return -EINVAL;
 
@@ -1148,10 +1233,10 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
        if (vlan) {
                if (vlan->flags == flags)
                        return 0;
-               mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
+               mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
        }
 
-       vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw);
+       vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw);
        if (IS_ERR(vlan)) {
                NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry");
                return PTR_ERR(vlan);
@@ -1159,62 +1244,93 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
        return 0;
 }
 
-void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+                                  struct mlx5_esw_bridge_offloads *br_offloads)
 {
        struct mlx5_esw_bridge_port *port;
        struct mlx5_esw_bridge_vlan *vlan;
 
-       port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge);
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
        if (!port)
                return;
 
        vlan = mlx5_esw_bridge_vlan_lookup(vid, port);
        if (!vlan)
                return;
-       mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge);
+       mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge);
 }
 
-void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
-                               struct switchdev_notifier_fdb_info *fdb_info)
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                                    struct mlx5_esw_bridge_offloads *br_offloads,
+                                    struct switchdev_notifier_fdb_info *fdb_info)
 {
-       struct mlx5_esw_bridge *bridge = vport->bridge;
        struct mlx5_esw_bridge_fdb_entry *entry;
-       u16 vport_num = vport->vport;
+       struct mlx5_esw_bridge_fdb_key key;
+       struct mlx5_esw_bridge_port *port;
+       struct mlx5_esw_bridge *bridge;
 
-       if (!bridge) {
-               esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER)
+               return;
+
+       bridge = port->bridge;
+       ether_addr_copy(key.addr, fdb_info->addr);
+       key.vid = fdb_info->vid;
+       entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
+       if (!entry) {
+               esw_debug(br_offloads->esw->dev,
+                         "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n",
+                         key.addr, key.vid, vport_num);
                return;
        }
 
-       entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid,
-                                              fdb_info->added_by_user, esw, bridge);
+       entry->lastuse = jiffies;
+}
+
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                               struct mlx5_esw_bridge_offloads *br_offloads,
+                               struct switchdev_notifier_fdb_info *fdb_info)
+{
+       struct mlx5_esw_bridge_fdb_entry *entry;
+       struct mlx5_esw_bridge_port *port;
+       struct mlx5_esw_bridge *bridge;
+
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port)
+               return;
+
+       bridge = port->bridge;
+       entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr,
+                                              fdb_info->vid, fdb_info->added_by_user,
+                                              port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER,
+                                              br_offloads->esw, bridge);
        if (IS_ERR(entry))
                return;
 
        if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)
                mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
                                                   SWITCHDEV_FDB_OFFLOADED);
-       else
+       else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER))
                /* Take over dynamic entries to prevent kernel bridge from aging them out. */
                mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
                                                   SWITCHDEV_FDB_ADD_TO_BRIDGE);
 }
 
-void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                               struct mlx5_esw_bridge_offloads *br_offloads,
                                struct switchdev_notifier_fdb_info *fdb_info)
 {
-       struct mlx5_esw_bridge *bridge = vport->bridge;
+       struct mlx5_eswitch *esw = br_offloads->esw;
        struct mlx5_esw_bridge_fdb_entry *entry;
        struct mlx5_esw_bridge_fdb_key key;
-       u16 vport_num = vport->vport;
+       struct mlx5_esw_bridge_port *port;
+       struct mlx5_esw_bridge *bridge;
 
-       if (!bridge) {
-               esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num);
+       port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads);
+       if (!port)
                return;
-       }
 
+       bridge = port->bridge;
        ether_addr_copy(key.addr, fdb_info->addr);
        key.vid = fdb_info->vid;
        entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params);
@@ -1225,9 +1341,7 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw
                return;
        }
 
-       if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER))
-               mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid,
-                                                  SWITCHDEV_FDB_DEL_TO_BRIDGE);
+       mlx5_esw_bridge_fdb_del_notify(entry);
        mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
 }
 
@@ -1245,11 +1359,10 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
                                continue;
 
                        if (time_after(lastuse, entry->lastuse)) {
-                               mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry);
-                       } else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
-                               mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr,
-                                                                  entry->key.vid,
-                                                                  SWITCHDEV_FDB_DEL_TO_BRIDGE);
+                               mlx5_esw_bridge_fdb_entry_refresh(entry);
+                       } else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) &&
+                                  time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) {
+                               mlx5_esw_bridge_fdb_del_notify(entry);
                                mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge);
                        }
                }
@@ -1258,13 +1371,11 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads)
 
 static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads)
 {
-       struct mlx5_eswitch *esw = br_offloads->esw;
-       struct mlx5_vport *vport;
+       struct mlx5_esw_bridge_port *port;
        unsigned long i;
 
-       mlx5_esw_for_each_vport(esw, i, vport)
-               if (vport->bridge)
-                       mlx5_esw_bridge_vport_cleanup(br_offloads, vport);
+       xa_for_each(&br_offloads->ports, i, port)
+               mlx5_esw_bridge_vport_cleanup(br_offloads, port);
 
        WARN_ONCE(!list_empty(&br_offloads->bridges),
                  "Cleaning up bridge offloads while still having bridges attached\n");
@@ -1279,6 +1390,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw)
                return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&br_offloads->bridges);
+       xa_init(&br_offloads->ports);
        br_offloads->esw = esw;
        esw->br_offloads = br_offloads;
 
@@ -1293,6 +1405,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw)
                return;
 
        mlx5_esw_bridge_flush(br_offloads);
+       WARN_ON(!xa_empty(&br_offloads->ports));
 
        esw->br_offloads = NULL;
        kvfree(br_offloads);
index d826942..efc3997 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/notifier.h>
 #include <linux/list.h>
 #include <linux/workqueue.h>
+#include <linux/xarray.h>
 #include "eswitch.h"
 
 struct mlx5_flow_table;
@@ -15,6 +16,8 @@ struct mlx5_flow_group;
 struct mlx5_esw_bridge_offloads {
        struct mlx5_eswitch *esw;
        struct list_head bridges;
+       struct xarray ports;
+
        struct notifier_block netdev_nb;
        struct notifier_block nb_blk;
        struct notifier_block nb;
@@ -31,23 +34,36 @@ struct mlx5_esw_bridge_offloads {
 
 struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw);
 void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw);
-int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
-                              struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads,
-                                struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
+int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                              struct mlx5_esw_bridge_offloads *br_offloads,
+                              struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                struct mlx5_esw_bridge_offloads *br_offloads,
+                                struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                   struct mlx5_esw_bridge_offloads *br_offloads,
+                                   struct netlink_ext_ack *extack);
+int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id,
+                                     struct mlx5_esw_bridge_offloads *br_offloads,
+                                     struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                                    struct mlx5_esw_bridge_offloads *br_offloads,
+                                    struct switchdev_notifier_fdb_info *fdb_info);
+void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                               struct mlx5_esw_bridge_offloads *br_offloads,
                                struct switchdev_notifier_fdb_info *fdb_info);
-void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
+void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id,
+                               struct mlx5_esw_bridge_offloads *br_offloads,
                                struct switchdev_notifier_fdb_info *fdb_info);
 void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads);
-int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw,
-                                   struct mlx5_vport *vport);
-int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw,
-                                      struct mlx5_vport *vport);
-int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw,
-                                 struct mlx5_vport *vport, struct netlink_ext_ack *extack);
-void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time,
+                                   struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable,
+                                      struct mlx5_esw_bridge_offloads *br_offloads);
+int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags,
+                                 struct mlx5_esw_bridge_offloads *br_offloads,
+                                 struct netlink_ext_ack *extack);
+void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid,
+                                  struct mlx5_esw_bridge_offloads *br_offloads);
 
 #endif /* __MLX5_ESW_BRIDGE_H__ */
index d9ab2e8..52964a8 100644 (file)
@@ -19,6 +19,11 @@ struct mlx5_esw_bridge_fdb_key {
 
 enum {
        MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0),
+       MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1),
+};
+
+enum {
+       MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0),
 };
 
 struct mlx5_esw_bridge_fdb_entry {
@@ -28,6 +33,7 @@ struct mlx5_esw_bridge_fdb_entry {
        struct list_head list;
        struct list_head vlan_list;
        u16 vport_num;
+       u16 esw_owner_vhca_id;
        u16 flags;
 
        struct mlx5_flow_handle *ingress_handle;
@@ -47,6 +53,9 @@ struct mlx5_esw_bridge_vlan {
 
 struct mlx5_esw_bridge_port {
        u16 vport_num;
+       u16 esw_owner_vhca_id;
+       u16 flags;
+       struct mlx5_esw_bridge *bridge;
        struct xarray vlans;
 };
 
index 1703384..20af557 100644 (file)
@@ -91,9 +91,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
        if (err)
                goto reg_err;
 
+       err = devlink_rate_leaf_create(dl_port, vport);
+       if (err)
+               goto rate_err;
+
        vport->dl_port = dl_port;
        return 0;
 
+rate_err:
+       devlink_port_unregister(dl_port);
 reg_err:
        mlx5_esw_dl_port_free(dl_port);
        return err;
@@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
        vport = mlx5_eswitch_get_vport(esw, vport_num);
        if (IS_ERR(vport))
                return;
+
+       if (vport->dl_port->devlink_rate) {
+               mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+               devlink_rate_leaf_destroy(vport->dl_port);
+       }
+
        devlink_port_unregister(vport->dl_port);
        mlx5_esw_dl_port_free(vport->dl_port);
        vport->dl_port = NULL;
@@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
        if (err)
                return err;
 
+       err = devlink_rate_leaf_create(dl_port, vport);
+       if (err)
+               goto rate_err;
+
        vport->dl_port = dl_port;
        return 0;
+
+rate_err:
+       devlink_port_unregister(dl_port);
+       return err;
 }
 
 void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
@@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
        vport = mlx5_eswitch_get_vport(esw, vport_num);
        if (IS_ERR(vport))
                return;
+
+       if (vport->dl_port->devlink_rate) {
+               mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+               devlink_rate_leaf_destroy(vport->dl_port);
+       }
+
        devlink_port_unregister(vport->dl_port);
        vport->dl_port = NULL;
 }
index 227964b..3401188 100644 (file)
@@ -85,11 +85,18 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template,
                    TP_ARGS(port),
                    TP_STRUCT__entry(
                            __field(u16, vport_num)
+                           __field(u16, esw_owner_vhca_id)
+                           __field(u16, flags)
                            ),
                    TP_fast_assign(
                            __entry->vport_num = port->vport_num;
+                           __entry->esw_owner_vhca_id = port->esw_owner_vhca_id;
+                           __entry->flags = port->flags;
                            ),
-                   TP_printk("vport_num=%hu", __entry->vport_num)
+                   TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx",
+                             __entry->vport_num,
+                             __entry->esw_owner_vhca_id,
+                             __entry->flags)
        );
 
 DEFINE_EVENT(mlx5_esw_bridge_port_template,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
new file mode 100644 (file)
index 0000000..458baf0
--- /dev/null
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_TP_
+
+#include <linux/tracepoint.h>
+#include "eswitch.h"
+
+TRACE_EVENT(mlx5_esw_vport_qos_destroy,
+           TP_PROTO(const struct mlx5_vport *vport),
+           TP_ARGS(vport),
+           TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+                            __field(unsigned short, vport_id)
+                            __field(unsigned int,   tsar_ix)
+                            ),
+           TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+                   __entry->vport_id = vport->vport;
+                   __entry->tsar_ix = vport->qos.esw_tsar_ix;
+           ),
+           TP_printk("(%s) vport=%hu tsar_ix=%u\n",
+                     __get_str(devname), __entry->vport_id, __entry->tsar_ix
+                     )
+);
+
+DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
+                   TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+                   TP_ARGS(vport, bw_share, max_rate),
+                   TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+                                    __field(unsigned short, vport_id)
+                                    __field(unsigned int, tsar_ix)
+                                    __field(unsigned int, bw_share)
+                                    __field(unsigned int, max_rate)
+                                    __field(void *, group)
+                                    ),
+                   TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+                           __entry->vport_id = vport->vport;
+                           __entry->tsar_ix = vport->qos.esw_tsar_ix;
+                           __entry->bw_share = bw_share;
+                           __entry->max_rate = max_rate;
+                           __entry->group = vport->qos.group;
+                   ),
+                   TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+                             __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+                             __entry->bw_share, __entry->max_rate, __entry->group
+                             )
+);
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
+            TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+            TP_ARGS(vport, bw_share, max_rate)
+            );
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
+            TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+            TP_ARGS(vport, bw_share, max_rate)
+            );
+
+DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
+                   TP_PROTO(const struct mlx5_core_dev *dev,
+                            const struct mlx5_esw_rate_group *group,
+                            unsigned int tsar_ix),
+                   TP_ARGS(dev, group, tsar_ix),
+                   TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+                                    __field(const void *, group)
+                                    __field(unsigned int, tsar_ix)
+                                    ),
+                   TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+                           __entry->group = group;
+                           __entry->tsar_ix = tsar_ix;
+                   ),
+                   TP_printk("(%s) group=%p tsar_ix=%u\n",
+                             __get_str(devname), __entry->group, __entry->tsar_ix
+                             )
+);
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create,
+            TP_PROTO(const struct mlx5_core_dev *dev,
+                     const struct mlx5_esw_rate_group *group,
+                     unsigned int tsar_ix),
+            TP_ARGS(dev, group, tsar_ix)
+            );
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy,
+            TP_PROTO(const struct mlx5_core_dev *dev,
+                     const struct mlx5_esw_rate_group *group,
+                     unsigned int tsar_ix),
+            TP_ARGS(dev, group, tsar_ix)
+            );
+
+TRACE_EVENT(mlx5_esw_group_qos_config,
+           TP_PROTO(const struct mlx5_core_dev *dev,
+                    const struct mlx5_esw_rate_group *group,
+                    unsigned int tsar_ix, u32 bw_share, u32 max_rate),
+           TP_ARGS(dev, group, tsar_ix, bw_share, max_rate),
+           TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+                            __field(const void *, group)
+                            __field(unsigned int, tsar_ix)
+                            __field(unsigned int, bw_share)
+                            __field(unsigned int, max_rate)
+                            ),
+           TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+                   __entry->group = group;
+                   __entry->tsar_ix = tsar_ix;
+                   __entry->bw_share = bw_share;
+                   __entry->max_rate = max_rate;
+           ),
+           TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n",
+                     __get_str(devname), __entry->group, __entry->tsar_ix,
+                     __entry->bw_share, __entry->max_rate
+                     )
+);
+#endif /* _MLX5_ESW_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE qos_tracepoint
+#include <trace/define_trace.h>
index d9041b1..df277a6 100644 (file)
@@ -11,6 +11,7 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "fs_core.h"
+#include "esw/qos.h"
 
 enum {
        LEGACY_VEPA_PRIO = 0,
@@ -508,3 +509,22 @@ unlock:
        mutex_unlock(&esw->state_lock);
        return err;
 }
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+                               u32 max_rate, u32 min_rate)
+{
+       struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+       int err;
+
+       if (!mlx5_esw_allowed(esw))
+               return -EPERM;
+       if (IS_ERR(evport))
+               return PTR_ERR(evport);
+
+       mutex_lock(&esw->state_lock);
+       err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL);
+       if (!err)
+               err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
new file mode 100644 (file)
index 0000000..985e305
--- /dev/null
@@ -0,0 +1,869 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "en/port.h"
+#define CREATE_TRACE_POINTS
+#include "diag/qos_tracepoint.h"
+
+/* Minimum supported BW share value by the HW is 1 Mbit/sec */
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+       min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+
+struct mlx5_esw_rate_group {
+       u32 tsar_ix;
+       u32 max_rate;
+       u32 min_rate;
+       u32 bw_share;
+       struct list_head list;
+};
+
+static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+                              u32 parent_ix, u32 tsar_ix,
+                              u32 max_rate, u32 bw_share)
+{
+       u32 bitmask = 0;
+
+       if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+               return -EOPNOTSUPP;
+
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
+       MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+       return mlx5_modify_scheduling_element_cmd(dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 sched_ctx,
+                                                 tsar_ix,
+                                                 bitmask);
+}
+
+static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+                               u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_core_dev *dev = esw->dev;
+       int err;
+
+       err = esw_qos_tsar_config(dev, sched_ctx,
+                                 esw->qos.root_tsar_ix, group->tsar_ix,
+                                 max_rate, bw_share);
+       if (err)
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+
+       trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
+
+       return err;
+}
+
+static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+                               struct mlx5_vport *vport,
+                               u32 max_rate, u32 bw_share,
+                               struct netlink_ext_ack *extack)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_esw_rate_group *group = vport->qos.group;
+       struct mlx5_core_dev *dev = esw->dev;
+       u32 parent_tsar_ix;
+       void *vport_elem;
+       int err;
+
+       if (!vport->qos.enabled)
+               return -EIO;
+
+       parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+       vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+                                 element_attributes);
+       MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+
+       err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
+                                 max_rate, bw_share);
+       if (err) {
+               esw_warn(esw->dev,
+                        "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+                        vport->vport, err);
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+               return err;
+       }
+
+       trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+
+       return 0;
+}
+
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+                                             struct mlx5_esw_rate_group *group,
+                                             bool group_level)
+{
+       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       struct mlx5_vport *evport;
+       u32 max_guarantee = 0;
+       unsigned long i;
+
+       if (group_level) {
+               struct mlx5_esw_rate_group *group;
+
+               list_for_each_entry(group, &esw->qos.groups, list) {
+                       if (group->min_rate < max_guarantee)
+                               continue;
+                       max_guarantee = group->min_rate;
+               }
+       } else {
+               mlx5_esw_for_each_vport(esw, i, evport) {
+                       if (!evport->enabled || !evport->qos.enabled ||
+                           evport->qos.group != group || evport->qos.min_rate < max_guarantee)
+                               continue;
+                       max_guarantee = evport->qos.min_rate;
+               }
+       }
+
+       if (max_guarantee)
+               return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+       /* If vports min rate divider is 0 but their group has bw_share configured, then
+        * need to set bw_share for vports to minimal value.
+        */
+       if (!group_level && !max_guarantee && group->bw_share)
+               return 1;
+       return 0;
+}
+
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+       if (divider)
+               return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
+
+       return 0;
+}
+
+static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
+                                            struct mlx5_esw_rate_group *group,
+                                            struct netlink_ext_ack *extack)
+{
+       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
+       struct mlx5_vport *evport;
+       unsigned long i;
+       u32 bw_share;
+       int err;
+
+       mlx5_esw_for_each_vport(esw, i, evport) {
+               if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
+                       continue;
+               bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+
+               if (bw_share == evport->qos.bw_share)
+                       continue;
+
+               err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+               if (err)
+                       return err;
+
+               evport->qos.bw_share = bw_share;
+       }
+
+       return 0;
+}
+
+static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
+                                            struct netlink_ext_ack *extack)
+{
+       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       struct mlx5_esw_rate_group *group;
+       u32 bw_share;
+       int err;
+
+       list_for_each_entry(group, &esw->qos.groups, list) {
+               bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
+
+               if (bw_share == group->bw_share)
+                       continue;
+
+               err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+               if (err)
+                       return err;
+
+               group->bw_share = bw_share;
+
+               /* All the group's vports need to be set with default bw_share
+                * to enable them with QOS
+                */
+               err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *evport,
+                                   u32 min_rate,
+                                   struct netlink_ext_ack *extack)
+{
+       u32 fw_max_bw_share, previous_min_rate;
+       bool min_rate_supported;
+       int err;
+
+       lockdep_assert_held(&esw->state_lock);
+       fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+                               fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+       if (min_rate && !min_rate_supported)
+               return -EOPNOTSUPP;
+       if (min_rate == evport->qos.min_rate)
+               return 0;
+
+       previous_min_rate = evport->qos.min_rate;
+       evport->qos.min_rate = min_rate;
+       err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+       if (err)
+               evport->qos.min_rate = previous_min_rate;
+
+       return err;
+}
+
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *evport,
+                                   u32 max_rate,
+                                   struct netlink_ext_ack *extack)
+{
+       u32 act_max_rate = max_rate;
+       bool max_rate_supported;
+       int err;
+
+       lockdep_assert_held(&esw->state_lock);
+       max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+       if (max_rate && !max_rate_supported)
+               return -EOPNOTSUPP;
+       if (max_rate == evport->qos.max_rate)
+               return 0;
+
+       /* If parent group has rate limit need to set to group
+        * value when new max rate is 0.
+        */
+       if (evport->qos.group && !max_rate)
+               act_max_rate = evport->qos.group->max_rate;
+
+       err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+
+       if (!err)
+               evport->qos.max_rate = max_rate;
+
+       return err;
+}
+
+static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+                                     u32 min_rate, struct netlink_ext_ack *extack)
+{
+       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+       struct mlx5_core_dev *dev = esw->dev;
+       u32 previous_min_rate, divider;
+       int err;
+
+       if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+               return -EOPNOTSUPP;
+
+       if (min_rate == group->min_rate)
+               return 0;
+
+       previous_min_rate = group->min_rate;
+       group->min_rate = min_rate;
+       divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+       err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+       if (err) {
+               group->min_rate = previous_min_rate;
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
+
+               /* Attempt restoring previous configuration */
+               divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+               if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+                       NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
+       }
+
+       return err;
+}
+
+static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+                                     struct mlx5_esw_rate_group *group,
+                                     u32 max_rate, struct netlink_ext_ack *extack)
+{
+       struct mlx5_vport *vport;
+       unsigned long i;
+       int err;
+
+       if (group->max_rate == max_rate)
+               return 0;
+
+       err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+       if (err)
+               return err;
+
+       group->max_rate = max_rate;
+
+       /* Any unlimited vports in the group should be set
+        * with the value of the group.
+        */
+       mlx5_esw_for_each_vport(esw, i, vport) {
+               if (!vport->enabled || !vport->qos.enabled ||
+                   vport->qos.group != group || vport->qos.max_rate)
+                       continue;
+
+               err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+               if (err)
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "E-Switch vport implicit rate limit setting failed");
+       }
+
+       return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+                                             struct mlx5_vport *vport,
+                                             u32 max_rate, u32 bw_share)
+{
+       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_esw_rate_group *group = vport->qos.group;
+       struct mlx5_core_dev *dev = esw->dev;
+       u32 parent_tsar_ix;
+       void *vport_elem;
+       int err;
+
+       parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+       MLX5_SET(scheduling_context, sched_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+       vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+       MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+       MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+       MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+       err = mlx5_create_scheduling_element_cmd(dev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                sched_ctx,
+                                                &vport->qos.esw_tsar_ix);
+       if (err) {
+               esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+                        vport->vport, err);
+               return err;
+       }
+
+       return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+                                                  struct mlx5_vport *vport,
+                                                  struct mlx5_esw_rate_group *curr_group,
+                                                  struct mlx5_esw_rate_group *new_group,
+                                                  struct netlink_ext_ack *extack)
+{
+       u32 max_rate;
+       int err;
+
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 vport->qos.esw_tsar_ix);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+               return err;
+       }
+
+       vport->qos.group = new_group;
+       max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+       /* If vport is unlimited, we set the group's value.
+        * Therefore, if the group is limited it will apply to
+        * the vport as well and if not, vport will remain unlimited.
+        */
+       err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+               goto err_sched;
+       }
+
+       return 0;
+
+err_sched:
+       vport->qos.group = curr_group;
+       max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+       if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+               esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+                        vport->vport);
+
+       return err;
+}
+
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+                                     struct mlx5_vport *vport,
+                                     struct mlx5_esw_rate_group *group,
+                                     struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *new_group, *curr_group;
+       int err;
+
+       if (!vport->enabled)
+               return -EINVAL;
+
+       curr_group = vport->qos.group;
+       new_group = group ?: esw->qos.group0;
+       if (curr_group == new_group)
+               return 0;
+
+       err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+       if (err)
+               return err;
+
+       /* Recalculate bw share weights of old and new groups */
+       if (vport->qos.bw_share) {
+               esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+               esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+       }
+
+       return 0;
+}
+
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_esw_rate_group *group;
+       u32 divider;
+       int err;
+
+       if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+               return ERR_PTR(-EOPNOTSUPP);
+
+       group = kzalloc(sizeof(*group), GFP_KERNEL);
+       if (!group)
+               return ERR_PTR(-ENOMEM);
+
+       MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+                esw->qos.root_tsar_ix);
+       err = mlx5_create_scheduling_element_cmd(esw->dev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                tsar_ctx,
+                                                &group->tsar_ix);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+               goto err_sched_elem;
+       }
+
+       list_add_tail(&group->list, &esw->qos.groups);
+
+       divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+       if (divider) {
+               err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+                       goto err_min_rate;
+               }
+       }
+       trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
+
+       return group;
+
+err_min_rate:
+       list_del(&group->list);
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 group->tsar_ix);
+       if (err)
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
+err_sched_elem:
+       kfree(group);
+       return ERR_PTR(err);
+}
+
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+                                     struct mlx5_esw_rate_group *group,
+                                     struct netlink_ext_ack *extack)
+{
+       u32 divider;
+       int err;
+
+       list_del(&group->list);
+
+       divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
+       err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+       if (err)
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 group->tsar_ix);
+       if (err)
+               NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+       trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+       kfree(group);
+       return err;
+}
+
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+       switch (type) {
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+                      ELEMENT_TYPE_CAP_MASK_TASR;
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+                      ELEMENT_TYPE_CAP_MASK_VPORT;
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+                      ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+       case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+               return MLX5_CAP_QOS(dev, esw_element_type) &
+                      ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+       }
+       return false;
+}
+
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
+{
+       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_core_dev *dev = esw->dev;
+       __be32 *attr;
+       int err;
+
+       if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+               return;
+
+       if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+               return;
+
+       mutex_lock(&esw->state_lock);
+       if (esw->qos.enabled)
+               goto unlock;
+
+       MLX5_SET(scheduling_context, tsar_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+       attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+       *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
+       err = mlx5_create_scheduling_element_cmd(dev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                tsar_ctx,
+                                                &esw->qos.root_tsar_ix);
+       if (err) {
+               esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+               goto unlock;
+       }
+
+       INIT_LIST_HEAD(&esw->qos.groups);
+       if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+               esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
+               if (IS_ERR(esw->qos.group0)) {
+                       esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+                                PTR_ERR(esw->qos.group0));
+                       goto err_group0;
+               }
+       }
+       esw->qos.enabled = true;
+unlock:
+       mutex_unlock(&esw->state_lock);
+       return;
+
+err_group0:
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 esw->qos.root_tsar_ix);
+       if (err)
+               esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+       mutex_unlock(&esw->state_lock);
+}
+
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
+{
+       struct devlink *devlink = priv_to_devlink(esw->dev);
+       int err;
+
+       devlink_rate_nodes_destroy(devlink);
+       mutex_lock(&esw->state_lock);
+       if (!esw->qos.enabled)
+               goto unlock;
+
+       if (esw->qos.group0)
+               esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 esw->qos.root_tsar_ix);
+       if (err)
+               esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+
+       esw->qos.enabled = false;
+unlock:
+       mutex_unlock(&esw->state_lock);
+}
+
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+                             u32 max_rate, u32 bw_share)
+{
+       int err;
+
+       lockdep_assert_held(&esw->state_lock);
+       if (!esw->qos.enabled)
+               return 0;
+
+       if (vport->qos.enabled)
+               return -EEXIST;
+
+       vport->qos.group = esw->qos.group0;
+
+       err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+       if (!err) {
+               vport->qos.enabled = true;
+               trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+       }
+
+       return err;
+}
+
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+       int err;
+
+       lockdep_assert_held(&esw->state_lock);
+       if (!esw->qos.enabled || !vport->qos.enabled)
+               return;
+       WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+            "Disabling QoS on port before detaching it from group");
+
+       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 vport->qos.esw_tsar_ix);
+       if (err)
+               esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+                        vport->vport, err);
+
+       vport->qos.enabled = false;
+       trace_mlx5_esw_vport_qos_destroy(vport);
+}
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
+{
+       u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+       struct mlx5_vport *vport;
+       u32 bitmask;
+
+       vport = mlx5_eswitch_get_vport(esw, vport_num);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       if (!vport->qos.enabled)
+               return -EOPNOTSUPP;
+
+       MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+       bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+       return mlx5_modify_scheduling_element_cmd(esw->dev,
+                                                 SCHEDULING_HIERARCHY_E_SWITCH,
+                                                 ctx,
+                                                 vport->qos.esw_tsar_ix,
+                                                 bitmask);
+}
+
+#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
+
+/* Converts bytes per second value passed in a pointer into megabits per
+ * second, rewriting last. If converted rate exceed link speed or is not a
+ * fraction of Mbps - returns error.
+ */
+static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
+                                       u64 *rate, struct netlink_ext_ack *extack)
+{
+       u32 link_speed_max, reminder;
+       u64 value;
+       int err;
+
+       err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
+               return err;
+       }
+
+       value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
+       if (reminder) {
+               pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+                      name, *rate);
+               NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
+               return -EINVAL;
+       }
+
+       if (value > link_speed_max) {
+               pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
+                      name, value, link_speed_max);
+               NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
+               return -EINVAL;
+       }
+
+       *rate = value;
+       return 0;
+}
+
+/* Eswitch devlink rate API */
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+                                           u64 tx_share, struct netlink_ext_ack *extack)
+{
+       struct mlx5_vport *vport = priv;
+       struct mlx5_eswitch *esw;
+       int err;
+
+       esw = vport->dev->priv.eswitch;
+       if (!mlx5_esw_allowed(esw))
+               return -EPERM;
+
+       err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
+       if (err)
+               return err;
+
+       mutex_lock(&esw->state_lock);
+       err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+                                         u64 tx_max, struct netlink_ext_ack *extack)
+{
+       struct mlx5_vport *vport = priv;
+       struct mlx5_eswitch *esw;
+       int err;
+
+       esw = vport->dev->priv.eswitch;
+       if (!mlx5_esw_allowed(esw))
+               return -EPERM;
+
+       err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
+       if (err)
+               return err;
+
+       mutex_lock(&esw->state_lock);
+       err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+                                           u64 tx_share, struct netlink_ext_ack *extack)
+{
+       struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+       struct mlx5_eswitch *esw = dev->priv.eswitch;
+       struct mlx5_esw_rate_group *group = priv;
+       int err;
+
+       err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+       if (err)
+               return err;
+
+       mutex_lock(&esw->state_lock);
+       err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+                                         u64 tx_max, struct netlink_ext_ack *extack)
+{
+       struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+       struct mlx5_eswitch *esw = dev->priv.eswitch;
+       struct mlx5_esw_rate_group *group = priv;
+       int err;
+
+       err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+       if (err)
+               return err;
+
+       mutex_lock(&esw->state_lock);
+       err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+                                  struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *group;
+       struct mlx5_eswitch *esw;
+       int err = 0;
+
+       esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+
+       mutex_lock(&esw->state_lock);
+       if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Rate node creation supported only in switchdev mode");
+               err = -EOPNOTSUPP;
+               goto unlock;
+       }
+
+       group = esw_qos_create_rate_group(esw, extack);
+       if (IS_ERR(group)) {
+               err = PTR_ERR(group);
+               goto unlock;
+       }
+
+       *priv = group;
+unlock:
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+                                  struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *group = priv;
+       struct mlx5_eswitch *esw;
+       int err;
+
+       esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+       if (IS_ERR(esw))
+               return PTR_ERR(esw);
+
+       mutex_lock(&esw->state_lock);
+       err = esw_qos_destroy_rate_group(esw, group, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *vport,
+                                   struct mlx5_esw_rate_group *group,
+                                   struct netlink_ext_ack *extack)
+{
+       int err;
+
+       mutex_lock(&esw->state_lock);
+       err = esw_qos_vport_update_group(esw, vport, group, extack);
+       mutex_unlock(&esw->state_lock);
+       return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+                                    struct devlink_rate *parent,
+                                    void *priv, void *parent_priv,
+                                    struct netlink_ext_ack *extack)
+{
+       struct mlx5_esw_rate_group *group;
+       struct mlx5_vport *vport = priv;
+
+       if (!parent)
+               return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+                                                      vport, NULL, extack);
+
+       group = parent_priv;
+       return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644 (file)
index 0000000..28451ab
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *evport,
+                                   u32 min_rate,
+                                   struct netlink_ext_ack *extack);
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *evport,
+                                   u32 max_rate,
+                                   struct netlink_ext_ack *extack);
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw);
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+                             u32 max_rate, u32 bw_share);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+                                           u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+                                         u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+                                           u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+                                         u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+                                  struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+                                  struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+                                    struct devlink_rate *parent,
+                                    void *priv, void *parent_priv,
+                                    struct netlink_ext_ack *extack);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
deleted file mode 100644 (file)
index d3ad78a..0000000
+++ /dev/null
@@ -1,586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2021 Mellanox Technologies. */
-
-#include <linux/skbuff.h>
-#include <net/psample.h>
-#include "en/mapping.h"
-#include "esw/sample.h"
-#include "eswitch.h"
-#include "en_tc.h"
-#include "fs_core.h"
-
-#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
-
-static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
-       .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
-       .max_num_groups = 0,    /* default num of groups */
-       .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
-};
-
-struct mlx5_esw_psample {
-       struct mlx5e_priv *priv;
-       struct mlx5_flow_table *termtbl;
-       struct mlx5_flow_handle *termtbl_rule;
-       DECLARE_HASHTABLE(hashtbl, 8);
-       struct mutex ht_lock; /* protect hashtbl */
-       DECLARE_HASHTABLE(restore_hashtbl, 8);
-       struct mutex restore_lock; /* protect restore_hashtbl */
-};
-
-struct mlx5_sampler {
-       struct hlist_node hlist;
-       u32 sampler_id;
-       u32 sample_ratio;
-       u32 sample_table_id;
-       u32 default_table_id;
-       int count;
-};
-
-struct mlx5_sample_flow {
-       struct mlx5_sampler *sampler;
-       struct mlx5_sample_restore *restore;
-       struct mlx5_flow_attr *pre_attr;
-       struct mlx5_flow_handle *pre_rule;
-       struct mlx5_flow_handle *rule;
-};
-
-struct mlx5_sample_restore {
-       struct hlist_node hlist;
-       struct mlx5_modify_hdr *modify_hdr;
-       struct mlx5_flow_handle *rule;
-       u32 obj_id;
-       int count;
-};
-
-static int
-sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
-{
-       struct mlx5_core_dev *dev = esw_psample->priv->mdev;
-       struct mlx5_eswitch *esw = dev->priv.eswitch;
-       struct mlx5_flow_table_attr ft_attr = {};
-       struct mlx5_flow_destination dest = {};
-       struct mlx5_flow_namespace *root_ns;
-       struct mlx5_flow_act act = {};
-       int err;
-
-       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table))  {
-               mlx5_core_warn(dev, "termination table is not supported\n");
-               return -EOPNOTSUPP;
-       }
-
-       root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
-       if (!root_ns) {
-               mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
-               return -EOPNOTSUPP;
-       }
-
-       ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
-       ft_attr.autogroup.max_num_groups = 1;
-       ft_attr.prio = FDB_SLOW_PATH;
-       ft_attr.max_fte = 1;
-       ft_attr.level = 1;
-       esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
-       if (IS_ERR(esw_psample->termtbl)) {
-               err = PTR_ERR(esw_psample->termtbl);
-               mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
-               return err;
-       }
-
-       act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-       dest.vport.num = esw->manager_vport;
-       esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1);
-       if (IS_ERR(esw_psample->termtbl_rule)) {
-               err = PTR_ERR(esw_psample->termtbl_rule);
-               mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
-               mlx5_destroy_flow_table(esw_psample->termtbl);
-               return err;
-       }
-
-       return 0;
-}
-
-static void
-sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
-{
-       mlx5_del_flow_rules(esw_psample->termtbl_rule);
-       mlx5_destroy_flow_table(esw_psample->termtbl);
-}
-
-static int
-sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler)
-{
-       u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
-       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
-       u64 general_obj_types;
-       void *obj;
-       int err;
-
-       general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
-       if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
-               return -EOPNOTSUPP;
-       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
-               return -EOPNOTSUPP;
-
-       obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
-       MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
-       MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
-       MLX5_SET(sampler_obj, obj, level, 1);
-       MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
-       MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
-       MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
-       MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
-       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
-
-       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-       if (!err)
-               sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
-       return err;
-}
-
-static void
-sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
-{
-       u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
-       u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
-
-       MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
-       MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
-       MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
-
-       mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static u32
-sampler_hash(u32 sample_ratio, u32 default_table_id)
-{
-       return jhash_2words(sample_ratio, default_table_id, 0);
-}
-
-static int
-sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
-{
-       return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
-}
-
-static struct mlx5_sampler *
-sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id)
-{
-       struct mlx5_sampler *sampler;
-       u32 hash_key;
-       int err;
-
-       mutex_lock(&esw_psample->ht_lock);
-       hash_key = sampler_hash(sample_ratio, default_table_id);
-       hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key)
-               if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
-                                sample_ratio, default_table_id))
-                       goto add_ref;
-
-       sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
-       if (!sampler) {
-               err = -ENOMEM;
-               goto err_alloc;
-       }
-
-       sampler->sample_table_id = esw_psample->termtbl->id;
-       sampler->default_table_id = default_table_id;
-       sampler->sample_ratio = sample_ratio;
-
-       err = sampler_obj_create(esw_psample->priv->mdev, sampler);
-       if (err)
-               goto err_create;
-
-       hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key);
-
-add_ref:
-       sampler->count++;
-       mutex_unlock(&esw_psample->ht_lock);
-       return sampler;
-
-err_create:
-       kfree(sampler);
-err_alloc:
-       mutex_unlock(&esw_psample->ht_lock);
-       return ERR_PTR(err);
-}
-
-static void
-sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
-{
-       mutex_lock(&esw_psample->ht_lock);
-       if (--sampler->count == 0) {
-               hash_del(&sampler->hlist);
-               sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id);
-               kfree(sampler);
-       }
-       mutex_unlock(&esw_psample->ht_lock);
-}
-
-static struct mlx5_modify_hdr *
-sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
-{
-       struct mlx5e_tc_mod_hdr_acts mod_acts = {};
-       struct mlx5_modify_hdr *modify_hdr;
-       int err;
-
-       err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
-                                       CHAIN_TO_REG, obj_id);
-       if (err)
-               goto err_set_regc0;
-
-       modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
-                                             mod_acts.num_actions,
-                                             mod_acts.actions);
-       if (IS_ERR(modify_hdr)) {
-               err = PTR_ERR(modify_hdr);
-               goto err_modify_hdr;
-       }
-
-       dealloc_mod_hdr_actions(&mod_acts);
-       return modify_hdr;
-
-err_modify_hdr:
-       dealloc_mod_hdr_actions(&mod_acts);
-err_set_regc0:
-       return ERR_PTR(err);
-}
-
-static struct mlx5_sample_restore *
-sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
-{
-       struct mlx5_core_dev *mdev = esw_psample->priv->mdev;
-       struct mlx5_eswitch *esw = mdev->priv.eswitch;
-       struct mlx5_sample_restore *restore;
-       struct mlx5_modify_hdr *modify_hdr;
-       int err;
-
-       mutex_lock(&esw_psample->restore_lock);
-       hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id)
-               if (restore->obj_id == obj_id)
-                       goto add_ref;
-
-       restore = kzalloc(sizeof(*restore), GFP_KERNEL);
-       if (!restore) {
-               err = -ENOMEM;
-               goto err_alloc;
-       }
-       restore->obj_id = obj_id;
-
-       modify_hdr = sample_metadata_rule_get(mdev, obj_id);
-       if (IS_ERR(modify_hdr)) {
-               err = PTR_ERR(modify_hdr);
-               goto err_modify_hdr;
-       }
-       restore->modify_hdr = modify_hdr;
-
-       restore->rule = esw_add_restore_rule(esw, obj_id);
-       if (IS_ERR(restore->rule)) {
-               err = PTR_ERR(restore->rule);
-               goto err_restore;
-       }
-
-       hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id);
-add_ref:
-       restore->count++;
-       mutex_unlock(&esw_psample->restore_lock);
-       return restore;
-
-err_restore:
-       mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
-err_modify_hdr:
-       kfree(restore);
-err_alloc:
-       mutex_unlock(&esw_psample->restore_lock);
-       return ERR_PTR(err);
-}
-
-static void
-sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore)
-{
-       mutex_lock(&esw_psample->restore_lock);
-       if (--restore->count == 0)
-               hash_del(&restore->hlist);
-       mutex_unlock(&esw_psample->restore_lock);
-
-       if (!restore->count) {
-               mlx5_del_flow_rules(restore->rule);
-               mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr);
-               kfree(restore);
-       }
-}
-
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
-{
-       u32 trunc_size = mapped_obj->sample.trunc_size;
-       struct psample_group psample_group = {};
-       struct psample_metadata md = {};
-
-       md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
-       md.in_ifindex = skb->dev->ifindex;
-       psample_group.group_num = mapped_obj->sample.group_id;
-       psample_group.net = &init_net;
-       skb_push(skb, skb->mac_len);
-
-       psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
-}
-
-/* For the following typical flow table:
- *
- * +-------------------------------+
- * +       original flow table     +
- * +-------------------------------+
- * +         original match        +
- * +-------------------------------+
- * + sample action + other actions +
- * +-------------------------------+
- *
- * We translate the tc filter with sample action to the following HW model:
- *
- *         +---------------------+
- *         + original flow table +
- *         +---------------------+
- *         +   original match    +
- *         +---------------------+
- *                    |
- *                    v
- * +------------------------------------------------+
- * +                Flow Sampler Object             +
- * +------------------------------------------------+
- * +                    sample ratio                +
- * +------------------------------------------------+
- * +    sample table id    |    default table id    +
- * +------------------------------------------------+
- *            |                            |
- *            v                            v
- * +-----------------------------+  +----------------------------------------+
- * +        sample table         +  + default table per <vport, chain, prio> +
- * +-----------------------------+  +----------------------------------------+
- * + forward to management vport +  +            original match              +
- * +-----------------------------+  +----------------------------------------+
- *                                  +            other actions               +
- *                                  +----------------------------------------+
- */
-struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
-                       struct mlx5_flow_spec *spec,
-                       struct mlx5_flow_attr *attr)
-{
-       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
-       struct mlx5_vport_tbl_attr per_vport_tbl_attr;
-       struct mlx5_esw_flow_attr *pre_esw_attr;
-       struct mlx5_mapped_obj restore_obj = {};
-       struct mlx5_sample_flow *sample_flow;
-       struct mlx5_sample_attr *sample_attr;
-       struct mlx5_flow_table *default_tbl;
-       struct mlx5_flow_attr *pre_attr;
-       struct mlx5_eswitch *esw;
-       u32 obj_id;
-       int err;
-
-       if (IS_ERR_OR_NULL(esw_psample))
-               return ERR_PTR(-EOPNOTSUPP);
-
-       /* If slow path flag is set, eg. when the neigh is invalid for encap,
-        * don't offload sample action.
-        */
-       esw = esw_psample->priv->mdev->priv.eswitch;
-       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
-               return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
-
-       sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
-       if (!sample_flow)
-               return ERR_PTR(-ENOMEM);
-       esw_attr->sample->sample_flow = sample_flow;
-
-       /* Allocate default table per vport, chain and prio. Otherwise, there is
-        * only one default table for the same sampler object. Rules with different
-        * prio and chain may overlap. For CT sample action, per vport default
-        * table is needed to resotre the metadata.
-        */
-       per_vport_tbl_attr.chain = attr->chain;
-       per_vport_tbl_attr.prio = attr->prio;
-       per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
-       per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
-       default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
-       if (IS_ERR(default_tbl)) {
-               err = PTR_ERR(default_tbl);
-               goto err_default_tbl;
-       }
-
-       /* Perform the original matches on the default table.
-        * Offload all actions except the sample action.
-        */
-       esw_attr->sample->sample_default_tbl = default_tbl;
-       /* When offloading sample and encap action, if there is no valid
-        * neigh data struct, a slow path rule is offloaded first. Source
-        * port metadata match is set at that time. A per vport table is
-        * already allocated. No need to match it again. So clear the source
-        * port metadata match.
-        */
-       mlx5_eswitch_clear_rule_source_port(esw, spec);
-       sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
-       if (IS_ERR(sample_flow->rule)) {
-               err = PTR_ERR(sample_flow->rule);
-               goto err_offload_rule;
-       }
-
-       /* Create sampler object. */
-       sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id);
-       if (IS_ERR(sample_flow->sampler)) {
-               err = PTR_ERR(sample_flow->sampler);
-               goto err_sampler;
-       }
-
-       /* Create an id mapping reg_c0 value to sample object. */
-       restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
-       restore_obj.sample.group_id = esw_attr->sample->group_num;
-       restore_obj.sample.rate = esw_attr->sample->rate;
-       restore_obj.sample.trunc_size = esw_attr->sample->trunc_size;
-       err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
-       if (err)
-               goto err_obj_id;
-       esw_attr->sample->restore_obj_id = obj_id;
-
-       /* Create sample restore context. */
-       sample_flow->restore = sample_restore_get(esw_psample, obj_id);
-       if (IS_ERR(sample_flow->restore)) {
-               err = PTR_ERR(sample_flow->restore);
-               goto err_sample_restore;
-       }
-
-       /* Perform the original matches on the original table. Offload the
-        * sample action. The destination is the sampler object.
-        */
-       pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
-       if (!pre_attr) {
-               err = -ENOMEM;
-               goto err_alloc_flow_attr;
-       }
-       sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL);
-       if (!sample_attr) {
-               err = -ENOMEM;
-               goto err_alloc_sample_attr;
-       }
-       pre_esw_attr = pre_attr->esw_attr;
-       pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-       pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
-       pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
-       pre_attr->chain = attr->chain;
-       pre_attr->prio = attr->prio;
-       pre_esw_attr->sample = sample_attr;
-       pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id;
-       pre_esw_attr->in_mdev = esw_attr->in_mdev;
-       pre_esw_attr->in_rep = esw_attr->in_rep;
-       sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
-       if (IS_ERR(sample_flow->pre_rule)) {
-               err = PTR_ERR(sample_flow->pre_rule);
-               goto err_pre_offload_rule;
-       }
-       sample_flow->pre_attr = pre_attr;
-
-       return sample_flow->rule;
-
-err_pre_offload_rule:
-       kfree(sample_attr);
-err_alloc_sample_attr:
-       kfree(pre_attr);
-err_alloc_flow_attr:
-       sample_restore_put(esw_psample, sample_flow->restore);
-err_sample_restore:
-       mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
-err_obj_id:
-       sampler_put(esw_psample, sample_flow->sampler);
-err_sampler:
-       /* For sample offload, rule is added in default_tbl. No need to call
-        * mlx5_esw_chains_put_table()
-        */
-       attr->prio = 0;
-       attr->chain = 0;
-       mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-err_offload_rule:
-       mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
-err_default_tbl:
-       kfree(sample_flow);
-       return ERR_PTR(err);
-}
-
-void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample,
-                         struct mlx5_flow_handle *rule,
-                         struct mlx5_flow_attr *attr)
-{
-       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
-       struct mlx5_sample_flow *sample_flow;
-       struct mlx5_vport_tbl_attr tbl_attr;
-       struct mlx5_flow_attr *pre_attr;
-       struct mlx5_eswitch *esw;
-
-       if (IS_ERR_OR_NULL(esw_psample))
-               return;
-
-       /* If slow path flag is set, sample action is not offloaded.
-        * No need to delete sample rule.
-        */
-       esw = esw_psample->priv->mdev->priv.eswitch;
-       if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
-               mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
-               return;
-       }
-
-       sample_flow = esw_attr->sample->sample_flow;
-       pre_attr = sample_flow->pre_attr;
-       memset(pre_attr, 0, sizeof(*pre_attr));
-       esw = esw_psample->priv->mdev->priv.eswitch;
-       mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr);
-       mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-
-       sample_restore_put(esw_psample, sample_flow->restore);
-       mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id);
-       sampler_put(esw_psample, sample_flow->sampler);
-       tbl_attr.chain = attr->chain;
-       tbl_attr.prio = attr->prio;
-       tbl_attr.vport = esw_attr->in_rep->vport;
-       tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
-       mlx5_esw_vporttbl_put(esw, &tbl_attr);
-
-       kfree(pre_attr->esw_attr->sample);
-       kfree(pre_attr);
-       kfree(sample_flow);
-}
-
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv)
-{
-       struct mlx5_esw_psample *esw_psample;
-       int err;
-
-       esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL);
-       if (!esw_psample)
-               return ERR_PTR(-ENOMEM);
-       esw_psample->priv = priv;
-       err = sampler_termtbl_create(esw_psample);
-       if (err)
-               goto err_termtbl;
-
-       mutex_init(&esw_psample->ht_lock);
-       mutex_init(&esw_psample->restore_lock);
-
-       return esw_psample;
-
-err_termtbl:
-       kfree(esw_psample);
-       return ERR_PTR(err);
-}
-
-void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
-{
-       if (IS_ERR_OR_NULL(esw_psample))
-               return;
-
-       mutex_destroy(&esw_psample->restore_lock);
-       mutex_destroy(&esw_psample->ht_lock);
-       sampler_termtbl_destroy(esw_psample);
-       kfree(esw_psample);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
deleted file mode 100644 (file)
index 2a3f4be..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2021 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_TC_SAMPLE_H__
-#define __MLX5_EN_TC_SAMPLE_H__
-
-#include "en.h"
-#include "eswitch.h"
-
-struct mlx5e_priv;
-struct mlx5_flow_attr;
-struct mlx5_esw_psample;
-
-struct mlx5_sample_attr {
-       u32 group_num;
-       u32 rate;
-       u32 trunc_size;
-       u32 restore_obj_id;
-       u32 sampler_id;
-       struct mlx5_flow_table *sample_default_tbl;
-       struct mlx5_sample_flow *sample_flow;
-};
-
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
-
-struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv,
-                       struct mlx5_flow_spec *spec,
-                       struct mlx5_flow_attr *attr);
-
-void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv,
-                         struct mlx5_flow_handle *rule,
-                         struct mlx5_flow_attr *attr);
-
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv);
-
-void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample);
-
-#endif /* __MLX5_EN_TC_SAMPLE_H__ */
index 97e6cb6..ec136b4 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mlx5/mpfs.h>
 #include "esw/acl/lgcy.h"
 #include "esw/legacy.h"
+#include "esw/qos.h"
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "eswitch.h"
@@ -740,201 +741,6 @@ static void esw_vport_change_handler(struct work_struct *work)
        mutex_unlock(&esw->state_lock);
 }
 
-static bool element_type_supported(struct mlx5_eswitch *esw, int type)
-{
-       const struct mlx5_core_dev *dev = esw->dev;
-
-       switch (type) {
-       case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
-               return MLX5_CAP_QOS(dev, esw_element_type) &
-                      ELEMENT_TYPE_CAP_MASK_TASR;
-       case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
-               return MLX5_CAP_QOS(dev, esw_element_type) &
-                      ELEMENT_TYPE_CAP_MASK_VPORT;
-       case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
-               return MLX5_CAP_QOS(dev, esw_element_type) &
-                      ELEMENT_TYPE_CAP_MASK_VPORT_TC;
-       case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
-               return MLX5_CAP_QOS(dev, esw_element_type) &
-                      ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
-       }
-       return false;
-}
-
-/* Vport QoS management */
-static void esw_create_tsar(struct mlx5_eswitch *esw)
-{
-       u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-       struct mlx5_core_dev *dev = esw->dev;
-       __be32 *attr;
-       int err;
-
-       if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
-               return;
-
-       if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
-               return;
-
-       if (esw->qos.enabled)
-               return;
-
-       MLX5_SET(scheduling_context, tsar_ctx, element_type,
-                SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
-
-       attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
-       *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
-
-       err = mlx5_create_scheduling_element_cmd(dev,
-                                                SCHEDULING_HIERARCHY_E_SWITCH,
-                                                tsar_ctx,
-                                                &esw->qos.root_tsar_id);
-       if (err) {
-               esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
-               return;
-       }
-
-       esw->qos.enabled = true;
-}
-
-static void esw_destroy_tsar(struct mlx5_eswitch *esw)
-{
-       int err;
-
-       if (!esw->qos.enabled)
-               return;
-
-       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
-                                                 SCHEDULING_HIERARCHY_E_SWITCH,
-                                                 esw->qos.root_tsar_id);
-       if (err)
-               esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
-
-       esw->qos.enabled = false;
-}
-
-static int esw_vport_enable_qos(struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
-                               u32 initial_max_rate, u32 initial_bw_share)
-{
-       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-       struct mlx5_core_dev *dev = esw->dev;
-       void *vport_elem;
-       int err = 0;
-
-       if (!esw->qos.enabled)
-               return 0;
-
-       if (vport->qos.enabled)
-               return -EEXIST;
-
-       MLX5_SET(scheduling_context, sched_ctx, element_type,
-                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
-       vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
-                                 element_attributes);
-       MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
-       MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
-                esw->qos.root_tsar_id);
-       MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
-                initial_max_rate);
-       MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
-
-       err = mlx5_create_scheduling_element_cmd(dev,
-                                                SCHEDULING_HIERARCHY_E_SWITCH,
-                                                sched_ctx,
-                                                &vport->qos.esw_tsar_ix);
-       if (err) {
-               esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
-                        vport->vport, err);
-               return err;
-       }
-
-       vport->qos.enabled = true;
-       return 0;
-}
-
-static void esw_vport_disable_qos(struct mlx5_eswitch *esw,
-                                 struct mlx5_vport *vport)
-{
-       int err;
-
-       if (!vport->qos.enabled)
-               return;
-
-       err = mlx5_destroy_scheduling_element_cmd(esw->dev,
-                                                 SCHEDULING_HIERARCHY_E_SWITCH,
-                                                 vport->qos.esw_tsar_ix);
-       if (err)
-               esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
-                        vport->vport, err);
-
-       vport->qos.enabled = false;
-}
-
-static int esw_vport_qos_config(struct mlx5_eswitch *esw,
-                               struct mlx5_vport *vport,
-                               u32 max_rate, u32 bw_share)
-{
-       u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
-       struct mlx5_core_dev *dev = esw->dev;
-       void *vport_elem;
-       u32 bitmask = 0;
-       int err = 0;
-
-       if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
-               return -EOPNOTSUPP;
-
-       if (!vport->qos.enabled)
-               return -EIO;
-
-       MLX5_SET(scheduling_context, sched_ctx, element_type,
-                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
-       vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
-                                 element_attributes);
-       MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
-       MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
-                esw->qos.root_tsar_id);
-       MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
-                max_rate);
-       MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
-       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
-       bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
-
-       err = mlx5_modify_scheduling_element_cmd(dev,
-                                                SCHEDULING_HIERARCHY_E_SWITCH,
-                                                sched_ctx,
-                                                vport->qos.esw_tsar_ix,
-                                                bitmask);
-       if (err) {
-               esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
-                        vport->vport, err);
-               return err;
-       }
-
-       return 0;
-}
-
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
-                              u32 rate_mbps)
-{
-       u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
-       struct mlx5_vport *vport;
-
-       vport = mlx5_eswitch_get_vport(esw, vport_num);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
-
-       if (!vport->qos.enabled)
-               return -EOPNOTSUPP;
-
-       MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
-
-       return mlx5_modify_scheduling_element_cmd(esw->dev,
-                                                 SCHEDULING_HIERARCHY_E_SWITCH,
-                                                 ctx,
-                                                 vport->qos.esw_tsar_ix,
-                                                 MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
-}
-
 static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
 {
        ((u8 *)node_guid)[7] = mac[0];
@@ -976,7 +782,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
                return err;
 
        /* Attach vport to the eswitch rate limiter */
-       esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
+       mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
 
        if (mlx5_esw_is_manager_vport(esw, vport_num))
                return 0;
@@ -1013,7 +819,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
                                              vport_num, 1,
                                              MLX5_VPORT_ADMIN_STATE_DOWN);
 
-       esw_vport_disable_qos(esw, vport);
+       mlx5_esw_qos_vport_disable(esw, vport);
        esw_vport_cleanup_acl(esw, vport);
 }
 
@@ -1454,12 +1260,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 
        mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
 
-       esw_create_tsar(esw);
+       mlx5_esw_qos_create(esw);
 
        esw->mode = mode;
 
-       mlx5_lag_update(esw->dev);
-
        if (mode == MLX5_ESWITCH_LEGACY) {
                err = esw_legacy_enable(esw);
        } else {
@@ -1486,7 +1290,7 @@ abort:
        if (mode == MLX5_ESWITCH_OFFLOADS)
                mlx5_rescan_drivers(esw->dev);
 
-       esw_destroy_tsar(esw);
+       mlx5_esw_qos_destroy(esw);
        mlx5_esw_acls_ns_cleanup(esw);
        return err;
 }
@@ -1494,7 +1298,7 @@ abort:
 /**
  * mlx5_eswitch_enable - Enable eswitch
  * @esw:       Pointer to eswitch
- * @num_vfs:   Enable eswitch swich for given number of VFs.
+ * @num_vfs:   Enable eswitch switch for given number of VFs.
  *             Caller must pass num_vfs > 0 when enabling eswitch for
  *             vf vports.
  * mlx5_eswitch_enable() returns 0 on success or error code on failure.
@@ -1506,6 +1310,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
        if (!mlx5_esw_allowed(esw))
                return 0;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        if (esw->mode == MLX5_ESWITCH_NONE) {
                ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1324,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
                        esw->esw_funcs.num_vfs = num_vfs;
        }
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
        return ret;
 }
 
@@ -1550,12 +1356,10 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
        old_mode = esw->mode;
        esw->mode = MLX5_ESWITCH_NONE;
 
-       mlx5_lag_update(esw->dev);
-
        if (old_mode == MLX5_ESWITCH_OFFLOADS)
                mlx5_rescan_drivers(esw->dev);
 
-       esw_destroy_tsar(esw);
+       mlx5_esw_qos_destroy(esw);
        mlx5_esw_acls_ns_cleanup(esw);
 
        if (clear_vf)
@@ -1567,10 +1371,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
        if (!mlx5_esw_allowed(esw))
                return;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        mlx5_eswitch_disable_locked(esw, clear_vf);
        esw->esw_funcs.num_vfs = 0;
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
 }
 
 static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1565,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        ida_init(&esw->offloads.vport_metadata_ida);
        xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
        mutex_init(&esw->state_lock);
+       lockdep_register_key(&esw->mode_lock_key);
        init_rwsem(&esw->mode_lock);
+       lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
 
        esw->enabled_vports = 0;
        esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1601,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
        esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
+       lockdep_unregister_key(&esw->mode_lock_key);
        mutex_destroy(&esw->state_lock);
        WARN_ON(!xa_empty(&esw->offloads.vhca_map));
        xa_destroy(&esw->offloads.vhca_map);
@@ -1889,8 +1698,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
               mlx5_esw_is_sf_vport(esw, vport_num);
 }
 
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1899,7 +1707,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);
 
@@ -1923,8 +1731,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        return err;
 }
 
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1933,7 +1740,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw)) {
                NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
                return PTR_ERR(esw);
@@ -2049,110 +1856,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
        return err;
 }
 
-static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
-{
-       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-       struct mlx5_vport *evport;
-       u32 max_guarantee = 0;
-       unsigned long i;
-
-       mlx5_esw_for_each_vport(esw, i, evport) {
-               if (!evport->enabled || evport->qos.min_rate < max_guarantee)
-                       continue;
-               max_guarantee = evport->qos.min_rate;
-       }
-
-       if (max_guarantee)
-               return max_t(u32, max_guarantee / fw_max_bw_share, 1);
-       return 0;
-}
-
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
-{
-       u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-       u32 divider = calculate_vports_min_rate_divider(esw);
-       struct mlx5_vport *evport;
-       u32 vport_max_rate;
-       u32 vport_min_rate;
-       unsigned long i;
-       u32 bw_share;
-       int err;
-
-       mlx5_esw_for_each_vport(esw, i, evport) {
-               if (!evport->enabled)
-                       continue;
-               vport_min_rate = evport->qos.min_rate;
-               vport_max_rate = evport->qos.max_rate;
-               bw_share = 0;
-
-               if (divider)
-                       bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
-                                                        divider,
-                                                        fw_max_bw_share);
-
-               if (bw_share == evport->qos.bw_share)
-                       continue;
-
-               err = esw_vport_qos_config(esw, evport, vport_max_rate,
-                                          bw_share);
-               if (!err)
-                       evport->qos.bw_share = bw_share;
-               else
-                       return err;
-       }
-
-       return 0;
-}
-
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
-                               u32 max_rate, u32 min_rate)
-{
-       struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
-       u32 fw_max_bw_share;
-       u32 previous_min_rate;
-       bool min_rate_supported;
-       bool max_rate_supported;
-       int err = 0;
-
-       if (!mlx5_esw_allowed(esw))
-               return -EPERM;
-       if (IS_ERR(evport))
-               return PTR_ERR(evport);
-
-       fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-       min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
-                               fw_max_bw_share >= MLX5_MIN_BW_SHARE;
-       max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
-
-       if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
-               return -EOPNOTSUPP;
-
-       mutex_lock(&esw->state_lock);
-
-       if (min_rate == evport->qos.min_rate)
-               goto set_max_rate;
-
-       previous_min_rate = evport->qos.min_rate;
-       evport->qos.min_rate = min_rate;
-       err = normalize_vports_min_rate(esw);
-       if (err) {
-               evport->qos.min_rate = previous_min_rate;
-               goto unlock;
-       }
-
-set_max_rate:
-       if (max_rate == evport->qos.max_rate)
-               goto unlock;
-
-       err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
-       if (!err)
-               evport->qos.max_rate = max_rate;
-
-unlock:
-       mutex_unlock(&esw->state_lock);
-       return err;
-}
-
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 u16 vport_num,
                                 struct ifla_vf_stats *vf_stats)
@@ -2366,9 +2069,22 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
  */
 void mlx5_esw_unlock(struct mlx5_eswitch *esw)
 {
+       if (!mlx5_esw_allowed(esw))
+               return;
        up_write(&esw->mode_lock);
 }
 
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+       if (!mlx5_esw_allowed(esw))
+               return;
+       down_write(&esw->mode_lock);
+}
+
 /**
  * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
  *
@@ -2384,3 +2100,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw : eswitch device.
+ *
+ * Return the mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
index d562edf..2c74441 100644 (file)
@@ -46,7 +46,7 @@
 #include "lib/fs_chains.h"
 #include "sf/sf.h"
 #include "en/tc_ct.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
 
 enum mlx5_mapped_obj_type {
        MLX5_MAPPED_OBJ_CHAIN,
@@ -61,6 +61,7 @@ struct mlx5_mapped_obj {
                        u32 group_id;
                        u32 rate;
                        u32 trunc_size;
+                       u32 tunnel_id;
                } sample;
        };
 };
@@ -75,17 +76,20 @@ struct mlx5_mapped_obj {
 #define MLX5_MAX_MC_PER_VPORT(dev) \
        (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
 
-#define MLX5_MIN_BW_SHARE 1
-
-#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
-       min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
-
 #define mlx5_esw_has_fwd_fdb(dev) \
        MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
 
 #define esw_chains(esw) \
        ((esw)->fdb_table.offloads.esw_chains_priv)
 
+enum {
+       MAPPING_TYPE_CHAIN,
+       MAPPING_TYPE_TUNNEL,
+       MAPPING_TYPE_TUNNEL_ENC_OPTS,
+       MAPPING_TYPE_LABELS,
+       MAPPING_TYPE_ZONE,
+};
+
 struct vport_ingress {
        struct mlx5_flow_table *acl;
        struct mlx5_flow_handle *allow_rule;
@@ -124,6 +128,8 @@ struct vport_egress {
                struct {
                        struct mlx5_flow_group *fwd_grp;
                        struct mlx5_flow_handle *fwd_rule;
+                       struct mlx5_flow_handle *bounce_rule;
+                       struct mlx5_flow_group *bounce_grp;
                } offloads;
        };
 };
@@ -150,8 +156,6 @@ enum mlx5_eswitch_vport_event {
        MLX5_VPORT_PROMISC_CHANGE = BIT(3),
 };
 
-struct mlx5_esw_bridge;
-
 struct mlx5_vport {
        struct mlx5_core_dev    *dev;
        struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
@@ -173,6 +177,7 @@ struct mlx5_vport {
                u32             bw_share;
                u32 min_rate;
                u32 max_rate;
+               struct mlx5_esw_rate_group *group;
        } qos;
 
        u16 vport;
@@ -180,7 +185,6 @@ struct mlx5_vport {
        enum mlx5_eswitch_vport_event enabled_events;
        int index;
        struct devlink_port *dl_port;
-       struct mlx5_esw_bridge *bridge;
 };
 
 struct mlx5_esw_indir_table;
@@ -302,7 +306,9 @@ struct mlx5_eswitch {
 
        struct {
                bool            enabled;
-               u32             root_tsar_id;
+               u32             root_tsar_ix;
+               struct mlx5_esw_rate_group *group0;
+               struct list_head groups; /* Protected by esw->state_lock */
        } qos;
 
        struct mlx5_esw_bridge_offloads *br_offloads;
@@ -315,6 +321,7 @@ struct mlx5_eswitch {
                u32             large_group_num;
        }  params;
        struct blocking_notifier_head n_head;
+       struct lock_class_key mode_lock_key;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -327,8 +334,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
 u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
 void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
 
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
-                              u32 rate_mbps);
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
 
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@ -351,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
                                 u16 vport_num, bool setting);
 int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
                                u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+                                   struct mlx5_vport *vport,
+                                   struct mlx5_esw_rate_group *group,
+                                   struct netlink_ext_ack *extack);
 int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
 int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
@@ -461,7 +471,6 @@ struct mlx5_esw_flow_attr {
        } dests[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5_rx_tun_attr *rx_tun_attr;
        struct mlx5_pkt_reformat *decap_pkt_reformat;
-       struct mlx5_sample_attr *sample;
 };
 
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
@@ -475,12 +484,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                                        struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
                                        enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack);
 
@@ -699,11 +706,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
 void mlx5_esw_put(struct mlx5_core_dev *dev);
 int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
 void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
 
 void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -719,6 +733,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
 static inline struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {
@@ -731,6 +748,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 {
        return vport_num;
 }
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                       struct mlx5_eswitch *slave_esw)
+{
+       return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                        struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       return 0;
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
index 3bb71a1..0d461e3 100644 (file)
@@ -187,12 +187,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
 static int
 esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
                       struct mlx5_flow_act *flow_act,
-                      struct mlx5_esw_flow_attr *esw_attr,
+                      struct mlx5_flow_attr *attr,
                       int i)
 {
        flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
-       dest[i].sampler_id = esw_attr->sample->sampler_id;
+       dest[i].sampler_id = attr->sample_attr->sampler_id;
 
        return 0;
 }
@@ -435,7 +435,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
                attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
 
        if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
-               esw_setup_sampler_dest(dest, flow_act, esw_attr, *i);
+               esw_setup_sampler_dest(dest, flow_act, attr, *i);
                (*i)++;
        } else if (attr->dest_ft) {
                esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
@@ -540,10 +540,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_hdr = attr->modify_hdr;
 
-       /* esw_attr->sample is allocated only when there is a sample action */
-       if (esw_attr->sample && esw_attr->sample->sample_default_tbl) {
-               fdb = esw_attr->sample->sample_default_tbl;
-       } else if (split) {
+       if (split) {
                fwd_attr.chain = attr->chain;
                fwd_attr.prio = attr->prio;
                fwd_attr.vport = esw_attr->in_rep->vport;
@@ -927,6 +924,7 @@ out:
 
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep,
                                    u32 sqn)
 {
@@ -945,10 +943,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
        /* source vport is the esw manager */
-       MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+       MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
        if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
                MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+                        MLX5_CAP_GEN(from_esw->dev, vhca_id));
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -964,6 +962,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
+       if (rep->vport == MLX5_VPORT_UPLINK)
+               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
        flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
                                        spec, &flow_act, &dest, 1);
        if (IS_ERR(flow_rule))
@@ -1614,7 +1615,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                goto ns_err;
        }
 
-       table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+       /* To be strictly correct:
+        *      MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+        * should be:
+        *      esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+        *      peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+        * but as the peer device might not be in switchdev mode it's not
+        * possible. We use the fact that by default FW sets max vfs and max sfs
+        * to the same value on both devices. If it needs to be changed in the future note
+        * the peer miss group should also be created based on the number of
+        * total vports of the peer (currently is also uses esw->total_vports).
+        */
+       table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
                MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
 
        /* create the slow path fdb with encap set, so further table instances
@@ -1671,7 +1683,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                         source_eswitch_owner_vhca_id_valid, 1);
        }
 
-       ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+       /* See comment above table_size calculation */
+       ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
@@ -2311,14 +2324,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
                mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 }
 
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+                                            struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_eswitch *esw;
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_vport *vport;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+       MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+       MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+       if (master) {
+               esw = master->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+               MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+                        MLX5_VPORT_UPLINK);
+
+               ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               esw = slave->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               ns = mlx5_get_flow_vport_acl_namespace(slave,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                 struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+
+       if (master) {
+               ns = mlx5_get_flow_namespace(master,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                       struct mlx5_core_dev *slave,
+                                       struct mlx5_vport *vport,
+                                       struct mlx5_flow_table *acl)
+{
+       struct mlx5_flow_handle *flow_rule = NULL;
+       struct mlx5_flow_destination dest = {};
+       struct mlx5_flow_act flow_act = {};
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       void *misc;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+
+       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                           misc_parameters);
+       MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+                MLX5_CAP_GEN(slave, vhca_id));
+
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                        source_eswitch_owner_vhca_id);
+
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+       dest.vport.num = slave->priv.eswitch->manager_vport;
+       dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+       dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+       flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+                                       &dest, 1);
+       if (IS_ERR(flow_rule))
+               err = PTR_ERR(flow_rule);
+       else
+               vport->egress.offloads.bounce_rule = flow_rule;
+
+       kvfree(spec);
+       return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                     struct mlx5_core_dev *slave)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       struct mlx5_flow_table_attr ft_attr = {
+               .max_fte = 1, .prio = 0, .level = 0,
+       };
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_flow_table *acl;
+       struct mlx5_flow_group *g;
+       struct mlx5_vport *vport;
+       void *match_criteria;
+       u32 *flow_group_in;
+       int err;
+
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                     vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       if (vport->egress.acl)
+               return -EINVAL;
+
+       flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!flow_group_in)
+               return -ENOMEM;
+
+       acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+       if (IS_ERR(acl)) {
+               err = PTR_ERR(acl);
+               goto out;
+       }
+
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_port);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_eswitch_owner_vhca_id);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_MISC_PARAMETERS);
+
+       MLX5_SET(create_flow_group_in, flow_group_in,
+                source_eswitch_owner_vhca_id_valid, 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+       g = mlx5_create_flow_group(acl, flow_group_in);
+       if (IS_ERR(g)) {
+               err = PTR_ERR(g);
+               goto err_group;
+       }
+
+       err = __esw_set_master_egress_rule(master, slave, vport, acl);
+       if (err)
+               goto err_rule;
+
+       vport->egress.acl = acl;
+       vport->egress.offloads.bounce_grp = g;
+
+       kvfree(flow_group_in);
+
+       return 0;
+
+err_rule:
+       mlx5_destroy_flow_group(g);
+err_group:
+       mlx5_destroy_flow_table(acl);
+out:
+       kvfree(flow_group_in);
+       return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+       struct mlx5_vport *vport;
+
+       vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+                                      dev->priv.eswitch->manager_vport);
+
+       esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw)
+{
+       int err;
+
+       err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+                                               slave_esw->dev);
+       if (err)
+               return -EINVAL;
+
+       err = esw_set_slave_root_fdb(master_esw->dev,
+                                    slave_esw->dev);
+       if (err)
+               goto err_fdb;
+
+       err = esw_set_master_egress_rule(master_esw->dev,
+                                        slave_esw->dev);
+       if (err)
+               goto err_acl;
+
+       return err;
+
+err_acl:
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+       return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw)
+{
+       esw_unset_master_egress_rule(master_esw->dev);
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
-                                 struct mlx5_eswitch *peer_esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
 {
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
 
-       return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               rep_type = NUM_REP_TYPES;
+               while (rep_type--) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event)
+                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+               }
+       }
 }
 
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@ -2326,9 +2618,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        mlx5e_tc_clean_fdb_peer_flows(esw);
 #endif
+       mlx5_esw_offloads_rep_event_unpair(esw);
        esw_del_fdb_peer_miss_rules(esw);
 }
 
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+                                 struct mlx5_eswitch *peer_esw)
+{
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
+       int err;
+
+       err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       if (err)
+               return err;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event) {
+                               err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+                               if (err)
+                                       goto err_out;
+                       }
+               }
+       }
+
+       return 0;
+
+err_out:
+       mlx5_esw_offloads_unpair(esw);
+       return err;
+}
+
 static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
@@ -2619,6 +2944,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
        esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       int ret;
+
+       if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+               return 0;
+
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+               return 0;
+
+       ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+       if (ret)
+               return ret;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+                       mlx5_esw_offloads_rep_load(esw, rep->vport);
+       }
+
+       return 0;
+}
+
 static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_esw_indir_table *indir;
@@ -2788,6 +3138,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        struct mapping_ctx *reg_c0_obj_pool;
        struct mlx5_vport *vport;
        unsigned long i;
+       u64 mapping_id;
        int err;
 
        if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
@@ -2811,9 +3162,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        if (err)
                goto err_vport_metadata;
 
-       reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                        ESW_REG_C0_USER_DATA_METADATA_MASK,
-                                        true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                               sizeof(struct mlx5_mapped_obj),
+                                               ESW_REG_C0_USER_DATA_METADATA_MASK,
+                                               true);
+
        if (IS_ERR(reg_c0_obj_pool)) {
                err = PTR_ERR(reg_c0_obj_pool);
                goto err_pool;
@@ -2991,10 +3346,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
+       mlx5_lag_disable_change(esw->dev);
        err = mlx5_esw_try_lock(esw);
        if (err < 0) {
                NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
-               return err;
+               goto enable_lag;
        }
        cur_mlx5_mode = err;
        err = 0;
@@ -3018,6 +3374,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 
 unlock:
        mlx5_esw_unlock(esw);
+enable_lag:
+       mlx5_lag_enable_change(esw->dev);
        return err;
 }
 
@@ -3091,8 +3449,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 
        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
-               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+                       err = 0;
                        goto out;
+               }
+
                fallthrough;
        case MLX5_CAP_INLINE_MODE_L2:
                NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
index d713ae2..a1ac3a6 100644 (file)
@@ -27,7 +27,7 @@ static int pcie_core(struct notifier_block *, unsigned long, void *);
 static int forward_event(struct notifier_block *, unsigned long, void *);
 
 static struct mlx5_nb events_nbs_ref[] = {
-       /* Events to be proccessed by mlx5_core */
+       /* Events to be processed by mlx5_core */
        {.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
        {.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
        {.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
index d5da4ab..306279b 100644 (file)
@@ -453,7 +453,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
        MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
                           MLX5_ADAPTER_PAGE_SHIFT);
index 0bba92c..8ec1480 100644 (file)
@@ -1516,7 +1516,7 @@ static int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
        mutex_lock(&fpga_xfrm->lock);
 
        if (!fpga_xfrm->sa_ctx)
-               /* Unbounded xfrm, chane only sw attrs */
+               /* Unbounded xfrm, change only sw attrs */
                goto change_sw_xfrm_attrs;
 
        /* copy original hw sa */
index 896a6c3..7db8df6 100644 (file)
@@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
        return 0;
 }
 
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                      struct mlx5_core_dev *slave,
+                                      bool ft_id_valid,
+                                      u32 ft_id)
+{
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+       if (ft_id_valid) {
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        ft_id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
 static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
                                   struct mlx5_flow_table *ft, u32 underlay_qpn,
                                   bool disconnect)
 {
        u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
        struct mlx5_core_dev *dev = ns->dev;
+       int err;
 
        if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
            underlay_qpn == 0)
                return 0;
 
+       if (ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           !mlx5_lag_is_master(dev))
+               return 0;
+
        MLX5_SET(set_flow_table_root_in, in, opcode,
                 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
        MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
@@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
        MLX5_SET(set_flow_table_root_in, in, other_vport,
                 !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
 
-       return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       if (!err &&
+           ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           mlx5_lag_is_master(dev)) {
+               err = mlx5_cmd_set_slave_root_fdb(dev,
+                                                 mlx5_lag_get_peer_mdev(dev),
+                                                 !disconnect, (!disconnect) ?
+                                                 ft->id : 0);
+               if (err && !disconnect) {
+                       MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+                       MLX5_SET(set_flow_table_root_in, in, table_id,
+                                ns->root_ft->id);
+                       mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+               }
+       }
+
+       return err;
 }
 
 static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
index c0697e1..9fe8e3c 100644 (file)
@@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
        return true;
 }
 
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
 {
        struct fs_node *root;
        struct mlx5_flow_namespace *ns;
@@ -2343,7 +2343,7 @@ static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio,
 
 #define FLOW_TABLE_BIT_SZ 1
 #define GET_FLOW_TABLE_CAP(dev, offset) \
-       ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) +    \
+       ((be32_to_cpu(*((__be32 *)(dev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur) +   \
                        offset / 32)) >>                                        \
          (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) & FLOW_TABLE_BIT_SZ)
 static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
@@ -2493,7 +2493,7 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
                acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
 
                /* If this a prio with chains, and we can jump from one chain
-                * (namepsace) to another, so we accumulate the levels
+                * (namespace) to another, so we accumulate the levels
                 */
                if (prio->node.type == FS_TYPE_PRIO_CHAINS)
                        acc_level = acc_level_ns;
index 7317cde..98240ba 100644 (file)
@@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
 int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
 
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
 #define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
 
 #define fs_list_for_each_entry(pos, root)              \
index 9abeb80..037e18d 100644 (file)
@@ -170,7 +170,7 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
 
        /* The reset only needs to be issued by one PF. The health buffer is
         * shared between all functions, and will be cleared during a reset.
-        * Check again to avoid a redundant 2nd reset. If the fatal erros was
+        * Check again to avoid a redundant 2nd reset. If the fatal errors was
         * PCI related a reset won't help.
         */
        fatal_error = mlx5_health_check_fatal_sensors(dev);
@@ -213,10 +213,6 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
        mutex_lock(&dev->intf_state_mutex);
        if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
                goto unlock;/* a previous error is still being handled */
-       if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) {
-               dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-               goto unlock;
-       }
 
        enter_error_state(dev, force);
 unlock:
index 0e487ec..0c8594c 100644 (file)
@@ -99,7 +99,9 @@ static void mlx5i_get_channels(struct net_device *dev,
 }
 
 static int mlx5i_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 
@@ -107,7 +109,9 @@ static int mlx5i_set_coalesce(struct net_device *netdev,
 }
 
 static int mlx5i_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coal)
+                             struct ethtool_coalesce *coal,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 
index 7d7ed02..67571e5 100644 (file)
@@ -50,7 +50,7 @@ static const struct net_device_ops mlx5i_netdev_ops = {
        .ndo_init                = mlx5i_dev_init,
        .ndo_uninit              = mlx5i_dev_cleanup,
        .ndo_change_mtu          = mlx5i_change_mtu,
-       .ndo_do_ioctl            = mlx5i_ioctl,
+       .ndo_eth_ioctl            = mlx5i_ioctl,
 };
 
 /* IPoIB mlx5 netdev profile */
@@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 {
-       struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -330,33 +329,15 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-       mlx5e_set_inner_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->inner_indir_tir[tt].tirn;
-
-       err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
-       if (err) {
-               netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
-                          err);
-               goto err_destroy_arfs_tables;
-       }
-
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
-               goto err_destroy_inner_ttc_table;
+               goto err_destroy_arfs_tables;
        }
 
        return 0;
 
-err_destroy_inner_ttc_table:
-       mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
 err_destroy_arfs_tables:
        mlx5e_arfs_destroy_tables(priv);
 
@@ -365,17 +346,20 @@ err_destroy_arfs_tables:
 
 static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
 }
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       u16 max_nch = priv->max_nch;
+       struct mlx5e_lro_param lro_param;
        int err;
 
+       priv->rx_res = mlx5e_rx_res_alloc();
+       if (!priv->rx_res)
+               return -ENOMEM;
+
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -384,54 +368,38 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, true);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5i_create_flow_steering(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        return 0;
 
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-err_destroy_indirect_rqts:
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
        return err;
 }
 
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
-       u16 max_nch = priv->max_nch;
-
        mlx5i_destroy_flow_steering(priv);
-       mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
-       mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
 }
 
 /* The stats groups order is opposite to the update_stats() order calls */
index 18ee21b..5308f23 100644 (file)
@@ -149,7 +149,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = {
        .ndo_get_stats64         = mlx5i_get_stats,
        .ndo_uninit              = mlx5i_pkey_dev_cleanup,
        .ndo_change_mtu          = mlx5i_pkey_change_mtu,
-       .ndo_do_ioctl            = mlx5i_pkey_ioctl,
+       .ndo_eth_ioctl            = mlx5i_pkey_ioctl,
 };
 
 /* Child NDOs */
index 40ef60f..49ca57c 100644 (file)
@@ -32,7 +32,9 @@
 
 #include <linux/netdevice.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "lag.h"
@@ -45,7 +47,7 @@
 static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-                              u8 remap_port2)
+                              u8 remap_port2, bool shared_fdb)
 {
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+       MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
 
        return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
-                          struct lag_tracker *tracker)
+                          struct lag_tracker *tracker,
+                          bool shared_fdb)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;
 
        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);
 
-       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+                      shared_fdb);
 
        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-                                 ldev->v2p_map[MLX5_LAG_P2]);
-       if (err)
+                                 ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+       if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
+               return err;
+       }
+
+       if (shared_fdb) {
+               err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+                                                             dev1->priv.eswitch);
+               if (err)
+                       mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+               else
+                       mlx5_core_info(dev0, "Operation mode is single FDB\n");
+       }
+
+       if (err) {
+               MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+               if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+                       mlx5_core_err(dev0,
+                                     "Failed to deactivate RoCE LAG; driver restart required\n");
+       }
+
        return err;
 }
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags)
+                     u8 flags,
+                     bool shared_fdb)
 {
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;
 
-       err = mlx5_create_lag(ldev, tracker);
+       err = mlx5_create_lag(ldev, tracker, shared_fdb);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
        }
 
        ldev->flags |= flags;
+       ldev->shared_fdb = shared_fdb;
        return 0;
 }
 
@@ -279,6 +307,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
        mlx5_lag_mp_reset(ldev);
 
+       if (ldev->shared_fdb) {
+               mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+                                                        ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+               ldev->shared_fdb = false;
+       }
+
        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
@@ -334,6 +368,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
                if (!ldev->pf[i].dev)
                        continue;
 
+               if (ldev->pf[i].dev->priv.flags &
+                   MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+                       continue;
+
                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
@@ -343,12 +381,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       bool shared_fdb = ldev->shared_fdb;
        bool roce_lag;
        int err;
 
        roce_lag = __mlx5_lag_is_roce(ldev);
 
-       if (roce_lag) {
+       if (shared_fdb) {
+               mlx5_lag_remove_devices(ldev);
+       } else if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
@@ -360,8 +401,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
        if (err)
                return;
 
-       if (roce_lag)
+       if (shared_fdb || roce_lag)
                mlx5_lag_add_devices(ldev);
+
+       if (shared_fdb) {
+               if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+               if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+       }
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+       if (is_mdev_switchdev_mode(dev0) &&
+           is_mdev_switchdev_mode(dev1) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+           mlx5_devcom_is_paired(dev0->priv.devcom,
+                                 MLX5_DEVCOM_ESW_OFFLOADS) &&
+           MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+           MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+           MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+               return true;
+
+       return false;
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -372,14 +439,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
        bool do_bond, roce_lag;
        int err;
 
-       if (!mlx5_lag_is_ready(ldev))
-               return;
-
-       tracker = ldev->tracker;
+       if (!mlx5_lag_is_ready(ldev)) {
+               do_bond = false;
+       } else {
+               tracker = ldev->tracker;
 
-       do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+               do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+       }
 
        if (do_bond && !__mlx5_lag_is_active(ldev)) {
+               bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);
 
@@ -389,23 +459,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
-               if (roce_lag)
+               if (shared_fdb || roce_lag)
                        mlx5_lag_remove_devices(ldev);
 
                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
-                                       MLX5_LAG_FLAG_SRIOV);
+                                                  MLX5_LAG_FLAG_SRIOV,
+                                       shared_fdb);
                if (err) {
-                       if (roce_lag)
+                       if (shared_fdb || roce_lag)
                                mlx5_lag_add_devices(ldev);
 
                        return;
-               }
-
-               if (roce_lag) {
+               } else if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
+               } else if (shared_fdb) {
+                       dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                       mlx5_rescan_drivers_locked(dev0);
+
+                       err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                       if (!err)
+                               err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+                       if (err) {
+                               dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                               mlx5_rescan_drivers_locked(dev0);
+                               mlx5_deactivate_lag(ldev);
+                               mlx5_lag_add_devices(ldev);
+                               mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                               mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+                               mlx5_core_err(dev0, "Failed to enable lag\n");
+                               return;
+                       }
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
@@ -419,21 +506,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+                                   struct mlx5_core_dev *dev1)
+{
+       if (dev0)
+               mlx5_esw_lock(dev0->priv.eswitch);
+       if (dev1)
+               mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+                                     struct mlx5_core_dev *dev1)
+{
+       if (dev1)
+               mlx5_esw_unlock(dev1->priv.eswitch);
+       if (dev0)
+               mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
 static void mlx5_do_bond_work(struct work_struct *work)
 {
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        int status;
 
        status = mlx5_dev_list_trylock();
        if (!status) {
-               /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }
 
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               mlx5_queue_bond_work(ldev, HZ);
+               return;
+       }
+
+       mlx5_lag_lock_eswitches(dev0, dev1);
        mlx5_do_bond(ldev);
+       mlx5_lag_unlock_eswitches(dev0, dev1);
        mlx5_dev_list_unlock();
 }
 
@@ -631,7 +745,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 }
 
 /* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;
@@ -639,7 +753,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
-               return;
+               return 0;
 
        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
@@ -649,15 +763,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
-                       return;
+                       return 0;
                }
        } else {
+               if (ldev->mode_changes_in_progress)
+                       return -EAGAIN;
                mlx5_ldev_get(ldev);
        }
 
        mlx5_ldev_add_mdev(ldev, dev);
 
-       return;
+       return 0;
 }
 
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -668,7 +784,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
        if (!ldev)
                return;
 
+recheck:
        mlx5_dev_list_lock();
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
@@ -676,8 +798,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
 
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 {
+       int err;
+
+recheck:
        mlx5_dev_list_lock();
-       __mlx5_lag_dev_add_mdev(dev);
+       err = __mlx5_lag_dev_add_mdev(dev);
+       if (err) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_dev_list_unlock();
 }
 
@@ -691,11 +821,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
        if (!ldev)
                return;
 
-       if (__mlx5_lag_is_active(ldev))
-               mlx5_disable_lag(ldev);
-
        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+       if (__mlx5_lag_is_active(ldev))
+               mlx5_queue_bond_work(ldev, 0);
 }
 
 /* Must be called with intf_mutex held */
@@ -717,6 +847,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
 
        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
+       mlx5_queue_bond_work(ldev, 0);
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -747,6 +878,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_active(ldev) &&
+               dev == ldev->pf[MLX5_LAG_P1].dev;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev;
@@ -761,19 +907,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_sriov);
 
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
 {
+       struct mlx5_core_dev *dev0;
+       struct mlx5_core_dev *dev1;
        struct mlx5_lag *ldev;
 
        mlx5_dev_list_lock();
+
        ldev = mlx5_lag_dev(dev);
-       if (!ldev)
-               goto unlock;
+       dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       dev1 = ldev->pf[MLX5_LAG_P2].dev;
 
-       mlx5_do_bond(ldev);
+       ldev->mode_changes_in_progress++;
+       if (__mlx5_lag_is_active(ldev)) {
+               mlx5_lag_lock_eswitches(dev0, dev1);
+               mlx5_disable_lag(ldev);
+               mlx5_lag_unlock_eswitches(dev0, dev1);
+       }
+       mlx5_dev_list_unlock();
+}
 
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+
+       mlx5_dev_list_lock();
+       ldev = mlx5_lag_dev(dev);
+       ldev->mode_changes_in_progress--;
        mlx5_dev_list_unlock();
+       mlx5_queue_bond_work(ldev, 0);
 }
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -828,6 +1005,26 @@ unlock:
 }
 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
 
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_dev *peer_dev = NULL;
+       struct mlx5_lag *ldev;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       if (!ldev)
+               goto unlock;
+
+       peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+                          ldev->pf[MLX5_LAG_P2].dev :
+                          ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+       spin_unlock(&lag_lock);
+       return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
index 191392c..d4bae52 100644 (file)
@@ -39,6 +39,8 @@ struct lag_tracker {
  */
 struct mlx5_lag {
        u8                        flags;
+       int                       mode_changes_in_progress;
+       bool                      shared_fdb;
        u8                        v2p_map[MLX5_MAX_PORTS];
        struct kref               ref;
        struct lag_func           pf[MLX5_MAX_PORTS];
@@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags);
+                     u8 flags,
+                     bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev);
 
index 516bfc2..f239b35 100644 (file)
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                struct lag_tracker tracker;
 
                tracker = ldev->tracker;
-               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
        }
 
        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
index ce696d5..ffac8a0 100644 (file)
@@ -749,7 +749,7 @@ static int mlx5_pps_event(struct notifier_block *nb,
                } else {
                        ptp_event.type = PTP_CLOCK_EXTTS;
                }
-               /* TODOL clock->ptp can be NULL if ptp_clock_register failes */
+               /* TODOL clock->ptp can be NULL if ptp_clock_register fails */
                ptp_clock_event(clock->ptp, &ptp_event);
                break;
        case PTP_PF_PEROUT:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644 (file)
index 0000000..749d17c
--- /dev/null
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS    3
+#define MLX5_TTC_GROUP1_SIZE   (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE    BIT(1)
+#define MLX5_TTC_GROUP3_SIZE    BIT(0)
+#define MLX5_TTC_TABLE_SIZE    (MLX5_TTC_GROUP1_SIZE +\
+                                MLX5_TTC_GROUP2_SIZE +\
+                                MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS      3
+#define MLX5_INNER_TTC_GROUP1_SIZE     BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE     BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE     BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE      (MLX5_INNER_TTC_GROUP1_SIZE +\
+                                        MLX5_INNER_TTC_GROUP2_SIZE +\
+                                        MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+       int num_groups;
+       struct mlx5_flow_table *t;
+       struct mlx5_flow_group **g;
+       struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+       struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+       return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       for (i = 0; i < MLX5_NUM_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+                       mlx5_del_flow_rules(ttc->rules[i].rule);
+                       ttc->rules[i].rule = NULL;
+               }
+       }
+
+       for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+                       ttc->tunnel_rules[i] = NULL;
+               }
+       }
+}
+
+struct mlx5_etype_proto {
+       u16 etype;
+       u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+       [MLX5_TT_IPV4_TCP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV6_TCP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV4_UDP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV6_UDP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV4_IPSEC_AH] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV6_IPSEC_AH] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV4] = {
+               .etype = ETH_P_IP,
+               .proto = 0,
+       },
+       [MLX5_TT_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = 0,
+       },
+       [MLX5_TT_ANY] = {
+               .etype = 0,
+               .proto = 0,
+       },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+       [MLX5_TT_IPV4_GRE] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV6_GRE] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV4_IPIP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV6_IPIP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV4_IPV6] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPV6,
+       },
+       [MLX5_TT_IPV6_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPV6,
+       },
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+       return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+                                          u8 proto_type)
+{
+       switch (proto_type) {
+       case IPPROTO_GRE:
+               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+       case IPPROTO_IPIP:
+       case IPPROTO_IPV6:
+               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+       default:
+               return false;
+       }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+       int tt;
+
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5_tunnel_proto_supported_rx(mdev,
+                                                  ttc_tunnel_rules[tt].proto))
+                       return true;
+       }
+       return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+       return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+               MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                         ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+       if (ethertype == ETH_P_IP)
+               return 4;
+
+       if (ethertype == ETH_P_IPV6)
+               return 6;
+
+       return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+                      struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+       int match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+       }
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (match_ipv_outer && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+       } else if (etype) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+                                        struct ttc_params *params,
+                                        struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_flow_handle **trules;
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int tt;
+       int err;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+                                                   ttc_rules[tt].etype,
+                                                   ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+               return 0;
+
+       trules    = ttc->tunnel_rules;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (!mlx5_tunnel_proto_supported_rx(dev,
+                                                   ttc_tunnel_rules[tt].proto))
+                       continue;
+               trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+                                                   &params->tunnel_dests[tt],
+                                                   ttc_tunnel_rules[tt].etype,
+                                                   ttc_tunnel_rules[tt].proto);
+               if (IS_ERR(trules[tt])) {
+                       err = PTR_ERR(trules[tt]);
+                       trules[tt] = NULL;
+                       goto del_rules;
+               }
+       }
+
+       return 0;
+
+del_rules:
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+                                       bool use_ipv)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+       if (use_ipv)
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+       else
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+                            struct mlx5_flow_table *ft,
+                            struct mlx5_flow_destination *dest,
+                            u16 etype, u8 proto)
+{
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (etype && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+       }
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+                                              struct ttc_params *params,
+                                              struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int err;
+       int tt;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+                                                         &params->dests[tt],
+                                                         ttc_rules[tt].etype,
+                                                         ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       return 0;
+
+del_rules:
+
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+                        GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params)
+{
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_inner_ttc_table_groups(ttc);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       mlx5_cleanup_ttc_rules(ttc);
+       for (i = ttc->num_groups - 1; i >= 0; i--) {
+               if (!IS_ERR_OR_NULL(ttc->g[i]))
+                       mlx5_destroy_flow_group(ttc->g[i]);
+               ttc->g[i] = NULL;
+       }
+
+       kfree(ttc->g);
+       mlx5_destroy_flow_table(ttc->t);
+       kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params)
+{
+       bool match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest)
+{
+       return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+                                           NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+                 "TTC[%d] default dest is not setup yet", type);
+
+       return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+       return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644 (file)
index 0000000..ce95be8
--- /dev/null
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_FS_TTC_H__
+#define __ML5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+       MLX5_TT_IPV4_TCP,
+       MLX5_TT_IPV6_TCP,
+       MLX5_TT_IPV4_UDP,
+       MLX5_TT_IPV6_UDP,
+       MLX5_TT_IPV4_IPSEC_AH,
+       MLX5_TT_IPV6_IPSEC_AH,
+       MLX5_TT_IPV4_IPSEC_ESP,
+       MLX5_TT_IPV6_IPSEC_ESP,
+       MLX5_TT_IPV4,
+       MLX5_TT_IPV6,
+       MLX5_TT_ANY,
+       MLX5_NUM_TT,
+       MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+       MLX5_TT_IPV4_GRE,
+       MLX5_TT_IPV6_GRE,
+       MLX5_TT_IPV4_IPIP,
+       MLX5_TT_IPV6_IPIP,
+       MLX5_TT_IPV4_IPV6,
+       MLX5_TT_IPV6_IPV6,
+       MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_flow_table_attr ft_attr;
+       struct mlx5_flow_destination dests[MLX5_NUM_TT];
+       bool   inner_ttc;
+       struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
index 3808440..e3b0a13 100644 (file)
@@ -40,7 +40,7 @@
 
 struct mlx5_vxlan {
        struct mlx5_core_dev            *mdev;
-       /* max_num_ports is usuallly 4, 16 buckets is more than enough */
+       /* max_num_ports is usually 4, 16 buckets is more than enough */
        DECLARE_HASHTABLE(htable, 4);
        struct mutex                    sync_lock; /* sync add/del port HW operations */
 };
index c84ad87..7948282 100644 (file)
@@ -252,28 +252,16 @@ static int set_dma_caps(struct pci_dev *pdev)
 {
        int err;
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
                        return err;
                }
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err) {
-               dev_warn(&pdev->dev,
-                        "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "Can't set consistent PCI DMA mask, aborting\n");
-                       return err;
-               }
-       }
-
        dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
        return err;
 }
@@ -389,11 +377,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
 
        switch (cap_mode) {
        case HCA_CAP_OPMOD_GET_MAX:
-               memcpy(dev->caps.hca_max[cap_type], hca_caps,
+               memcpy(dev->caps.hca[cap_type]->max, hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        case HCA_CAP_OPMOD_GET_CUR:
-               memcpy(dev->caps.hca_cur[cap_type], hca_caps,
+               memcpy(dev->caps.hca[cap_type]->cur, hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        default:
@@ -469,7 +457,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
                return err;
 
        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
-       memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+       memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur,
               MLX5_ST_SZ_BYTES(odp_cap));
 
 #define ODP_CAP_SET_MAX(dev, field)                                            \
@@ -514,7 +502,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 
        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
                                   capability);
-       memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
+       memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur,
               MLX5_ST_SZ_BYTES(cmd_hca_cap));
 
        mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
@@ -596,7 +584,7 @@ static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
                return 0;
 
        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
-       memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE],
+       memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur,
               MLX5_ST_SZ_BYTES(roce_cap));
        MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
 
@@ -748,14 +736,12 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
                         const struct pci_device_id *id)
 {
-       struct mlx5_priv *priv = &dev->priv;
        int err = 0;
 
        mutex_init(&dev->pci_status_mutex);
        pci_set_drvdata(dev->pdev, dev);
 
        dev->bar_addr = pci_resource_start(pdev, 0);
-       priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
 
        err = mlx5_pci_enable_device(dev);
        if (err) {
@@ -1179,6 +1165,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                goto err_ec;
        }
 
+       mlx5_lag_add_mdev(dev);
        err = mlx5_sriov_attach(dev);
        if (err) {
                mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1173,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        }
 
        mlx5_sf_dev_table_create(dev);
-       mlx5_lag_add_mdev(dev);
 
        return 0;
 
 err_sriov:
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
 err_ec:
        mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1209,9 @@ err_irq_table:
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-       mlx5_lag_remove_mdev(dev);
        mlx5_sf_dev_table_destroy(dev);
        mlx5_sriov_detach(dev);
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
@@ -1248,11 +1235,6 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
        int err = 0;
 
        mutex_lock(&dev->intf_state_mutex);
-       if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
-               mlx5_core_warn(dev, "interface is up, NOP\n");
-               goto out;
-       }
-       /* remove any previous indication of internal error */
        dev->state = MLX5_DEVICE_STATE_UP;
 
        err = mlx5_function_setup(dev, true);
@@ -1271,7 +1253,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 
        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-       err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+       err = mlx5_devlink_register(priv_to_devlink(dev));
        if (err)
                goto err_devlink_reg;
 
@@ -1293,7 +1275,6 @@ function_teardown:
        mlx5_function_teardown(dev, true);
 err_function:
        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
-out:
        mutex_unlock(&dev->intf_state_mutex);
        return err;
 }
@@ -1380,6 +1361,60 @@ out:
        mutex_unlock(&dev->intf_state_mutex);
 }
 
+static const int types[] = {
+       MLX5_CAP_GENERAL,
+       MLX5_CAP_GENERAL_2,
+       MLX5_CAP_ETHERNET_OFFLOADS,
+       MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
+       MLX5_CAP_ODP,
+       MLX5_CAP_ATOMIC,
+       MLX5_CAP_ROCE,
+       MLX5_CAP_IPOIB_OFFLOADS,
+       MLX5_CAP_FLOW_TABLE,
+       MLX5_CAP_ESWITCH_FLOW_TABLE,
+       MLX5_CAP_ESWITCH,
+       MLX5_CAP_VECTOR_CALC,
+       MLX5_CAP_QOS,
+       MLX5_CAP_DEBUG,
+       MLX5_CAP_DEV_MEM,
+       MLX5_CAP_DEV_EVENT,
+       MLX5_CAP_TLS,
+       MLX5_CAP_VDPA_EMULATION,
+       MLX5_CAP_IPSEC,
+};
+
+static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
+{
+       int type;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(types); i++) {
+               type = types[i];
+               kfree(dev->caps.hca[type]);
+       }
+}
+
+static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev)
+{
+       struct mlx5_hca_cap *cap;
+       int type;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(types); i++) {
+               cap = kzalloc(sizeof(*cap), GFP_KERNEL);
+               if (!cap)
+                       goto err;
+               type = types[i];
+               dev->caps.hca[type] = cap;
+       }
+
+       return 0;
+
+err:
+       mlx5_hca_caps_free(dev);
+       return -ENOMEM;
+}
+
 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 {
        struct mlx5_priv *priv = &dev->priv;
@@ -1399,6 +1434,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
        mutex_init(&priv->pgdir_mutex);
        INIT_LIST_HEAD(&priv->pgdir_list);
 
+       priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev));
        priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
                                            mlx5_debugfs_root);
        INIT_LIST_HEAD(&priv->traps);
@@ -1415,8 +1451,14 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
        if (err)
                goto err_adev_init;
 
+       err = mlx5_hca_caps_alloc(dev);
+       if (err)
+               goto err_hca_caps;
+
        return 0;
 
+err_hca_caps:
+       mlx5_adev_cleanup(dev);
 err_adev_init:
        mlx5_pagealloc_cleanup(dev);
 err_pagealloc_init:
@@ -1435,6 +1477,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 {
        struct mlx5_priv *priv = &dev->priv;
 
+       mlx5_hca_caps_free(dev);
        mlx5_adev_cleanup(dev);
        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
@@ -1452,7 +1495,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "devlink alloc failed\n");
                return -ENOMEM;
index da365b8..230eab7 100644 (file)
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
 
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
@@ -275,4 +277,9 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
 
        return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
 }
+
+bool mlx5_eth_supported(struct mlx5_core_dev *dev);
+bool mlx5_rdma_supported(struct mlx5_core_dev *dev);
+bool mlx5_vnet_supported(struct mlx5_core_dev *dev);
+
 #endif /* __MLX5_CORE_H__ */
index 3465b36..c79a10b 100644 (file)
@@ -18,7 +18,7 @@
 
 #define MLX5_SFS_PER_CTRL_IRQ 64
 #define MLX5_IRQ_CTRL_SF_MAX 8
-/* min num of vectores for SFs to be enabled */
+/* min num of vectors for SFs to be enabled */
 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
 
 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
 #define MLX5_EQ_REFS_PER_IRQ (2)
 
 struct mlx5_irq {
-       u32 index;
        struct atomic_notifier_head nh;
        cpumask_var_t mask;
        char name[MLX5_MAX_IRQ_NAME];
-       struct kref kref;
-       int irqn;
        struct mlx5_irq_pool *pool;
+       int refcount;
+       u32 index;
+       int irqn;
 };
 
 struct mlx5_irq_pool {
@@ -138,9 +138,8 @@ out:
        return ret;
 }
 
-static void irq_release(struct kref *kref)
+static void irq_release(struct mlx5_irq *irq)
 {
-       struct mlx5_irq *irq = container_of(kref, struct mlx5_irq, kref);
        struct mlx5_irq_pool *pool = irq->pool;
 
        xa_erase(&pool->irqs, irq->index);
@@ -159,10 +158,31 @@ static void irq_put(struct mlx5_irq *irq)
        struct mlx5_irq_pool *pool = irq->pool;
 
        mutex_lock(&pool->lock);
-       kref_put(&irq->kref, irq_release);
+       irq->refcount--;
+       if (!irq->refcount)
+               irq_release(irq);
        mutex_unlock(&pool->lock);
 }
 
+static int irq_get_locked(struct mlx5_irq *irq)
+{
+       lockdep_assert_held(&irq->pool->lock);
+       if (WARN_ON_ONCE(!irq->refcount))
+               return 0;
+       irq->refcount++;
+       return 1;
+}
+
+static int irq_get(struct mlx5_irq *irq)
+{
+       int err;
+
+       mutex_lock(&irq->pool->lock);
+       err = irq_get_locked(irq);
+       mutex_unlock(&irq->pool->lock);
+       return err;
+}
+
 static irqreturn_t irq_int_handler(int irq, void *nh)
 {
        atomic_notifier_call_chain(nh, 0, NULL);
@@ -215,7 +235,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
                goto err_cpumask;
        }
        irq->pool = pool;
-       kref_init(&irq->kref);
+       irq->refcount = 1;
        irq->index = i;
        err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
        if (err) {
@@ -235,18 +255,18 @@ err_req_irq:
 
 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
 {
-       int err;
+       int ret;
 
-       err = kref_get_unless_zero(&irq->kref);
-       if (WARN_ON_ONCE(!err))
+       ret = irq_get(irq);
+       if (!ret)
                /* Something very bad happens here, we are enabling EQ
                 * on non-existing IRQ.
                 */
                return -ENOENT;
-       err = atomic_notifier_chain_register(&irq->nh, nb);
-       if (err)
+       ret = atomic_notifier_chain_register(&irq->nh, nb);
+       if (ret)
                irq_put(irq);
-       return err;
+       return ret;
 }
 
 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
@@ -304,10 +324,9 @@ static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
        xa_for_each_range(&pool->irqs, index, iter, start, end) {
                if (!cpumask_equal(iter->mask, affinity))
                        continue;
-               if (kref_read(&iter->kref) < pool->min_threshold)
+               if (iter->refcount < pool->min_threshold)
                        return iter;
-               if (!irq || kref_read(&iter->kref) <
-                   kref_read(&irq->kref))
+               if (!irq || iter->refcount < irq->refcount)
                        irq = iter;
        }
        return irq;
@@ -322,7 +341,7 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
        mutex_lock(&pool->lock);
        least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
        if (least_loaded_irq &&
-           kref_read(&least_loaded_irq->kref) < pool->min_threshold)
+           least_loaded_irq->refcount < pool->min_threshold)
                goto out;
        new_irq = irq_pool_create_irq(pool, affinity);
        if (IS_ERR(new_irq)) {
@@ -340,11 +359,11 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
        least_loaded_irq = new_irq;
        goto unlock;
 out:
-       kref_get(&least_loaded_irq->kref);
-       if (kref_read(&least_loaded_irq->kref) > pool->max_threshold)
+       irq_get_locked(least_loaded_irq);
+       if (least_loaded_irq->refcount > pool->max_threshold)
                mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
                              least_loaded_irq->irqn, pool->name,
-                             kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+                             least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ);
 unlock:
        mutex_unlock(&pool->lock);
        return least_loaded_irq;
@@ -360,7 +379,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
        mutex_lock(&pool->lock);
        irq = xa_load(&pool->irqs, vecidx);
        if (irq) {
-               kref_get(&irq->kref);
+               irq_get_locked(irq);
                goto unlock;
        }
        irq = irq_request(pool, vecidx);
@@ -427,7 +446,7 @@ out:
                return irq;
        mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
                      irq->irqn, cpumask_pr_args(affinity),
-                     kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+                     irq->refcount / MLX5_EQ_REFS_PER_IRQ);
        return irq;
 }
 
@@ -459,8 +478,12 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
        struct mlx5_irq *irq;
        unsigned long index;
 
+       /* There are cases in which we are destrying the irq_table before
+        * freeing all the IRQs, fast teardown for example. Hence, free the irqs
+        * which might not have been freed.
+        */
        xa_for_each(&pool->irqs, index, irq)
-               irq_release(&irq->kref);
+               irq_release(irq);
        xa_destroy(&pool->irqs);
        mutex_destroy(&pool->lock);
        kvfree(pool);
@@ -483,7 +506,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
        if (!mlx5_sf_max_functions(dev))
                return 0;
        if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
-               mlx5_core_err(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
+               mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n");
                return 0;
        }
 
@@ -601,7 +624,7 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
                return;
 
        /* There are cases where IRQs still will be in used when we reaching
-        * to here. Hence, making sure all the irqs are realeased.
+        * to here. Hence, making sure all the irqs are released.
         */
        irq_pools_destroy(table);
        pci_free_irq_vectors(dev->pdev);
index fa0288a..871c2fb 100644 (file)
@@ -39,7 +39,7 @@ static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, cha
        struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev);
        struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
 
-       return scnprintf(buf, PAGE_SIZE, "%u\n", sf_dev->sfnum);
+       return sysfs_emit(buf, "%u\n", sf_dev->sfnum);
 }
 static DEVICE_ATTR_RO(sfnum);
 
index 42c8ee0..052f480 100644 (file)
@@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&adev->dev);
        if (!devlink)
                return -ENOMEM;
 
index 1be0487..13891fd 100644 (file)
@@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
        return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
 }
 
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err = 0;
@@ -248,11 +248,11 @@ out:
        return err;
 }
 
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err;
@@ -476,7 +476,7 @@ static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
                return;
 
        /* Balances with refcount_set; drop the reference so that new user cmd cannot start
-        * and new vhca event handler cannnot run.
+        * and new vhca event handler cannot run.
         */
        mlx5_sf_table_put(table);
        wait_for_completion(&table->disable_complete);
index 81ce13b..3a480e0 100644 (file)
@@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             unsigned int *new_port_index);
 int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
                             struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack);
 #else
index 6475ba3..a5b9f65 100644 (file)
@@ -18,12 +18,39 @@ enum dr_action_valid_state {
        DR_ACTION_STATE_ENCAP,
        DR_ACTION_STATE_DECAP,
        DR_ACTION_STATE_MODIFY_HDR,
-       DR_ACTION_STATE_MODIFY_VLAN,
+       DR_ACTION_STATE_POP_VLAN,
+       DR_ACTION_STATE_PUSH_VLAN,
        DR_ACTION_STATE_NON_TERM,
        DR_ACTION_STATE_TERM,
        DR_ACTION_STATE_MAX,
 };
 
+static const char * const action_type_to_str[] = {
+       [DR_ACTION_TYP_TNL_L2_TO_L2] = "DR_ACTION_TYP_TNL_L2_TO_L2",
+       [DR_ACTION_TYP_L2_TO_TNL_L2] = "DR_ACTION_TYP_L2_TO_TNL_L2",
+       [DR_ACTION_TYP_TNL_L3_TO_L2] = "DR_ACTION_TYP_TNL_L3_TO_L2",
+       [DR_ACTION_TYP_L2_TO_TNL_L3] = "DR_ACTION_TYP_L2_TO_TNL_L3",
+       [DR_ACTION_TYP_DROP] = "DR_ACTION_TYP_DROP",
+       [DR_ACTION_TYP_QP] = "DR_ACTION_TYP_QP",
+       [DR_ACTION_TYP_FT] = "DR_ACTION_TYP_FT",
+       [DR_ACTION_TYP_CTR] = "DR_ACTION_TYP_CTR",
+       [DR_ACTION_TYP_TAG] = "DR_ACTION_TYP_TAG",
+       [DR_ACTION_TYP_MODIFY_HDR] = "DR_ACTION_TYP_MODIFY_HDR",
+       [DR_ACTION_TYP_VPORT] = "DR_ACTION_TYP_VPORT",
+       [DR_ACTION_TYP_POP_VLAN] = "DR_ACTION_TYP_POP_VLAN",
+       [DR_ACTION_TYP_PUSH_VLAN] = "DR_ACTION_TYP_PUSH_VLAN",
+       [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR",
+       [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR",
+       [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN",
+};
+
+static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
+{
+       if (action_id > DR_ACTION_TYP_MAX)
+               action_id = DR_ACTION_TYP_MAX;
+       return action_type_to_str[action_id];
+}
+
 static const enum dr_action_valid_state
 next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] = {
        [DR_ACTION_DOMAIN_NIC_INGRESS] = {
@@ -39,8 +66,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
                [DR_ACTION_STATE_DECAP] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
@@ -53,7 +82,8 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
                [DR_ACTION_STATE_ENCAP] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
@@ -73,20 +103,31 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
-               [DR_ACTION_STATE_MODIFY_VLAN] = {
+               [DR_ACTION_STATE_POP_VLAN] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_QP]              = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
-                       [DR_ACTION_TYP_TAG]             = DR_ACTION_STATE_MODIFY_VLAN,
-                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_MODIFY_VLAN,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_TAG]             = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
                },
+               [DR_ACTION_STATE_PUSH_VLAN] = {
+                       [DR_ACTION_TYP_QP]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_TAG]             = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
+               },
                [DR_ACTION_STATE_NON_TERM] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_QP]              = DR_ACTION_STATE_TERM,
@@ -99,8 +140,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
                [DR_ACTION_STATE_TERM] = {
                        [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_TERM,
@@ -115,8 +158,16 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+               },
+               [DR_ACTION_STATE_DECAP] = {
+                       [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_DECAP,
                },
                [DR_ACTION_STATE_ENCAP] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
@@ -132,14 +183,25 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+               },
+               [DR_ACTION_STATE_POP_VLAN] = {
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
+                       [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
                },
-               [DR_ACTION_STATE_MODIFY_VLAN] = {
+               [DR_ACTION_STATE_PUSH_VLAN] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
-                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_MODIFY_VLAN,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
@@ -152,8 +214,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
                },
                [DR_ACTION_STATE_TERM] = {
                        [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_TERM,
@@ -170,8 +234,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                },
                [DR_ACTION_STATE_DECAP] = {
@@ -180,11 +246,12 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
                [DR_ACTION_STATE_ENCAP] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
@@ -203,13 +270,26 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                },
-               [DR_ACTION_STATE_MODIFY_VLAN] = {
+               [DR_ACTION_STATE_POP_VLAN] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
-                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
+                       [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+               },
+               [DR_ACTION_STATE_PUSH_VLAN] = {
+                       [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_PUSH_VLAN,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
@@ -226,8 +306,10 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
-                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                },
                [DR_ACTION_STATE_TERM] = {
@@ -244,8 +326,17 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
+               },
+               [DR_ACTION_STATE_DECAP] = {
+                       [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_DECAP,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
                },
                [DR_ACTION_STATE_ENCAP] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
@@ -262,15 +353,27 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
+               },
+               [DR_ACTION_STATE_POP_VLAN] = {
+                       [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_MODIFY_HDR]      = DR_ACTION_STATE_MODIFY_HDR,
+                       [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
+                       [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                },
-               [DR_ACTION_STATE_MODIFY_VLAN] = {
+               [DR_ACTION_STATE_PUSH_VLAN] = {
                        [DR_ACTION_TYP_DROP]            = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_FT]              = DR_ACTION_STATE_TERM,
                        [DR_ACTION_TYP_SAMPLER]         = DR_ACTION_STATE_TERM,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
-                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_CTR]             = DR_ACTION_STATE_PUSH_VLAN,
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
@@ -285,7 +388,9 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX]
                        [DR_ACTION_TYP_L2_TO_TNL_L2]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_L2_TO_TNL_L3]    = DR_ACTION_STATE_ENCAP,
                        [DR_ACTION_TYP_INSERT_HDR]      = DR_ACTION_STATE_ENCAP,
-                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_MODIFY_VLAN,
+                       [DR_ACTION_TYP_REMOVE_HDR]      = DR_ACTION_STATE_DECAP,
+                       [DR_ACTION_TYP_PUSH_VLAN]       = DR_ACTION_STATE_PUSH_VLAN,
+                       [DR_ACTION_TYP_POP_VLAN]        = DR_ACTION_STATE_POP_VLAN,
                        [DR_ACTION_TYP_VPORT]           = DR_ACTION_STATE_TERM,
                },
                [DR_ACTION_STATE_TERM] = {
@@ -314,6 +419,9 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
        case DR_ACTION_REFORMAT_TYP_INSERT_HDR:
                *action_type = DR_ACTION_TYP_INSERT_HDR;
                break;
+       case DR_ACTION_REFORMAT_TYP_REMOVE_HDR:
+               *action_type = DR_ACTION_TYP_REMOVE_HDR;
+               break;
        default:
                return -EINVAL;
        }
@@ -326,7 +434,7 @@ dr_action_reformat_to_action_type(enum mlx5dr_action_reformat_type reformat_type
  * the new size of the STEs array, rule with actions.
  */
 static void dr_actions_apply(struct mlx5dr_domain *dmn,
-                            enum mlx5dr_ste_entry_type ste_type,
+                            enum mlx5dr_domain_nic_type nic_type,
                             u8 *action_type_set,
                             u8 *last_ste,
                             struct mlx5dr_ste_actions_attr *attr,
@@ -335,7 +443,7 @@ static void dr_actions_apply(struct mlx5dr_domain *dmn,
        struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
        u32 added_stes = 0;
 
-       if (ste_type == MLX5DR_STE_TYPE_RX)
+       if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
                mlx5dr_ste_set_actions_rx(ste_ctx, dmn, action_type_set,
                                          last_ste, attr, &added_stes);
        else
@@ -347,7 +455,7 @@ static void dr_actions_apply(struct mlx5dr_domain *dmn,
 
 static enum dr_action_domain
 dr_action_get_action_domain(enum mlx5dr_domain_type domain,
-                           enum mlx5dr_ste_entry_type ste_type)
+                           enum mlx5dr_domain_nic_type nic_type)
 {
        switch (domain) {
        case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -355,7 +463,7 @@ dr_action_get_action_domain(enum mlx5dr_domain_type domain,
        case MLX5DR_DOMAIN_TYPE_NIC_TX:
                return DR_ACTION_DOMAIN_NIC_EGRESS;
        case MLX5DR_DOMAIN_TYPE_FDB:
-               if (ste_type == MLX5DR_STE_TYPE_RX)
+               if (nic_type == DR_DOMAIN_NIC_TYPE_RX)
                        return DR_ACTION_DOMAIN_FDB_INGRESS;
                return DR_ACTION_DOMAIN_FDB_EGRESS;
        default:
@@ -421,6 +529,18 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
        return 0;
 }
 
+static void dr_action_print_sequence(struct mlx5dr_domain *dmn,
+                                    struct mlx5dr_action *actions[],
+                                    int last_idx)
+{
+       int i;
+
+       for (i = 0; i <= last_idx; i++)
+               mlx5dr_err(dmn, "< %s (%d) > ",
+                          dr_action_id_to_str(actions[i]->action_type),
+                          actions[i]->action_type);
+}
+
 #define WITH_VLAN_NUM_HW_ACTIONS 6
 
 int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
@@ -431,7 +551,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                                 u32 *new_hw_ste_arr_sz)
 {
        struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
-       bool rx_rule = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+       bool rx_rule = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
        u8 action_type_set[DR_ACTION_TYP_MAX] = {};
        struct mlx5dr_ste_actions_attr attr = {};
@@ -445,7 +565,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
        attr.gvmi = dmn->info.caps.gvmi;
        attr.hit_gvmi = dmn->info.caps.gvmi;
        attr.final_icm_addr = nic_dmn->default_icm_addr;
-       action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
+       action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type);
 
        for (i = 0; i < num_actions; i++) {
                struct mlx5dr_action_dest_tbl *dest_tbl;
@@ -467,11 +587,11 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                                if (dest_tbl->tbl->dmn != dmn) {
                                        mlx5dr_err(dmn,
                                                   "Destination table belongs to a different domain\n");
-                                       goto out_invalid_arg;
+                                       return -EINVAL;
                                }
                                if (dest_tbl->tbl->level <= matcher->tbl->level) {
-                                       mlx5_core_warn_once(dmn->mdev,
-                                                           "Connecting table to a lower/same level destination table\n");
+                                       mlx5_core_dbg_once(dmn->mdev,
+                                                          "Connecting table to a lower/same level destination table\n");
                                        mlx5dr_dbg(dmn,
                                                   "Connecting table at level %d to a destination table at level %d\n",
                                                   matcher->tbl->level,
@@ -509,7 +629,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                        break;
                case DR_ACTION_TYP_QP:
                        mlx5dr_info(dmn, "Domain doesn't support QP\n");
-                       goto out_invalid_arg;
+                       return -EOPNOTSUPP;
                case DR_ACTION_TYP_CTR:
                        attr.ctr_id = action->ctr->ctr_id +
                                action->ctr->offeset;
@@ -536,7 +656,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                        if (rx_rule &&
                            !(dmn->ste_ctx->actions_caps & DR_STE_CTX_ACTION_CAP_RX_ENCAP)) {
                                mlx5dr_info(dmn, "Device doesn't support Encap on RX\n");
-                               goto out_invalid_arg;
+                               return -EOPNOTSUPP;
                        }
                        attr.reformat.size = action->reformat->size;
                        attr.reformat.id = action->reformat->id;
@@ -549,48 +669,66 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
                        attr.hit_gvmi = action->vport->caps->vhca_gvmi;
                        dest_action = action;
                        if (rx_rule) {
-                               /* Loopback on WIRE vport is not supported */
-                               if (action->vport->caps->num == WIRE_PORT)
-                                       goto out_invalid_arg;
-
+                               if (action->vport->caps->num == WIRE_PORT) {
+                                       mlx5dr_dbg(dmn, "Device doesn't support Loopback on WIRE vport\n");
+                                       return -EOPNOTSUPP;
+                               }
                                attr.final_icm_addr = action->vport->caps->icm_address_rx;
                        } else {
                                attr.final_icm_addr = action->vport->caps->icm_address_tx;
                        }
                        break;
                case DR_ACTION_TYP_POP_VLAN:
+                       if (!rx_rule && !(dmn->ste_ctx->actions_caps &
+                                         DR_STE_CTX_ACTION_CAP_TX_POP)) {
+                               mlx5dr_dbg(dmn, "Device doesn't support POP VLAN action on TX\n");
+                               return -EOPNOTSUPP;
+                       }
+
                        max_actions_type = MLX5DR_MAX_VLANS;
                        attr.vlans.count++;
                        break;
                case DR_ACTION_TYP_PUSH_VLAN:
+                       if (rx_rule && !(dmn->ste_ctx->actions_caps &
+                                        DR_STE_CTX_ACTION_CAP_RX_PUSH)) {
+                               mlx5dr_dbg(dmn, "Device doesn't support PUSH VLAN action on RX\n");
+                               return -EOPNOTSUPP;
+                       }
+
                        max_actions_type = MLX5DR_MAX_VLANS;
-                       if (attr.vlans.count == MLX5DR_MAX_VLANS)
+                       if (attr.vlans.count == MLX5DR_MAX_VLANS) {
+                               mlx5dr_dbg(dmn, "Max VLAN push/pop count exceeded\n");
                                return -EINVAL;
+                       }
 
                        attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
                        break;
                case DR_ACTION_TYP_INSERT_HDR:
+               case DR_ACTION_TYP_REMOVE_HDR:
                        attr.reformat.size = action->reformat->size;
                        attr.reformat.id = action->reformat->id;
                        attr.reformat.param_0 = action->reformat->param_0;
                        attr.reformat.param_1 = action->reformat->param_1;
                        break;
                default:
-                       goto out_invalid_arg;
+                       mlx5dr_err(dmn, "Unsupported action type %d\n", action_type);
+                       return -EINVAL;
                }
 
                /* Check action duplication */
                if (++action_type_set[action_type] > max_actions_type) {
                        mlx5dr_err(dmn, "Action type %d supports only max %d time(s)\n",
                                   action_type, max_actions_type);
-                       goto out_invalid_arg;
+                       return -EINVAL;
                }
 
                /* Check action state machine is valid */
                if (dr_action_validate_and_get_next_state(action_domain,
                                                          action_type,
                                                          &state)) {
-                       mlx5dr_err(dmn, "Invalid action sequence provided\n");
+                       mlx5dr_err(dmn, "Invalid action (gvmi: %d, is_rx: %d) sequence provided:",
+                                  attr.gvmi, rx_rule);
+                       dr_action_print_sequence(dmn, actions, i);
                        return -EOPNOTSUPP;
                }
        }
@@ -614,16 +752,13 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
        }
 
        dr_actions_apply(dmn,
-                        nic_dmn->ste_type,
+                        nic_dmn->type,
                         action_type_set,
                         last_ste,
                         &attr,
                         new_hw_ste_arr_sz);
 
        return 0;
-
-out_invalid_arg:
-       return -EINVAL;
 }
 
 static unsigned int action_size[DR_ACTION_TYP_MAX] = {
@@ -638,6 +773,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = {
        [DR_ACTION_TYP_VPORT]        = sizeof(struct mlx5dr_action_vport),
        [DR_ACTION_TYP_PUSH_VLAN]    = sizeof(struct mlx5dr_action_push_vlan),
        [DR_ACTION_TYP_INSERT_HDR]   = sizeof(struct mlx5dr_action_reformat),
+       [DR_ACTION_TYP_REMOVE_HDR]   = sizeof(struct mlx5dr_action_reformat),
        [DR_ACTION_TYP_SAMPLER]      = sizeof(struct mlx5dr_action_sampler),
 };
 
@@ -709,7 +845,8 @@ dec_ref:
 struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
-                                  u32 num_of_dests)
+                                  u32 num_of_dests,
+                                  bool ignore_flow_level)
 {
        struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
        struct mlx5dr_action **ref_actions;
@@ -776,7 +913,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                      num_of_dests,
                                      reformat_req,
                                      &action->dest_tbl->fw_tbl.id,
-                                     &action->dest_tbl->fw_tbl.group_id);
+                                     &action->dest_tbl->fw_tbl.group_id,
+                                     ignore_flow_level);
        if (ret)
                goto free_action;
 
@@ -884,11 +1022,23 @@ dr_action_verify_reformat_params(enum mlx5dr_action_type reformat_type,
                                 size_t data_sz,
                                 void *data)
 {
-       if ((!data && data_sz) || (data && !data_sz) ||
-           ((reformat_param_0 || reformat_param_1) &&
-            reformat_type != DR_ACTION_TYP_INSERT_HDR) ||
-           reformat_type > DR_ACTION_TYP_INSERT_HDR) {
-               mlx5dr_dbg(dmn, "Invalid reformat parameter!\n");
+       if (reformat_type == DR_ACTION_TYP_INSERT_HDR) {
+               if ((!data && data_sz) || (data && !data_sz) ||
+                   MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_size) < data_sz ||
+                   MLX5_CAP_GEN_2(dmn->mdev, max_reformat_insert_offset) < reformat_param_1) {
+                       mlx5dr_dbg(dmn, "Invalid reformat parameters for INSERT_HDR\n");
+                       goto out_err;
+               }
+       } else if (reformat_type == DR_ACTION_TYP_REMOVE_HDR) {
+               if (data ||
+                   MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_size) < data_sz ||
+                   MLX5_CAP_GEN_2(dmn->mdev, max_reformat_remove_offset) < reformat_param_1) {
+                       mlx5dr_dbg(dmn, "Invalid reformat parameters for REMOVE_HDR\n");
+                       goto out_err;
+               }
+       } else if (reformat_param_0 || reformat_param_1 ||
+                  reformat_type > DR_ACTION_TYP_REMOVE_HDR) {
+               mlx5dr_dbg(dmn, "Invalid reformat parameters\n");
                goto out_err;
        }
 
@@ -987,7 +1137,6 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
                return 0;
        }
        case DR_ACTION_TYP_INSERT_HDR:
-       {
                ret = mlx5dr_cmd_create_reformat_ctx(dmn->mdev,
                                                     MLX5_REFORMAT_TYPE_INSERT_HDR,
                                                     reformat_param_0,
@@ -1002,7 +1151,12 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
                action->reformat->param_0 = reformat_param_0;
                action->reformat->param_1 = reformat_param_1;
                return 0;
-       }
+       case DR_ACTION_TYP_REMOVE_HDR:
+               action->reformat->id = 0;
+               action->reformat->size = data_sz;
+               action->reformat->param_0 = reformat_param_0;
+               action->reformat->param_1 = reformat_param_1;
+               return 0;
        default:
                mlx5dr_info(dmn, "Reformat type is not supported %d\n", action->action_type);
                return -EINVAL;
@@ -1658,6 +1812,7 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
                }
                break;
        case DR_ACTION_TYP_TNL_L2_TO_L2:
+       case DR_ACTION_TYP_REMOVE_HDR:
                refcount_dec(&action->reformat->dmn->refcount);
                break;
        case DR_ACTION_TYP_TNL_L3_TO_L2:
index 54e1f54..5630728 100644 (file)
@@ -655,6 +655,7 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
        MLX5_SET(set_fte_in, in, table_type, ft->type);
        MLX5_SET(set_fte_in, in, table_id, ft->id);
        MLX5_SET(set_fte_in, in, flow_index, fte->index);
+       MLX5_SET(set_fte_in, in, ignore_flow_level, fte->ignore_flow_level);
        if (ft->vport) {
                MLX5_SET(set_fte_in, in, vport_number, ft->vport);
                MLX5_SET(set_fte_in, in, other_vport, 1);
index 7091b1b..0fe1598 100644 (file)
@@ -245,7 +245,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
                        return -ENOTSUPP;
 
                dmn->info.supp_sw_steering = true;
-               dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
+               dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
                dmn->info.rx.default_icm_addr = dmn->info.caps.nic_rx_drop_address;
                dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address;
                break;
@@ -254,7 +254,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
                        return -ENOTSUPP;
 
                dmn->info.supp_sw_steering = true;
-               dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+               dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
                dmn->info.tx.default_icm_addr = dmn->info.caps.nic_tx_allow_address;
                dmn->info.tx.drop_icm_addr = dmn->info.caps.nic_tx_drop_address;
                break;
@@ -265,8 +265,8 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev,
                if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb))
                        return -ENOTSUPP;
 
-               dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX;
-               dmn->info.tx.ste_type = MLX5DR_STE_TYPE_TX;
+               dmn->info.rx.type = DR_DOMAIN_NIC_TYPE_RX;
+               dmn->info.tx.type = DR_DOMAIN_NIC_TYPE_TX;
                vport_cap = mlx5dr_get_vport_cap(&dmn->info.caps, 0);
                if (!vport_cap) {
                        mlx5dr_err(dmn, "Failed to get esw manager vport\n");
index 7ccfd40..0d6f86e 100644 (file)
@@ -103,7 +103,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            int num_dest,
                            bool reformat_req,
                            u32 *tbl_id,
-                           u32 *group_id)
+                           u32 *group_id,
+                           bool ignore_flow_level)
 {
        struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
        struct mlx5dr_cmd_fte_info fte_info = {};
@@ -137,6 +138,7 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
        fte_info.dests_size = num_dest;
        fte_info.val = val;
        fte_info.dest_arr = dest;
+       fte_info.ignore_flow_level = ignore_flow_level;
 
        ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
        if (ret) {
index 6f6191d..b5409cc 100644 (file)
@@ -396,13 +396,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
        struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
        struct mlx5dr_match_param mask = {};
+       bool allow_empty_match = false;
        struct mlx5dr_ste_build *sb;
        bool inner, rx;
        int idx = 0;
        int ret, i;
 
        sb = nic_matcher->ste_builder_arr[outer_ipv][inner_ipv];
-       rx = nic_dmn->ste_type == MLX5DR_STE_TYPE_RX;
+       rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
 
        /* Create a temporary mask to track and clear used mask fields */
        if (matcher->match_criteria & DR_MATCHER_CRITERIA_OUTER)
@@ -428,6 +429,16 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
        if (ret)
                return ret;
 
+       /* Optimize RX pipe by reducing source port match, since
+        * the FDB RX part is connected only to the wire.
+        */
+       if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB &&
+           rx && mask.misc.source_port) {
+               mask.misc.source_port = 0;
+               mask.misc.source_eswitch_owner_vhca_id = 0;
+               allow_empty_match = true;
+       }
+
        /* Outer */
        if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER |
                                       DR_MATCHER_CRITERIA_MISC |
@@ -619,7 +630,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
        }
 
        /* Empty matcher, takes all */
-       if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
+       if ((!idx && allow_empty_match) ||
+           matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
                mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
 
        if (idx == 0) {
index ffdfb5a..aca80ef 100644 (file)
@@ -81,6 +81,7 @@ dr_rule_create_collision_entry(struct mlx5dr_matcher *matcher,
        }
 
        ste->ste_chain_location = orig_ste->ste_chain_location;
+       ste->htbl->pointing_ste = orig_ste->htbl->pointing_ste;
 
        /* In collision entry, all members share the same miss_list_head */
        ste->htbl->miss_list = mlx5dr_ste_get_miss_list(orig_ste);
@@ -185,6 +186,9 @@ dr_rule_rehash_handle_collision(struct mlx5dr_matcher *matcher,
        if (!new_ste)
                return NULL;
 
+       /* Update collision pointing STE */
+       new_ste->htbl->pointing_ste = col_ste->htbl->pointing_ste;
+
        /* In collision entry, all members share the same miss_list_head */
        new_ste->htbl->miss_list = mlx5dr_ste_get_miss_list(col_ste);
 
@@ -212,7 +216,7 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
        new_ste->next_htbl = cur_ste->next_htbl;
        new_ste->ste_chain_location = cur_ste->ste_chain_location;
 
-       if (!mlx5dr_ste_is_last_in_rule(nic_matcher, new_ste->ste_chain_location))
+       if (new_ste->next_htbl)
                new_ste->next_htbl->pointing_ste = new_ste;
 
        /* We need to copy the refcount since this ste
@@ -220,10 +224,8 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
         */
        new_ste->refcount = cur_ste->refcount;
 
-       /* Link old STEs rule_mem list to the new ste */
-       mlx5dr_rule_update_rule_member(cur_ste, new_ste);
-       INIT_LIST_HEAD(&new_ste->rule_list);
-       list_splice_tail_init(&cur_ste->rule_list, &new_ste->rule_list);
+       /* Link old STEs rule to the new ste */
+       mlx5dr_rule_set_last_member(cur_ste->rule_rx_tx, new_ste, false);
 }
 
 static struct mlx5dr_ste *
@@ -404,7 +406,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule,
        info.miss_icm_addr = nic_matcher->e_anchor->chunk->icm_addr;
        mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
                                     dmn->info.caps.gvmi,
-                                    nic_dmn,
+                                    nic_dmn->type,
                                     new_htbl,
                                     formatted_ste,
                                     &info);
@@ -581,34 +583,66 @@ free_action_members:
        return -ENOMEM;
 }
 
-/* While the pointer of ste is no longer valid, like while moving ste to be
- * the first in the miss_list, and to be in the origin table,
- * all rule-members that are attached to this ste should update their ste member
- * to the new pointer
- */
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *ste,
-                                   struct mlx5dr_ste *new_ste)
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+                                struct mlx5dr_ste *ste,
+                                bool force)
+{
+       /* Update rule member is usually done for the last STE or during rule
+        * creation to recover from mid-creation failure (for this peruse the
+        * force flag is used)
+        */
+       if (ste->next_htbl && !force)
+               return;
+
+       /* Update is required since each rule keeps track of its last STE */
+       ste->rule_rx_tx = nic_rule;
+       nic_rule->last_rule_ste = ste;
+}
+
+static struct mlx5dr_ste *dr_rule_get_pointed_ste(struct mlx5dr_ste *curr_ste)
+{
+       struct mlx5dr_ste *first_ste;
+
+       first_ste = list_first_entry(mlx5dr_ste_get_miss_list(curr_ste),
+                                    struct mlx5dr_ste, miss_list_node);
+
+       return first_ste->htbl->pointing_ste;
+}
+
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+                                        struct mlx5dr_ste *curr_ste,
+                                        int *num_of_stes)
 {
-       struct mlx5dr_rule_member *rule_mem;
+       bool first = false;
+
+       *num_of_stes = 0;
+
+       if (!curr_ste)
+               return -ENOENT;
+
+       /* Iterate from last to first */
+       while (!first) {
+               first = curr_ste->ste_chain_location == 1;
+               ste_arr[*num_of_stes] = curr_ste;
+               *num_of_stes += 1;
+               curr_ste = dr_rule_get_pointed_ste(curr_ste);
+       }
 
-       list_for_each_entry(rule_mem, &ste->rule_list, use_ste_list)
-               rule_mem->ste = new_ste;
+       return 0;
 }
 
 static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
                                       struct mlx5dr_rule_rx_tx *nic_rule)
 {
-       struct mlx5dr_rule_member *rule_mem;
-       struct mlx5dr_rule_member *tmp_mem;
+       struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
+       struct mlx5dr_ste *curr_ste = nic_rule->last_rule_ste;
+       int i;
 
-       if (list_empty(&nic_rule->rule_members_list))
+       if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i))
                return;
-       list_for_each_entry_safe(rule_mem, tmp_mem, &nic_rule->rule_members_list, list) {
-               list_del(&rule_mem->list);
-               list_del(&rule_mem->use_ste_list);
-               mlx5dr_ste_put(rule_mem->ste, rule->matcher, nic_rule->nic_matcher);
-               kvfree(rule_mem);
-       }
+
+       while (i--)
+               mlx5dr_ste_put(ste_arr[i], rule->matcher, nic_rule->nic_matcher);
 }
 
 static u16 dr_get_bits_per_mask(u16 byte_mask)
@@ -628,43 +662,25 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
                                      struct mlx5dr_domain_rx_tx *nic_dmn)
 {
        struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
+       int threshold;
 
        if (dmn->info.max_log_sw_icm_sz <= htbl->chunk_size)
                return false;
 
-       if (!ctrl->may_grow)
+       if (!mlx5dr_ste_htbl_may_grow(htbl))
                return false;
 
        if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
                return false;
 
-       if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
-           (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
+       threshold = mlx5dr_ste_htbl_increase_threshold(htbl);
+       if (ctrl->num_of_collisions >= threshold &&
+           (ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= threshold)
                return true;
 
        return false;
 }
 
-static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
-                             struct mlx5dr_ste *ste)
-{
-       struct mlx5dr_rule_member *rule_mem;
-
-       rule_mem = kvzalloc(sizeof(*rule_mem), GFP_KERNEL);
-       if (!rule_mem)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&rule_mem->list);
-       INIT_LIST_HEAD(&rule_mem->use_ste_list);
-
-       rule_mem->ste = ste;
-       list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
-
-       list_add_tail(&rule_mem->use_ste_list, &ste->rule_list);
-
-       return 0;
-}
-
 static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
                                      struct mlx5dr_rule_rx_tx *nic_rule,
                                      struct list_head *send_ste_list,
@@ -679,15 +695,13 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
        u8 *curr_hw_ste, *prev_hw_ste;
        struct mlx5dr_ste *action_ste;
-       int i, k, ret;
+       int i, k;
 
        /* Two cases:
         * 1. num_of_builders is equal to new_hw_ste_arr_sz, the action in the ste
         * 2. num_of_builders is less then new_hw_ste_arr_sz, new ste was added
         *    to support the action.
         */
-       if (num_of_builders == new_hw_ste_arr_sz)
-               return 0;
 
        for (i = num_of_builders, k = 0; i < new_hw_ste_arr_sz; i++, k++) {
                curr_hw_ste = hw_ste_arr + i * DR_STE_SIZE;
@@ -700,6 +714,10 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
 
                mlx5dr_ste_get(action_ste);
 
+               action_ste->htbl->pointing_ste = last_ste;
+               last_ste->next_htbl = action_ste->htbl;
+               last_ste = action_ste;
+
                /* While free ste we go over the miss list, so add this ste to the list */
                list_add_tail(&action_ste->miss_list_node,
                              mlx5dr_ste_get_miss_list(action_ste));
@@ -713,21 +731,19 @@ static int dr_rule_handle_action_stes(struct mlx5dr_rule *rule,
                mlx5dr_ste_set_hit_addr_by_next_htbl(dmn->ste_ctx,
                                                     prev_hw_ste,
                                                     action_ste->htbl);
-               ret = dr_rule_add_member(nic_rule, action_ste);
-               if (ret) {
-                       mlx5dr_dbg(dmn, "Failed adding rule member\n");
-                       goto free_ste_info;
-               }
+
+               mlx5dr_rule_set_last_member(nic_rule, action_ste, true);
+
                mlx5dr_send_fill_and_append_ste_send_info(action_ste, DR_STE_SIZE, 0,
                                                          curr_hw_ste,
                                                          ste_info_arr[k],
                                                          send_ste_list, false);
        }
 
+       last_ste->next_htbl = NULL;
+
        return 0;
 
-free_ste_info:
-       kfree(ste_info_arr[k]);
 err_exit:
        mlx5dr_ste_put(action_ste, matcher, nic_matcher);
        return -ENOMEM;
@@ -1015,12 +1031,12 @@ static enum mlx5dr_ipv dr_rule_get_ipv(struct mlx5dr_match_spec *spec)
 }
 
 static bool dr_rule_skip(enum mlx5dr_domain_type domain,
-                        enum mlx5dr_ste_entry_type ste_type,
+                        enum mlx5dr_domain_nic_type nic_type,
                         struct mlx5dr_match_param *mask,
                         struct mlx5dr_match_param *value,
                         u32 flow_source)
 {
-       bool rx = ste_type == MLX5DR_STE_TYPE_RX;
+       bool rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
 
        if (domain != MLX5DR_DOMAIN_TYPE_FDB)
                return false;
@@ -1065,9 +1081,7 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
        nic_matcher = nic_rule->nic_matcher;
        nic_dmn = nic_matcher->nic_tbl->nic_dmn;
 
-       INIT_LIST_HEAD(&nic_rule->rule_members_list);
-
-       if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param,
+       if (dr_rule_skip(dmn->type, nic_dmn->type, &matcher->mask, param,
                         rule->flow_source))
                return 0;
 
@@ -1121,14 +1135,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 
                cur_htbl = ste->next_htbl;
 
-               /* Keep all STEs in the rule struct */
-               ret = dr_rule_add_member(nic_rule, ste);
-               if (ret) {
-                       mlx5dr_dbg(dmn, "Failed adding rule member index %d\n", i);
-                       goto free_ste;
-               }
-
                mlx5dr_ste_get(ste);
+               mlx5dr_rule_set_last_member(nic_rule, ste, true);
        }
 
        /* Connect actions */
@@ -1153,8 +1161,6 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 
        return 0;
 
-free_ste:
-       mlx5dr_ste_put(ste, matcher, nic_matcher);
 free_rule:
        dr_rule_clean_rule_members(rule, nic_rule);
        /* Clean all ste_info's */
index 9df0e73..bfb14b4 100644 (file)
@@ -325,10 +325,14 @@ static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
 
        do {
                ne = dr_poll_cq(send_ring->cq, 1);
-               if (ne < 0)
+               if (unlikely(ne < 0)) {
+                       mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
+                                           send_ring->qp->qpn);
+                       send_ring->err_state = true;
                        return ne;
-               else if (ne == 1)
+               } else if (ne == 1) {
                        send_ring->pending_wqe -= send_ring->signal_th;
+               }
        } while (is_drain && send_ring->pending_wqe);
 
        return 0;
@@ -361,6 +365,14 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
        u32 buff_offset;
        int ret;
 
+       if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
+                    send_ring->err_state)) {
+               mlx5_core_dbg_once(dmn->mdev,
+                                  "Skipping post send: QP err state: %d, device state: %d\n",
+                                  send_ring->err_state, dmn->mdev->state);
+               return 0;
+       }
+
        spin_lock(&send_ring->lock);
 
        ret = dr_handle_pending_wc(dmn, send_ring);
@@ -620,6 +632,7 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 
        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
+       MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
 
        MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
@@ -789,7 +802,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
index 9b15291..1cdfe4f 100644 (file)
@@ -172,9 +172,6 @@ static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
                dst->next_htbl->pointing_ste = dst;
 
        dst->refcount = src->refcount;
-
-       INIT_LIST_HEAD(&dst->rule_list);
-       list_splice_tail_init(&src->rule_list, &dst->rule_list);
 }
 
 /* Free ste which is the head and the only one in miss_list */
@@ -233,12 +230,12 @@ dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher,
        /* Remove from the miss_list the next_ste before copy */
        list_del_init(&next_ste->miss_list_node);
 
-       /* All rule-members that use next_ste should know about that */
-       mlx5dr_rule_update_rule_member(next_ste, ste);
-
        /* Move data from next into ste */
        dr_ste_replace(ste, next_ste);
 
+       /* Update the rule on STE change */
+       mlx5dr_rule_set_last_member(next_ste->rule_rx_tx, ste, false);
+
        /* Copy all 64 hw_ste bytes */
        memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED);
        sb_idx = ste->ste_chain_location - 1;
@@ -382,14 +379,15 @@ void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx,
 /* Init one ste as a pattern for ste data array */
 void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
                                  u16 gvmi,
-                                 struct mlx5dr_domain_rx_tx *nic_dmn,
+                                 enum mlx5dr_domain_nic_type nic_type,
                                  struct mlx5dr_ste_htbl *htbl,
                                  u8 *formatted_ste,
                                  struct mlx5dr_htbl_connect_info *connect_info)
 {
+       bool is_rx = nic_type == DR_DOMAIN_NIC_TYPE_RX;
        struct mlx5dr_ste ste = {};
 
-       ste_ctx->ste_init(formatted_ste, htbl->lu_type, nic_dmn->ste_type, gvmi);
+       ste_ctx->ste_init(formatted_ste, htbl->lu_type, is_rx, gvmi);
        ste.hw_ste = formatted_ste;
 
        if (connect_info->type == CONNECT_HIT)
@@ -408,7 +406,7 @@ int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
 
        mlx5dr_ste_set_formatted_ste(dmn->ste_ctx,
                                     dmn->info.caps.gvmi,
-                                    nic_dmn,
+                                    nic_dmn->type,
                                     htbl,
                                     formatted_ste,
                                     connect_info);
@@ -466,21 +464,6 @@ free_table:
        return -ENOENT;
 }
 
-static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
-{
-       struct mlx5dr_ste_htbl_ctrl *ctrl = &htbl->ctrl;
-       int num_of_entries;
-
-       htbl->ctrl.may_grow = true;
-
-       if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
-               htbl->ctrl.may_grow = false;
-
-       /* Threshold is 50%, one is added to table of size 1 */
-       num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
-       ctrl->increase_threshold = (num_of_entries + 1) / 2;
-}
-
 struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
                                              enum mlx5dr_icm_chunk_size chunk_size,
                                              u16 lu_type, u16 byte_mask)
@@ -513,11 +496,9 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
                ste->refcount = 0;
                INIT_LIST_HEAD(&ste->miss_list_node);
                INIT_LIST_HEAD(&htbl->miss_list[i]);
-               INIT_LIST_HEAD(&ste->rule_list);
        }
 
        htbl->chunk_size = chunk_size;
-       dr_ste_set_ctrl(htbl);
        return htbl;
 
 out_free_htbl:
@@ -649,6 +630,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
                             u8 *ste_arr)
 {
        struct mlx5dr_domain_rx_tx *nic_dmn = nic_matcher->nic_tbl->nic_dmn;
+       bool is_rx = nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX;
        struct mlx5dr_domain *dmn = matcher->tbl->dmn;
        struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx;
        struct mlx5dr_ste_build *sb;
@@ -663,7 +645,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher,
        for (i = 0; i < nic_matcher->num_of_builders; i++) {
                ste_ctx->ste_init(ste_arr,
                                  sb->lu_type,
-                                 nic_dmn->ste_type,
+                                 is_rx,
                                  dmn->info.caps.gvmi);
 
                mlx5dr_ste_set_bit_mask(ste_arr, sb->bit_mask);
index 12a8bbb..2d52d06 100644 (file)
@@ -146,7 +146,7 @@ struct mlx5dr_ste_ctx {
 
        /* Getters and Setters */
        void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
-                        u8 entry_type, u16 gvmi);
+                        bool is_rx, u16 gvmi);
        void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type);
        u16  (*get_next_lu_type)(u8 *hw_ste_p);
        void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr);
index e4dd4ee..9c704bc 100644 (file)
@@ -8,6 +8,12 @@
 #define SVLAN_ETHERTYPE                0x88a8
 #define DR_STE_ENABLE_FLOW_TAG BIT(31)
 
+enum dr_ste_v0_entry_type {
+       DR_STE_TYPE_TX          = 1,
+       DR_STE_TYPE_RX          = 2,
+       DR_STE_TYPE_MODIFY_PKT  = 6,
+};
+
 enum dr_ste_v0_action_tunl {
        DR_STE_TUNL_ACTION_NONE         = 0,
        DR_STE_TUNL_ACTION_ENABLE       = 1,
@@ -292,8 +298,8 @@ static void dr_ste_v0_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
        MLX5_SET(ste_general, hw_ste_p, next_table_base_31_5_size, index);
 }
 
-static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
-                          u8 entry_type, u16 gvmi)
+static void dr_ste_v0_init_full(u8 *hw_ste_p, u16 lu_type,
+                               enum dr_ste_v0_entry_type entry_type, u16 gvmi)
 {
        dr_ste_v0_set_entry_type(hw_ste_p, entry_type);
        dr_ste_v0_set_lu_type(hw_ste_p, lu_type);
@@ -307,6 +313,15 @@ static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
        MLX5_SET(ste_rx_steering_mult, hw_ste_p, miss_address_63_48, gvmi);
 }
 
+static void dr_ste_v0_init(u8 *hw_ste_p, u16 lu_type,
+                          bool is_rx, u16 gvmi)
+{
+       enum dr_ste_v0_entry_type entry_type;
+
+       entry_type = is_rx ? DR_STE_TYPE_RX : DR_STE_TYPE_TX;
+       dr_ste_v0_init_full(hw_ste_p, lu_type, entry_type, gvmi);
+}
+
 static void dr_ste_v0_rx_set_flow_tag(u8 *hw_ste_p, u32 flow_tag)
 {
        MLX5_SET(ste_rx_steering_mult, hw_ste_p, qp_list_pointer,
@@ -380,13 +395,13 @@ static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions,
 
 static void dr_ste_v0_arr_init_next(u8 **last_ste,
                                    u32 *added_stes,
-                                   enum mlx5dr_ste_entry_type entry_type,
+                                   enum dr_ste_v0_entry_type entry_type,
                                    u16 gvmi)
 {
        (*added_stes)++;
        *last_ste += DR_STE_SIZE;
-       dr_ste_v0_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
-                      entry_type, gvmi);
+       dr_ste_v0_init_full(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE,
+                           entry_type, gvmi);
 }
 
 static void
@@ -404,7 +419,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
         * modify headers for outer headers only
         */
        if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
-               dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+               dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
                dr_ste_v0_set_rewrite_actions(last_ste,
                                              attr->modify_actions,
                                              attr->modify_index);
@@ -417,7 +432,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
                        if (i || action_type_set[DR_ACTION_TYP_MODIFY_HDR])
                                dr_ste_v0_arr_init_next(&last_ste,
                                                        added_stes,
-                                                       MLX5DR_STE_TYPE_TX,
+                                                       DR_STE_TYPE_TX,
                                                        attr->gvmi);
 
                        dr_ste_v0_set_tx_push_vlan(last_ste,
@@ -435,7 +450,7 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn,
                    action_type_set[DR_ACTION_TYP_PUSH_VLAN])
                        dr_ste_v0_arr_init_next(&last_ste,
                                                added_stes,
-                                               MLX5DR_STE_TYPE_TX,
+                                               DR_STE_TYPE_TX,
                                                attr->gvmi);
 
                dr_ste_v0_set_tx_encap(last_ste,
@@ -469,7 +484,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
                dr_ste_v0_set_counter_id(last_ste, attr->ctr_id);
 
        if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
-               dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+               dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
                dr_ste_v0_set_rx_decap_l3(last_ste, attr->decap_with_vlan);
                dr_ste_v0_set_rewrite_actions(last_ste,
                                              attr->decap_actions,
@@ -488,7 +503,7 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
                            action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2])
                                dr_ste_v0_arr_init_next(&last_ste,
                                                        added_stes,
-                                                       MLX5DR_STE_TYPE_RX,
+                                                       DR_STE_TYPE_RX,
                                                        attr->gvmi);
 
                        dr_ste_v0_set_rx_pop_vlan(last_ste);
@@ -496,13 +511,13 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
        }
 
        if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
-               if (dr_ste_v0_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+               if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
                        dr_ste_v0_arr_init_next(&last_ste,
                                                added_stes,
-                                               MLX5DR_STE_TYPE_MODIFY_PKT,
+                                               DR_STE_TYPE_MODIFY_PKT,
                                                attr->gvmi);
                else
-                       dr_ste_v0_set_entry_type(last_ste, MLX5DR_STE_TYPE_MODIFY_PKT);
+                       dr_ste_v0_set_entry_type(last_ste, DR_STE_TYPE_MODIFY_PKT);
 
                dr_ste_v0_set_rewrite_actions(last_ste,
                                              attr->modify_actions,
@@ -510,10 +525,10 @@ dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn,
        }
 
        if (action_type_set[DR_ACTION_TYP_TAG]) {
-               if (dr_ste_v0_get_entry_type(last_ste) == MLX5DR_STE_TYPE_MODIFY_PKT)
+               if (dr_ste_v0_get_entry_type(last_ste) == DR_STE_TYPE_MODIFY_PKT)
                        dr_ste_v0_arr_init_next(&last_ste,
                                                added_stes,
-                                               MLX5DR_STE_TYPE_RX,
+                                               DR_STE_TYPE_RX,
                                                attr->gvmi);
 
                dr_ste_v0_rx_set_flow_tag(last_ste, attr->flow_tag);
@@ -1157,6 +1172,7 @@ dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
                                   u8 *tag)
 {
        struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer;
+       struct mlx5dr_match_misc *misc = &value->misc;
 
        DR_STE_SET_TAG(eth_l4, tag, dst_port, spec, tcp_dport);
        DR_STE_SET_TAG(eth_l4, tag, src_port, spec, tcp_sport);
@@ -1168,6 +1184,11 @@ dr_ste_v0_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value,
        DR_STE_SET_TAG(eth_l4, tag, ecn, spec, ip_ecn);
        DR_STE_SET_TAG(eth_l4, tag, ipv6_hop_limit, spec, ttl_hoplimit);
 
+       if (sb->inner)
+               DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, inner_ipv6_flow_label);
+       else
+               DR_STE_SET_TAG(eth_l4, tag, flow_label, misc, outer_ipv6_flow_label);
+
        if (spec->tcp_flags) {
                DR_STE_SET_TCP_FLAGS(eth_l4, tag, spec);
                spec->tcp_flags = 0;
@@ -1772,7 +1793,7 @@ dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
 
 static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
                                                    struct mlx5dr_ste_build *sb,
-                                                   uint8_t *tag)
+                                                   u8 *tag)
 {
        struct mlx5dr_match_misc3 *misc3 = &value->misc3;
 
@@ -1802,7 +1823,7 @@ static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *s
 static int
 dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
                                           struct mlx5dr_ste_build *sb,
-                                          uint8_t *tag)
+                                          u8 *tag)
 {
        if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
                DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1829,7 +1850,7 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
 static int
 dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
                                           struct mlx5dr_ste_build *sb,
-                                          uint8_t *tag)
+                                          u8 *tag)
 {
        if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
                DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
index 4aaca8e..b2481c9 100644 (file)
@@ -322,7 +322,7 @@ static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size)
 }
 
 static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type,
-                          u8 entry_type, u16 gvmi)
+                          bool is_rx, u16 gvmi)
 {
        dr_ste_v1_set_lu_type(hw_ste_p, lu_type);
        dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE);
@@ -402,8 +402,23 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action,
        dr_ste_v1_set_reparse(hw_ste_p);
 }
 
-static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action,
-                                      u32 vlan_hdr)
+static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action,
+                                    u8 anchor, u8 offset,
+                                    int size)
+{
+       MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
+                action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
+       MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_anchor, anchor);
+
+       /* The hardware expects here size and offset in words (2 byte) */
+       MLX5_SET(ste_single_action_remove_header_size_v1, s_action, remove_size, size / 2);
+       MLX5_SET(ste_single_action_remove_header_size_v1, s_action, start_offset, offset / 2);
+
+       dr_ste_v1_set_reparse(hw_ste_p);
+}
+
+static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action,
+                                   u32 vlan_hdr)
 {
        MLX5_SET(ste_double_action_insert_with_inline_v1, d_action,
                 action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE);
@@ -416,7 +431,7 @@ static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action,
        dr_ste_v1_set_reparse(hw_ste_p);
 }
 
-static void dr_ste_v1_set_rx_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
+static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num)
 {
        MLX5_SET(ste_single_action_remove_header_size_v1, s_action,
                 action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
@@ -503,13 +518,28 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
 {
        u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action);
        u8 action_sz = DR_STE_ACTION_DOUBLE_SZ;
+       bool allow_modify_hdr = true;
        bool allow_encap = true;
 
+       if (action_type_set[DR_ACTION_TYP_POP_VLAN]) {
+               if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+                       dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
+                                                     attr->gvmi);
+                       action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+                                             last_ste, action);
+                       action_sz = DR_STE_ACTION_TRIPLE_SZ;
+               }
+               dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
+               action_sz -= DR_STE_ACTION_SINGLE_SZ;
+               action += DR_STE_ACTION_SINGLE_SZ;
+               allow_modify_hdr = false;
+       }
+
        if (action_type_set[DR_ACTION_TYP_CTR])
                dr_ste_v1_set_counter_id(last_ste, attr->ctr_id);
 
        if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) {
-               if (action_sz < DR_STE_ACTION_DOUBLE_SZ) {
+               if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) {
                        dr_ste_v1_arr_init_next_match(&last_ste, added_stes,
                                                      attr->gvmi);
                        action = MLX5_ADDR_OF(ste_mask_and_match_v1,
@@ -534,7 +564,8 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
                                action_sz = DR_STE_ACTION_TRIPLE_SZ;
                                allow_encap = true;
                        }
-                       dr_ste_v1_set_tx_push_vlan(last_ste, action, attr->vlans.headers[i]);
+                       dr_ste_v1_set_push_vlan(last_ste, action,
+                                               attr->vlans.headers[i]);
                        action_sz -= DR_STE_ACTION_DOUBLE_SZ;
                        action += DR_STE_ACTION_DOUBLE_SZ;
                }
@@ -579,6 +610,18 @@ static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn,
                                         attr->reformat.size);
                action_sz -= DR_STE_ACTION_DOUBLE_SZ;
                action += DR_STE_ACTION_DOUBLE_SZ;
+       } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+               if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+                       dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+                       action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+                       action_sz = DR_STE_ACTION_TRIPLE_SZ;
+               }
+               dr_ste_v1_set_remove_hdr(last_ste, action,
+                                        attr->reformat.param_0,
+                                        attr->reformat.param_1,
+                                        attr->reformat.size);
+               action_sz -= DR_STE_ACTION_SINGLE_SZ;
+               action += DR_STE_ACTION_SINGLE_SZ;
        }
 
        dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
@@ -635,7 +678,7 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
                        allow_ctr = false;
                }
 
-               dr_ste_v1_set_rx_pop_vlan(last_ste, action, attr->vlans.count);
+               dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count);
                action_sz -= DR_STE_ACTION_SINGLE_SZ;
                action += DR_STE_ACTION_SINGLE_SZ;
        }
@@ -656,6 +699,26 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
                action += DR_STE_ACTION_DOUBLE_SZ;
        }
 
+       if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) {
+               int i;
+
+               for (i = 0; i < attr->vlans.count; i++) {
+                       if (action_sz < DR_STE_ACTION_DOUBLE_SZ ||
+                           !allow_modify_hdr) {
+                               dr_ste_v1_arr_init_next_match(&last_ste,
+                                                             added_stes,
+                                                             attr->gvmi);
+                               action = MLX5_ADDR_OF(ste_mask_and_match_v1,
+                                                     last_ste, action);
+                               action_sz = DR_STE_ACTION_TRIPLE_SZ;
+                       }
+                       dr_ste_v1_set_push_vlan(last_ste, action,
+                                               attr->vlans.headers[i]);
+                       action_sz -= DR_STE_ACTION_DOUBLE_SZ;
+                       action += DR_STE_ACTION_DOUBLE_SZ;
+               }
+       }
+
        if (action_type_set[DR_ACTION_TYP_CTR]) {
                /* Counter action set after decap and before insert_hdr
                 * to exclude decaped / encaped header respectively.
@@ -714,6 +777,20 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
                action_sz -= DR_STE_ACTION_DOUBLE_SZ;
                action += DR_STE_ACTION_DOUBLE_SZ;
                allow_modify_hdr = false;
+       } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) {
+               if (action_sz < DR_STE_ACTION_SINGLE_SZ) {
+                       dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi);
+                       action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action);
+                       action_sz = DR_STE_ACTION_TRIPLE_SZ;
+                       allow_modify_hdr = true;
+                       allow_ctr = true;
+               }
+               dr_ste_v1_set_remove_hdr(last_ste, action,
+                                        attr->reformat.param_0,
+                                        attr->reformat.param_1,
+                                        attr->reformat.size);
+               action_sz -= DR_STE_ACTION_SINGLE_SZ;
+               action += DR_STE_ACTION_SINGLE_SZ;
        }
 
        dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi);
@@ -1844,7 +1921,7 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
 
 static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
                                                    struct mlx5dr_ste_build *sb,
-                                                   uint8_t *tag)
+                                                   u8 *tag)
 {
        struct mlx5dr_match_misc3 *misc3 = &value->misc3;
 
@@ -1868,7 +1945,7 @@ static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *s
 static int
 dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
                                           struct mlx5dr_ste_build *sb,
-                                          uint8_t *tag)
+                                          u8 *tag)
 {
        if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
                DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1895,7 +1972,7 @@ dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
 static int
 dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
                                           struct mlx5dr_ste_build *sb,
-                                          uint8_t *tag)
+                                          u8 *tag)
 {
        if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
                DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
@@ -1960,7 +2037,9 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
        .set_byte_mask                  = &dr_ste_v1_set_byte_mask,
        .get_byte_mask                  = &dr_ste_v1_get_byte_mask,
        /* Actions */
-       .actions_caps                   = DR_STE_CTX_ACTION_CAP_RX_ENCAP,
+       .actions_caps                   = DR_STE_CTX_ACTION_CAP_TX_POP |
+                                         DR_STE_CTX_ACTION_CAP_RX_PUSH |
+                                         DR_STE_CTX_ACTION_CAP_RX_ENCAP,
        .set_actions_rx                 = &dr_ste_v1_set_actions_rx,
        .set_actions_tx                 = &dr_ste_v1_set_actions_tx,
        .modify_field_arr_sz            = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr),
index f5e93fa..b20e8aa 100644 (file)
@@ -83,15 +83,14 @@ enum {
        DR_STE_SIZE_CTRL = 32,
        DR_STE_SIZE_TAG = 16,
        DR_STE_SIZE_MASK = 16,
-};
-
-enum {
        DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK,
 };
 
 enum mlx5dr_ste_ctx_action_cap {
        DR_STE_CTX_ACTION_CAP_NONE = 0,
-       DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 0,
+       DR_STE_CTX_ACTION_CAP_TX_POP   = 1 << 0,
+       DR_STE_CTX_ACTION_CAP_RX_PUSH  = 1 << 1,
+       DR_STE_CTX_ACTION_CAP_RX_ENCAP = 1 << 2,
 };
 
 enum {
@@ -124,6 +123,7 @@ enum mlx5dr_action_type {
        DR_ACTION_TYP_POP_VLAN,
        DR_ACTION_TYP_PUSH_VLAN,
        DR_ACTION_TYP_INSERT_HDR,
+       DR_ACTION_TYP_REMOVE_HDR,
        DR_ACTION_TYP_SAMPLER,
        DR_ACTION_TYP_MAX,
 };
@@ -140,6 +140,7 @@ struct mlx5dr_icm_buddy_mem;
 struct mlx5dr_ste_htbl;
 struct mlx5dr_match_param;
 struct mlx5dr_cmd_caps;
+struct mlx5dr_rule_rx_tx;
 struct mlx5dr_matcher_rx_tx;
 struct mlx5dr_ste_ctx;
 
@@ -151,14 +152,14 @@ struct mlx5dr_ste {
        /* attached to the miss_list head at each htbl entry */
        struct list_head miss_list_node;
 
-       /* each rule member that uses this ste attached here */
-       struct list_head rule_list;
-
        /* this ste is member of htbl */
        struct mlx5dr_ste_htbl *htbl;
 
        struct mlx5dr_ste_htbl *next_htbl;
 
+       /* The rule this STE belongs to */
+       struct mlx5dr_rule_rx_tx *rule_rx_tx;
+
        /* this ste is part of a rule, located in ste's chain */
        u8 ste_chain_location;
 };
@@ -171,8 +172,6 @@ struct mlx5dr_ste_htbl_ctrl {
 
        /* total number of collisions entries attached to this table */
        unsigned int num_of_collisions;
-       unsigned int increase_threshold;
-       u8 may_grow:1;
 };
 
 struct mlx5dr_ste_htbl {
@@ -804,10 +803,15 @@ struct mlx5dr_cmd_caps {
        u8 isolate_vl_tc:1;
 };
 
+enum mlx5dr_domain_nic_type {
+       DR_DOMAIN_NIC_TYPE_RX,
+       DR_DOMAIN_NIC_TYPE_TX,
+};
+
 struct mlx5dr_domain_rx_tx {
        u64 drop_icm_addr;
        u64 default_icm_addr;
-       enum mlx5dr_ste_entry_type ste_type;
+       enum mlx5dr_domain_nic_type type;
        struct mutex mutex; /* protect rx/tx domain */
 };
 
@@ -885,14 +889,6 @@ struct mlx5dr_matcher {
        struct mlx5dv_flow_matcher *dv_matcher;
 };
 
-struct mlx5dr_rule_member {
-       struct mlx5dr_ste *ste;
-       /* attached to mlx5dr_rule via this */
-       struct list_head list;
-       /* attached to mlx5dr_ste via this */
-       struct list_head use_ste_list;
-};
-
 struct mlx5dr_ste_action_modify_field {
        u16 hw_field;
        u8 start;
@@ -993,8 +989,8 @@ struct mlx5dr_htbl_connect_info {
 };
 
 struct mlx5dr_rule_rx_tx {
-       struct list_head rule_members_list;
        struct mlx5dr_matcher_rx_tx *nic_matcher;
+       struct mlx5dr_ste *last_rule_ste;
 };
 
 struct mlx5dr_rule {
@@ -1005,8 +1001,12 @@ struct mlx5dr_rule {
        u32 flow_source;
 };
 
-void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste,
-                                   struct mlx5dr_ste *ste);
+void mlx5dr_rule_set_last_member(struct mlx5dr_rule_rx_tx *nic_rule,
+                                struct mlx5dr_ste *ste,
+                                bool force);
+int mlx5dr_rule_get_reverse_rule_members(struct mlx5dr_ste **ste_arr,
+                                        struct mlx5dr_ste *curr_ste,
+                                        int *num_of_stes);
 
 struct mlx5dr_icm_chunk {
        struct mlx5dr_icm_buddy_mem *buddy_mem;
@@ -1083,6 +1083,25 @@ mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size,
        return entry_size * num_of_entries;
 }
 
+static inline int
+mlx5dr_ste_htbl_increase_threshold(struct mlx5dr_ste_htbl *htbl)
+{
+       int num_of_entries =
+               mlx5dr_icm_pool_chunk_size_to_entries(htbl->chunk_size);
+
+       /* Threshold is 50%, one is added to table of size 1 */
+       return (num_of_entries + 1) / 2;
+}
+
+static inline bool
+mlx5dr_ste_htbl_may_grow(struct mlx5dr_ste_htbl *htbl)
+{
+       if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
+               return false;
+
+       return true;
+}
+
 static inline struct mlx5dr_cmd_vport_cap *
 mlx5dr_get_vport_cap(struct mlx5dr_cmd_caps *caps, u32 vport)
 {
@@ -1216,7 +1235,7 @@ int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn,
                                      bool update_hw_ste);
 void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx,
                                  u16 gvmi,
-                                 struct mlx5dr_domain_rx_tx *nic_dmn,
+                                 enum mlx5dr_domain_nic_type nic_type,
                                  struct mlx5dr_ste_htbl *htbl,
                                  u8 *formatted_ste,
                                  struct mlx5dr_htbl_connect_info *connect_info);
@@ -1282,6 +1301,7 @@ struct mlx5dr_send_ring {
        u8 sync_buff[MIN_READ_SYNC];
        struct mlx5dr_mr *sync_mr;
        spinlock_t lock; /* Protect the data path of the send ring */
+       bool err_state; /* send_ring is not usable in err state */
 };
 
 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
@@ -1333,6 +1353,7 @@ struct mlx5dr_cmd_fte_info {
        u32 *val;
        struct mlx5_flow_act action;
        struct mlx5dr_cmd_flow_destination_hw_info *dest_arr;
+       bool ignore_flow_level;
 };
 
 int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -1362,7 +1383,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
                            int num_dest,
                            bool reformat_req,
                            u32 *tbl_id,
-                           u32 *group_id);
+                           u32 *group_id,
+                           bool ignore_flow_level);
 void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
                              u32 group_id);
 #endif  /* _DR_TYPES_H_ */
index d5926dd..7e58f4e 100644 (file)
@@ -133,6 +133,9 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns,
                                         struct mlx5_flow_table *ft,
                                         struct mlx5_flow_table *next_ft)
 {
+       if (mlx5_dr_is_fw_table(ft->flags))
+               return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
+
        return set_miss_action(ns, ft, next_ft);
 }
 
@@ -487,9 +490,13 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
 
                actions[num_actions++] = term_actions->dest;
        } else if (num_term_actions > 1) {
+               bool ignore_flow_level =
+                       !!(fte->action.flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+
                tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
                                                                term_actions,
-                                                               num_term_actions);
+                                                               num_term_actions,
+                                                               ignore_flow_level);
                if (!tmp_action) {
                        err = -EOPNOTSUPP;
                        goto free_actions;
@@ -557,6 +564,9 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns
        case MLX5_REFORMAT_TYPE_INSERT_HDR:
                dr_reformat = DR_ACTION_REFORMAT_TYP_INSERT_HDR;
                break;
+       case MLX5_REFORMAT_TYPE_REMOVE_HDR:
+               dr_reformat = DR_ACTION_REFORMAT_TYP_REMOVE_HDR;
+               break;
        default:
                mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
                              params->type);
@@ -615,15 +625,6 @@ static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *n
        mlx5dr_action_destroy(modify_hdr->action.dr_action);
 }
 
-static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
-                                 struct mlx5_flow_table *ft,
-                                 struct mlx5_flow_group *group,
-                                 int modify_mask,
-                                 struct fs_fte *fte)
-{
-       return -EOPNOTSUPP;
-}
-
 static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
                                  struct mlx5_flow_table *ft,
                                  struct fs_fte *fte)
@@ -648,6 +649,36 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
        return 0;
 }
 
+static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns,
+                                 struct mlx5_flow_table *ft,
+                                 struct mlx5_flow_group *group,
+                                 int modify_mask,
+                                 struct fs_fte *fte)
+{
+       struct fs_fte fte_tmp = {};
+       int ret;
+
+       if (mlx5_dr_is_fw_table(ft->flags))
+               return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte);
+
+       /* Backup current dr rule details */
+       fte_tmp.fs_dr_rule = fte->fs_dr_rule;
+       memset(&fte->fs_dr_rule, 0, sizeof(struct mlx5_fs_dr_rule));
+
+       /* First add the new updated rule, then delete the old rule */
+       ret = mlx5_cmd_dr_create_fte(ns, ft, group, fte);
+       if (ret)
+               goto restore_fte;
+
+       ret = mlx5_cmd_dr_delete_fte(ns, ft, &fte_tmp);
+       WARN_ONCE(ret, "dr update fte duplicate rule deletion failed\n");
+       return ret;
+
+restore_fte:
+       fte->fs_dr_rule = fte_tmp.fs_dr_rule;
+       return ret;
+}
+
 static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
                                struct mlx5_flow_root_namespace *peer_ns)
 {
index 9643ee6..d2a937f 100644 (file)
@@ -8,12 +8,6 @@ enum {
        MLX5DR_STE_LU_TYPE_DONT_CARE                    = 0x0f,
 };
 
-enum mlx5dr_ste_entry_type {
-       MLX5DR_STE_TYPE_TX              = 1,
-       MLX5DR_STE_TYPE_RX              = 2,
-       MLX5DR_STE_TYPE_MODIFY_PKT      = 6,
-};
-
 struct mlx5_ifc_ste_general_bits {
        u8         entry_type[0x4];
        u8         reserved_at_4[0x4];
index bbfe101..c5a8b16 100644 (file)
@@ -27,6 +27,7 @@ enum mlx5dr_action_reformat_type {
        DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2,
        DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3,
        DR_ACTION_REFORMAT_TYP_INSERT_HDR,
+       DR_ACTION_REFORMAT_TYP_REMOVE_HDR,
 };
 
 struct mlx5dr_match_parameters {
@@ -94,7 +95,8 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
 struct mlx5dr_action *
 mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_action_dest *dests,
-                                  u32 num_of_dests);
+                                  u32 num_of_dests,
+                                  bool ignore_flow_level);
 
 struct mlx5dr_action *mlx5dr_action_create_drop(void);
 
index a0a059e..3e85b17 100644 (file)
@@ -199,7 +199,7 @@ static int mlxbf_gige_stop(struct net_device *netdev)
        return 0;
 }
 
-static int mlxbf_gige_do_ioctl(struct net_device *netdev,
+static int mlxbf_gige_eth_ioctl(struct net_device *netdev,
                               struct ifreq *ifr, int cmd)
 {
        if (!(netif_running(netdev)))
@@ -253,7 +253,7 @@ static const struct net_device_ops mlxbf_gige_netdev_ops = {
        .ndo_start_xmit         = mlxbf_gige_start_xmit,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = mlxbf_gige_do_ioctl,
+       .ndo_eth_ioctl          = mlxbf_gige_eth_ioctl,
        .ndo_set_rx_mode        = mlxbf_gige_set_rx_mode,
        .ndo_get_stats64        = mlxbf_gige_get_stats64,
 };
@@ -269,9 +269,6 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
 {
        struct phy_device *phydev;
        struct net_device *netdev;
-       struct resource *mac_res;
-       struct resource *llu_res;
-       struct resource *plu_res;
        struct mlxbf_gige *priv;
        void __iomem *llu_base;
        void __iomem *plu_base;
@@ -280,27 +277,15 @@ static int mlxbf_gige_probe(struct platform_device *pdev)
        int addr;
        int err;
 
-       mac_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MAC);
-       if (!mac_res)
-               return -ENXIO;
-
-       base = devm_ioremap_resource(&pdev->dev, mac_res);
+       base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
-       llu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_LLU);
-       if (!llu_res)
-               return -ENXIO;
-
-       llu_base = devm_ioremap_resource(&pdev->dev, llu_res);
+       llu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_LLU);
        if (IS_ERR(llu_base))
                return PTR_ERR(llu_base);
 
-       plu_res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_PLU);
-       if (!plu_res)
-               return -ENXIO;
-
-       plu_base = devm_ioremap_resource(&pdev->dev, plu_res);
+       plu_base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_PLU);
        if (IS_ERR(plu_base))
                return PTR_ERR(plu_base);
 
index e32dd34..7905179 100644 (file)
@@ -145,14 +145,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add,
 int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
 {
        struct device *dev = &pdev->dev;
-       struct resource *res;
        int ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, MLXBF_GIGE_RES_MDIO9);
-       if (!res)
-               return -ENODEV;
-
-       priv->mdio_io = devm_ioremap_resource(dev, res);
+       priv->mdio_io = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MDIO9);
        if (IS_ERR(priv->mdio_io))
                return PTR_ERR(priv->mdio_io);
 
index 12871c8..d1ae248 100644 (file)
@@ -58,10 +58,10 @@ config MLXSW_SPECTRUM
        depends on NET_IPGRE || NET_IPGRE=n
        depends on IPV6_GRE || IPV6_GRE=n
        depends on VXLAN || VXLAN=n
+       depends on PTP_1588_CLOCK_OPTIONAL
        select GENERIC_ALLOCATOR
        select PARMAN
        select OBJAGG
-       imply PTP_1588_CLOCK
        select NET_PTP_CLASSIFY if PTP_1588_CLOCK
        default m
        help
index e775f08..f080fab 100644 (file)
@@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 
        if (!reload) {
                alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
-               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size,
+                                       mlxsw_bus_info->dev);
                if (!devlink) {
                        err = -ENOMEM;
                        goto err_devlink_alloc;
@@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                goto err_emad_init;
 
        if (!reload) {
-               err = devlink_register(devlink, mlxsw_bus_info->dev);
+               err = devlink_register(devlink);
                if (err)
                        goto err_devlink_register;
        }
index 88699e6..250c5a2 100644 (file)
@@ -1207,7 +1207,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
        .ndo_vlan_rx_kill_vid   = mlxsw_sp_port_kill_vid,
        .ndo_set_features       = mlxsw_sp_set_features,
        .ndo_get_devlink_port   = mlxsw_sp_port_get_devlink_port,
-       .ndo_do_ioctl           = mlxsw_sp_port_ioctl,
+       .ndo_eth_ioctl          = mlxsw_sp_port_ioctl,
 };
 
 static int
@@ -2717,6 +2717,22 @@ mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
                                    unsigned long event, void *ptr);
 
+#define MLXSW_SP_DEFAULT_PARSING_DEPTH 96
+#define MLXSW_SP_INCREASED_PARSING_DEPTH 128
+#define MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT 4789
+
+static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp)
+{
+       mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH;
+       mlxsw_sp->parsing.vxlan_udp_dport = MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT;
+       mutex_init(&mlxsw_sp->parsing.lock);
+}
+
+static void mlxsw_sp_parsing_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       mutex_destroy(&mlxsw_sp->parsing.lock);
+}
+
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                         const struct mlxsw_bus_info *mlxsw_bus_info,
                         struct netlink_ext_ack *extack)
@@ -2727,6 +2743,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
        mlxsw_sp->core = mlxsw_core;
        mlxsw_sp->bus_info = mlxsw_bus_info;
 
+       mlxsw_sp_parsing_init(mlxsw_sp);
        mlxsw_core_emad_string_tlv_enable(mlxsw_core);
 
        err = mlxsw_sp_base_mac_get(mlxsw_sp);
@@ -2926,6 +2943,7 @@ err_policers_init:
        mlxsw_sp_fids_fini(mlxsw_sp);
 err_fids_init:
        mlxsw_sp_kvdl_fini(mlxsw_sp);
+       mlxsw_sp_parsing_fini(mlxsw_sp);
        return err;
 }
 
@@ -3046,6 +3064,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_policers_fini(mlxsw_sp);
        mlxsw_sp_fids_fini(mlxsw_sp);
        mlxsw_sp_kvdl_fini(mlxsw_sp);
+       mlxsw_sp_parsing_fini(mlxsw_sp);
 }
 
 /* Per-FID flood tables are used for both "true" 802.1D FIDs and emulated
@@ -3611,6 +3630,69 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port)
        dev_put(mlxsw_sp_port->dev);
 }
 
+int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp)
+{
+       char mprs_pl[MLXSW_REG_MPRS_LEN];
+       int err = 0;
+
+       mutex_lock(&mlxsw_sp->parsing.lock);
+
+       if (refcount_inc_not_zero(&mlxsw_sp->parsing.parsing_depth_ref))
+               goto out_unlock;
+
+       mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_INCREASED_PARSING_DEPTH,
+                           mlxsw_sp->parsing.vxlan_udp_dport);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+       if (err)
+               goto out_unlock;
+
+       mlxsw_sp->parsing.parsing_depth = MLXSW_SP_INCREASED_PARSING_DEPTH;
+       refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 1);
+
+out_unlock:
+       mutex_unlock(&mlxsw_sp->parsing.lock);
+       return err;
+}
+
+void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp)
+{
+       char mprs_pl[MLXSW_REG_MPRS_LEN];
+
+       mutex_lock(&mlxsw_sp->parsing.lock);
+
+       if (!refcount_dec_and_test(&mlxsw_sp->parsing.parsing_depth_ref))
+               goto out_unlock;
+
+       mlxsw_reg_mprs_pack(mprs_pl, MLXSW_SP_DEFAULT_PARSING_DEPTH,
+                           mlxsw_sp->parsing.vxlan_udp_dport);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+       mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH;
+
+out_unlock:
+       mutex_unlock(&mlxsw_sp->parsing.lock);
+}
+
+int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp,
+                                        __be16 udp_dport)
+{
+       char mprs_pl[MLXSW_REG_MPRS_LEN];
+       int err;
+
+       mutex_lock(&mlxsw_sp->parsing.lock);
+
+       mlxsw_reg_mprs_pack(mprs_pl, mlxsw_sp->parsing.parsing_depth,
+                           be16_to_cpu(udp_dport));
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+       if (err)
+               goto out_unlock;
+
+       mlxsw_sp->parsing.vxlan_udp_dport = be16_to_cpu(udp_dport);
+
+out_unlock:
+       mutex_unlock(&mlxsw_sp->parsing.lock);
+       return err;
+}
+
 static void
 mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port,
                                 struct net_device *lag_dev)
index f99db88..3a43cba 100644 (file)
@@ -148,6 +148,13 @@ struct mlxsw_sp_port_mapping {
        u8 lane;
 };
 
+struct mlxsw_sp_parsing {
+       refcount_t parsing_depth_ref;
+       u16 parsing_depth;
+       u16 vxlan_udp_dport;
+       struct mutex lock; /* Protects parsing configuration */
+};
+
 struct mlxsw_sp {
        struct mlxsw_sp_port **ports;
        struct mlxsw_core *core;
@@ -173,6 +180,7 @@ struct mlxsw_sp {
        struct mlxsw_sp_counter_pool *counter_pool;
        struct mlxsw_sp_span *span;
        struct mlxsw_sp_trap *trap;
+       struct mlxsw_sp_parsing parsing;
        const struct mlxsw_sp_switchdev_ops *switchdev_ops;
        const struct mlxsw_sp_kvdl_ops *kvdl_ops;
        const struct mlxsw_afa_ops *afa_ops;
@@ -652,6 +660,10 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev);
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
 void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port);
 struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev);
+int mlxsw_sp_parsing_depth_inc(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_parsing_depth_dec(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_parsing_vxlan_udp_dport_set(struct mlxsw_sp *mlxsw_sp,
+                                        __be16 udp_dport);
 
 /* spectrum_dcb.c */
 #ifdef CONFIG_MLXSW_SPECTRUM_DCB
index d8104fc..98d1fdc 100644 (file)
@@ -29,7 +29,6 @@ struct mlxsw_sp_nve {
        unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
        u32 tunnel_index;
        u16 ul_rif_index;       /* Reserved for Spectrum */
-       unsigned int inc_parsing_depth_refs;
 };
 
 struct mlxsw_sp_nve_ops {
index b84bb4b..d018d2d 100644 (file)
 #include "spectrum.h"
 #include "spectrum_nve.h"
 
-/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
- *
- * In the worst case - where we have a VLAN tag on the outer Ethernet
- * header and IPv6 in overlay and underlay - we need to parse 128 bytes
- */
-#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
-#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
-
 #define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS     (VXLAN_F_UDP_ZERO_CSUM_TX | \
                                                 VXLAN_F_LEARN)
 
@@ -115,66 +107,6 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
        config->udp_dport = cfg->dst_port;
 }
 
-static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
-                                     unsigned int parsing_depth,
-                                     __be16 udp_dport)
-{
-       char mprs_pl[MLXSW_REG_MPRS_LEN];
-
-       mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
-}
-
-static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
-                                   __be16 udp_dport)
-{
-       int parsing_depth = mlxsw_sp->nve->inc_parsing_depth_refs ?
-                               MLXSW_SP_NVE_VXLAN_PARSING_DEPTH :
-                               MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH;
-
-       return __mlxsw_sp_nve_parsing_set(mlxsw_sp, parsing_depth, udp_dport);
-}
-
-static int
-__mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp,
-                                    __be16 udp_dport)
-{
-       int err;
-
-       mlxsw_sp->nve->inc_parsing_depth_refs++;
-
-       err = mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
-       if (err)
-               goto err_nve_parsing_set;
-       return 0;
-
-err_nve_parsing_set:
-       mlxsw_sp->nve->inc_parsing_depth_refs--;
-       return err;
-}
-
-static void
-__mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp,
-                                    __be16 udp_dport)
-{
-       mlxsw_sp->nve->inc_parsing_depth_refs--;
-       mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
-}
-
-int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp)
-{
-       __be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
-
-       return __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, udp_dport);
-}
-
-void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp)
-{
-       __be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
-
-       __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, udp_dport);
-}
-
 static void
 mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
                                  const struct mlxsw_sp_nve_config *config)
@@ -238,10 +170,14 @@ static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
        struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
        int err;
 
-       err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+       err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport);
        if (err)
                return err;
 
+       err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+       if (err)
+               goto err_parsing_depth_inc;
+
        err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
        if (err)
                goto err_config_set;
@@ -263,7 +199,9 @@ err_promote_decap:
 err_rtdp_set:
        mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
 err_config_set:
-       __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+       mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+err_parsing_depth_inc:
+       mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
        return err;
 }
 
@@ -275,7 +213,8 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
        mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
                                         config->ul_proto, &config->ul_sip);
        mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
-       __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+       mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+       mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
 }
 
 static int
@@ -412,10 +351,14 @@ static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
        struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
        int err;
 
-       err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
+       err = mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, config->udp_dport);
        if (err)
                return err;
 
+       err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+       if (err)
+               goto err_parsing_depth_inc;
+
        err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config);
        if (err)
                goto err_config_set;
@@ -438,7 +381,9 @@ err_promote_decap:
 err_rtdp_set:
        mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
 err_config_set:
-       __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+       mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+err_parsing_depth_inc:
+       mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
        return err;
 }
 
@@ -450,7 +395,8 @@ static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
        mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
                                         config->ul_proto, &config->ul_sip);
        mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
-       __mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
+       mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+       mlxsw_sp_parsing_vxlan_udp_dport_set(mlxsw_sp, 0);
 }
 
 const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
index bfef65d..1a18038 100644 (file)
@@ -975,14 +975,14 @@ static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port,
        }
 
        if ((ing_types || egr_types) && !(orig_ing_types || orig_egr_types)) {
-               err = mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp);
+               err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
                if (err) {
                        netdev_err(mlxsw_sp_port->dev, "Failed to increase parsing depth");
                        return err;
                }
        }
        if (!(ing_types || egr_types) && (orig_ing_types || orig_egr_types))
-               mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp);
+               mlxsw_sp_parsing_depth_dec(mlxsw_sp);
 
        return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp,
                                       ing_types, egr_types);
index f69cbb3..19bb3ca 100644 (file)
@@ -9484,6 +9484,7 @@ struct mlxsw_sp_mp_hash_config {
        DECLARE_BITMAP(fields, __MLXSW_REG_RECR2_FIELD_CNT);
        DECLARE_BITMAP(inner_headers, __MLXSW_REG_RECR2_HEADER_CNT);
        DECLARE_BITMAP(inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT);
+       bool inc_parsing_depth;
 };
 
 #define MLXSW_SP_MP_HASH_HEADER_SET(_headers, _header) \
@@ -9654,6 +9655,7 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
                MLXSW_SP_MP_HASH_FIELD_SET(fields, IPV6_FLOW_LABEL);
                /* Inner */
                mlxsw_sp_mp_hash_inner_l3(config);
+               config->inc_parsing_depth = true;
                break;
        case 3:
                /* Outer */
@@ -9678,22 +9680,53 @@ static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp,
                        MLXSW_SP_MP_HASH_FIELD_SET(fields, TCP_UDP_DPORT);
                /* Inner */
                mlxsw_sp_mp_hash_inner_custom(config, hash_fields);
+               if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)
+                       config->inc_parsing_depth = true;
                break;
        }
 }
 
+static int mlxsw_sp_mp_hash_parsing_depth_adjust(struct mlxsw_sp *mlxsw_sp,
+                                                bool old_inc_parsing_depth,
+                                                bool new_inc_parsing_depth)
+{
+       int err;
+
+       if (!old_inc_parsing_depth && new_inc_parsing_depth) {
+               err = mlxsw_sp_parsing_depth_inc(mlxsw_sp);
+               if (err)
+                       return err;
+               mlxsw_sp->router->inc_parsing_depth = true;
+       } else if (old_inc_parsing_depth && !new_inc_parsing_depth) {
+               mlxsw_sp_parsing_depth_dec(mlxsw_sp);
+               mlxsw_sp->router->inc_parsing_depth = false;
+       }
+
+       return 0;
+}
+
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 {
+       bool old_inc_parsing_depth, new_inc_parsing_depth;
        struct mlxsw_sp_mp_hash_config config = {};
        char recr2_pl[MLXSW_REG_RECR2_LEN];
        unsigned long bit;
        u32 seed;
+       int err;
 
        seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
        mlxsw_reg_recr2_pack(recr2_pl, seed);
        mlxsw_sp_mp4_hash_init(mlxsw_sp, &config);
        mlxsw_sp_mp6_hash_init(mlxsw_sp, &config);
 
+       old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
+       new_inc_parsing_depth = config.inc_parsing_depth;
+       err = mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp,
+                                                   old_inc_parsing_depth,
+                                                   new_inc_parsing_depth);
+       if (err)
+               return err;
+
        for_each_set_bit(bit, config.headers, __MLXSW_REG_RECR2_HEADER_CNT)
                mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, bit, 1);
        for_each_set_bit(bit, config.fields, __MLXSW_REG_RECR2_FIELD_CNT)
@@ -9703,7 +9736,16 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
        for_each_set_bit(bit, config.inner_fields, __MLXSW_REG_RECR2_INNER_FIELD_CNT)
                mlxsw_reg_recr2_inner_header_fields_enable_set(recr2_pl, bit, 1);
 
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
+       if (err)
+               goto err_reg_write;
+
+       return 0;
+
+err_reg_write:
+       mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, new_inc_parsing_depth,
+                                             old_inc_parsing_depth);
+       return err;
 }
 #else
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
index c5d7007..25d3eae 100644 (file)
@@ -81,6 +81,7 @@ struct mlxsw_sp_router {
        size_t adj_grp_size_ranges_count;
        struct delayed_work nh_grp_activity_dw;
        struct list_head nh_res_grp_list;
+       bool inc_parsing_depth;
 };
 
 struct mlxsw_sp_fib_entry_priv {
index 8f90cd3..22fede5 100644 (file)
@@ -335,14 +335,16 @@ mlxsw_sp_bridge_port_find(struct mlxsw_sp_bridge *bridge,
 
 static struct mlxsw_sp_bridge_port *
 mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
-                           struct net_device *brport_dev)
+                           struct net_device *brport_dev,
+                           struct netlink_ext_ack *extack)
 {
        struct mlxsw_sp_bridge_port *bridge_port;
        struct mlxsw_sp_port *mlxsw_sp_port;
+       int err;
 
        bridge_port = kzalloc(sizeof(*bridge_port), GFP_KERNEL);
        if (!bridge_port)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(brport_dev);
        bridge_port->lagged = mlxsw_sp_port->lagged;
@@ -359,12 +361,23 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
        list_add(&bridge_port->list, &bridge_device->ports_list);
        bridge_port->ref_count = 1;
 
+       err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev,
+                                           NULL, NULL, NULL, false, extack);
+       if (err)
+               goto err_switchdev_offload;
+
        return bridge_port;
+
+err_switchdev_offload:
+       list_del(&bridge_port->list);
+       kfree(bridge_port);
+       return ERR_PTR(err);
 }
 
 static void
 mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port)
 {
+       switchdev_bridge_port_unoffload(bridge_port->dev, NULL, NULL, NULL);
        list_del(&bridge_port->list);
        WARN_ON(!list_empty(&bridge_port->vlans_list));
        kfree(bridge_port);
@@ -390,9 +403,10 @@ mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge,
        if (IS_ERR(bridge_device))
                return ERR_CAST(bridge_device);
 
-       bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev);
-       if (!bridge_port) {
-               err = -ENOMEM;
+       bridge_port = mlxsw_sp_bridge_port_create(bridge_device, brport_dev,
+                                                 extack);
+       if (IS_ERR(bridge_port)) {
+               err = PTR_ERR(bridge_port);
                goto err_bridge_port_create;
        }
 
@@ -1569,7 +1583,6 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
 {
        long *flood_bitmap;
        int num_of_ports;
-       int alloc_size;
        u16 mid_idx;
        int err;
 
@@ -1579,18 +1592,17 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
                return false;
 
        num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
-       alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports);
-       flood_bitmap = kzalloc(alloc_size, GFP_KERNEL);
+       flood_bitmap = bitmap_alloc(num_of_ports, GFP_KERNEL);
        if (!flood_bitmap)
                return false;
 
-       bitmap_copy(flood_bitmap,  mid->ports_in_mid, num_of_ports);
+       bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
        mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
 
        mid->mid = mid_idx;
        err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
                                            bridge_device->mrouter);
-       kfree(flood_bitmap);
+       bitmap_free(flood_bitmap);
        if (err)
                return false;
 
index 8315184..3f69bb5 100644 (file)
@@ -689,7 +689,7 @@ static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 static const struct net_device_ops ks8851_netdev_ops = {
        .ndo_open               = ks8851_net_open,
        .ndo_stop               = ks8851_net_stop,
-       .ndo_do_ioctl           = ks8851_net_ioctl,
+       .ndo_eth_ioctl          = ks8851_net_ioctl,
        .ndo_start_xmit         = ks8851_start_xmit,
        .ndo_set_mac_address    = ks8851_set_mac_address,
        .ndo_set_rx_mode        = ks8851_set_rx_mode,
index 7945eb5..a0ee155 100644 (file)
@@ -6738,7 +6738,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_set_features       = netdev_set_features,
        .ndo_set_mac_address    = netdev_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_set_rx_mode        = netdev_set_rx_mode,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = netdev_netpoll,
index d54aa16..735eea1 100644 (file)
@@ -45,6 +45,7 @@ config ENCX24J600
 config LAN743X
        tristate "LAN743x support"
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select PHYLIB
        select CRC16
        select CRC32
index dae1032..9e8561c 100644 (file)
@@ -2655,7 +2655,7 @@ static const struct net_device_ops lan743x_netdev_ops = {
        .ndo_open               = lan743x_netdev_open,
        .ndo_stop               = lan743x_netdev_close,
        .ndo_start_xmit         = lan743x_netdev_xmit_frame,
-       .ndo_do_ioctl           = lan743x_netdev_ioctl,
+       .ndo_eth_ioctl          = lan743x_netdev_ioctl,
        .ndo_set_rx_mode        = lan743x_netdev_set_multicast,
        .ndo_change_mtu         = lan743x_netdev_change_mtu,
        .ndo_get_stats64        = lan743x_netdev_get_stats64,
index faa8f07..c271e86 100644 (file)
@@ -7,4 +7,4 @@ obj-$(CONFIG_SPARX5_SWITCH) += sparx5-switch.o
 
 sparx5-switch-objs  := sparx5_main.o sparx5_packet.o \
  sparx5_netdev.o sparx5_phylink.o sparx5_port.o sparx5_mactable.o sparx5_vlan.o \
- sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o
+ sparx5_switchdev.o sparx5_calendar.o sparx5_ethtool.o sparx5_fdma.o
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c
new file mode 100644 (file)
index 0000000..7436f62
--- /dev/null
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Microchip Sparx5 Switch driver
+ *
+ * Copyright (c) 2021 Microchip Technology Inc. and its subsidiaries.
+ *
+ * The Sparx5 Chip Register Model can be browsed at this location:
+ * https://github.com/microchip-ung/sparx-5_reginfo
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/dma-mapping.h>
+
+#include "sparx5_main_regs.h"
+#include "sparx5_main.h"
+#include "sparx5_port.h"
+
+#define FDMA_XTR_CHANNEL               6
+#define FDMA_INJ_CHANNEL               0
+
+#define FDMA_DCB_INFO_DATAL(x)         ((x) & GENMASK(15, 0))
+#define FDMA_DCB_INFO_TOKEN            BIT(17)
+#define FDMA_DCB_INFO_INTR             BIT(18)
+#define FDMA_DCB_INFO_SW(x)            (((x) << 24) & GENMASK(31, 24))
+
+#define FDMA_DCB_STATUS_BLOCKL(x)      ((x) & GENMASK(15, 0))
+#define FDMA_DCB_STATUS_SOF            BIT(16)
+#define FDMA_DCB_STATUS_EOF            BIT(17)
+#define FDMA_DCB_STATUS_INTR           BIT(18)
+#define FDMA_DCB_STATUS_DONE           BIT(19)
+#define FDMA_DCB_STATUS_BLOCKO(x)      (((x) << 20) & GENMASK(31, 20))
+#define FDMA_DCB_INVALID_DATA          0x1
+
+#define FDMA_XTR_BUFFER_SIZE           2048
+#define FDMA_WEIGHT                    4
+
+/* Frame DMA DCB format
+ *
+ * +---------------------------+
+ * |         Next Ptr          |
+ * +---------------------------+
+ * |   Reserved  |    Info     |
+ * +---------------------------+
+ * |         Data0 Ptr         |
+ * +---------------------------+
+ * |   Reserved  |    Status0  |
+ * +---------------------------+
+ * |         Data1 Ptr         |
+ * +---------------------------+
+ * |   Reserved  |    Status1  |
+ * +---------------------------+
+ * |         Data2 Ptr         |
+ * +---------------------------+
+ * |   Reserved  |    Status2  |
+ * |-------------|-------------|
+ * |                           |
+ * |                           |
+ * |                           |
+ * |                           |
+ * |                           |
+ * |---------------------------|
+ * |         Data14 Ptr        |
+ * +-------------|-------------+
+ * |   Reserved  |    Status14 |
+ * +-------------|-------------+
+ */
+
+/* For each hardware DB there is an entry in this list and when the HW DB
+ * entry is used, this SW DB entry is moved to the back of the list
+ */
+struct sparx5_db {
+       struct list_head list;
+       void *cpu_addr;
+};
+
+static void sparx5_fdma_rx_add_dcb(struct sparx5_rx *rx,
+                                  struct sparx5_rx_dcb_hw *dcb,
+                                  u64 nextptr)
+{
+       int idx = 0;
+
+       /* Reset the status of the DB */
+       for (idx = 0; idx < FDMA_RX_DCB_MAX_DBS; ++idx) {
+               struct sparx5_db_hw *db = &dcb->db[idx];
+
+               db->status = FDMA_DCB_STATUS_INTR;
+       }
+       dcb->nextptr = FDMA_DCB_INVALID_DATA;
+       dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE);
+       rx->last_entry->nextptr = nextptr;
+       rx->last_entry = dcb;
+}
+
+static void sparx5_fdma_tx_add_dcb(struct sparx5_tx *tx,
+                                  struct sparx5_tx_dcb_hw *dcb,
+                                  u64 nextptr)
+{
+       int idx = 0;
+
+       /* Reset the status of the DB */
+       for (idx = 0; idx < FDMA_TX_DCB_MAX_DBS; ++idx) {
+               struct sparx5_db_hw *db = &dcb->db[idx];
+
+               db->status = FDMA_DCB_STATUS_DONE;
+       }
+       dcb->nextptr = FDMA_DCB_INVALID_DATA;
+       dcb->info = FDMA_DCB_INFO_DATAL(FDMA_XTR_BUFFER_SIZE);
+}
+
+static void sparx5_fdma_rx_activate(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+       /* Write the buffer address in the LLP and LLP1 regs */
+       spx5_wr(((u64)rx->dma) & GENMASK(31, 0), sparx5,
+               FDMA_DCB_LLP(rx->channel_id));
+       spx5_wr(((u64)rx->dma) >> 32, sparx5, FDMA_DCB_LLP1(rx->channel_id));
+
+       /* Set the number of RX DBs to be used, and DB end-of-frame interrupt */
+       spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) |
+               FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+               FDMA_CH_CFG_CH_INJ_PORT_SET(XTR_QUEUE),
+               sparx5, FDMA_CH_CFG(rx->channel_id));
+
+       /* Set the RX Watermark to max */
+       spx5_rmw(FDMA_XTR_CFG_XTR_FIFO_WM_SET(31), FDMA_XTR_CFG_XTR_FIFO_WM,
+                sparx5,
+                FDMA_XTR_CFG);
+
+       /* Start RX fdma */
+       spx5_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0), FDMA_PORT_CTRL_XTR_STOP,
+                sparx5, FDMA_PORT_CTRL(0));
+
+       /* Enable RX channel DB interrupt */
+       spx5_rmw(BIT(rx->channel_id),
+                BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+                sparx5, FDMA_INTR_DB_ENA);
+
+       /* Activate the RX channel */
+       spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_ACTIVATE);
+}
+
+static void sparx5_fdma_rx_deactivate(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+       /* Dectivate the RX channel */
+       spx5_rmw(0, BIT(rx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE,
+                sparx5, FDMA_CH_ACTIVATE);
+
+       /* Disable RX channel DB interrupt */
+       spx5_rmw(0, BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+                sparx5, FDMA_INTR_DB_ENA);
+
+       /* Stop RX fdma */
+       spx5_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(1), FDMA_PORT_CTRL_XTR_STOP,
+                sparx5, FDMA_PORT_CTRL(0));
+}
+
+static void sparx5_fdma_tx_activate(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+       /* Write the buffer address in the LLP and LLP1 regs */
+       spx5_wr(((u64)tx->dma) & GENMASK(31, 0), sparx5,
+               FDMA_DCB_LLP(tx->channel_id));
+       spx5_wr(((u64)tx->dma) >> 32, sparx5, FDMA_DCB_LLP1(tx->channel_id));
+
+       /* Set the number of TX DBs to be used, and DB end-of-frame interrupt */
+       spx5_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) |
+               FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) |
+               FDMA_CH_CFG_CH_INJ_PORT_SET(INJ_QUEUE),
+               sparx5, FDMA_CH_CFG(tx->channel_id));
+
+       /* Start TX fdma */
+       spx5_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), FDMA_PORT_CTRL_INJ_STOP,
+                sparx5, FDMA_PORT_CTRL(0));
+
+       /* Activate the channel */
+       spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_ACTIVATE);
+}
+
+static void sparx5_fdma_tx_deactivate(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+       /* Disable the channel */
+       spx5_rmw(0, BIT(tx->channel_id) & FDMA_CH_ACTIVATE_CH_ACTIVATE,
+                sparx5, FDMA_CH_ACTIVATE);
+}
+
+static void sparx5_fdma_rx_reload(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+       /* Reload the RX channel */
+       spx5_wr(BIT(rx->channel_id), sparx5, FDMA_CH_RELOAD);
+}
+
+static void sparx5_fdma_tx_reload(struct sparx5 *sparx5, struct sparx5_tx *tx)
+{
+       /* Reload the TX channel */
+       spx5_wr(BIT(tx->channel_id), sparx5, FDMA_CH_RELOAD);
+}
+
+static struct sk_buff *sparx5_fdma_rx_alloc_skb(struct sparx5_rx *rx)
+{
+       return __netdev_alloc_skb(rx->ndev, FDMA_XTR_BUFFER_SIZE,
+                                 GFP_ATOMIC);
+}
+
+static bool sparx5_fdma_rx_get_frame(struct sparx5 *sparx5, struct sparx5_rx *rx)
+{
+       struct sparx5_db_hw *db_hw;
+       unsigned int packet_size;
+       struct sparx5_port *port;
+       struct sk_buff *new_skb;
+       struct frame_info fi;
+       struct sk_buff *skb;
+       dma_addr_t dma_addr;
+
+       /* Check if the DCB is done */
+       db_hw = &rx->dcb_entries[rx->dcb_index].db[rx->db_index];
+       if (unlikely(!(db_hw->status & FDMA_DCB_STATUS_DONE)))
+               return false;
+       skb = rx->skb[rx->dcb_index][rx->db_index];
+       /* Replace the DB entry with a new SKB */
+       new_skb = sparx5_fdma_rx_alloc_skb(rx);
+       if (unlikely(!new_skb))
+               return false;
+       /* Map the new skb data and set the new skb */
+       dma_addr = virt_to_phys(new_skb->data);
+       rx->skb[rx->dcb_index][rx->db_index] = new_skb;
+       db_hw->dataptr = dma_addr;
+       packet_size = FDMA_DCB_STATUS_BLOCKL(db_hw->status);
+       skb_put(skb, packet_size);
+       /* Now do the normal processing of the skb */
+       sparx5_ifh_parse((u32 *)skb->data, &fi);
+       /* Map to port netdev */
+       port = fi.src_port < SPX5_PORTS ?  sparx5->ports[fi.src_port] : NULL;
+       if (!port || !port->ndev) {
+               dev_err(sparx5->dev, "Data on inactive port %d\n", fi.src_port);
+               sparx5_xtr_flush(sparx5, XTR_QUEUE);
+               return false;
+       }
+       skb->dev = port->ndev;
+       skb_pull(skb, IFH_LEN * sizeof(u32));
+       if (likely(!(skb->dev->features & NETIF_F_RXFCS)))
+               skb_trim(skb, skb->len - ETH_FCS_LEN);
+       skb->protocol = eth_type_trans(skb, skb->dev);
+       /* Everything we see on an interface that is in the HW bridge
+        * has already been forwarded
+        */
+       if (test_bit(port->portno, sparx5->bridge_mask))
+               skb->offload_fwd_mark = 1;
+       skb->dev->stats.rx_bytes += skb->len;
+       skb->dev->stats.rx_packets++;
+       rx->packets++;
+       netif_receive_skb(skb);
+       return true;
+}
+
+static int sparx5_fdma_napi_callback(struct napi_struct *napi, int weight)
+{
+       struct sparx5_rx *rx = container_of(napi, struct sparx5_rx, napi);
+       struct sparx5 *sparx5 = container_of(rx, struct sparx5, rx);
+       int counter = 0;
+
+       while (counter < weight && sparx5_fdma_rx_get_frame(sparx5, rx)) {
+               struct sparx5_rx_dcb_hw *old_dcb;
+
+               rx->db_index++;
+               counter++;
+               /* Check if the DCB can be reused */
+               if (rx->db_index != FDMA_RX_DCB_MAX_DBS)
+                       continue;
+               /* As the DCB  can be reused, just advance the dcb_index
+                * pointer and set the nextptr in the DCB
+                */
+               rx->db_index = 0;
+               old_dcb = &rx->dcb_entries[rx->dcb_index];
+               rx->dcb_index++;
+               rx->dcb_index &= FDMA_DCB_MAX - 1;
+               sparx5_fdma_rx_add_dcb(rx, old_dcb,
+                                      rx->dma +
+                                      ((unsigned long)old_dcb -
+                                       (unsigned long)rx->dcb_entries));
+       }
+       if (counter < weight) {
+               napi_complete_done(&rx->napi, counter);
+               spx5_rmw(BIT(rx->channel_id),
+                        BIT(rx->channel_id) & FDMA_INTR_DB_ENA_INTR_DB_ENA,
+                        sparx5, FDMA_INTR_DB_ENA);
+       }
+       if (counter)
+               sparx5_fdma_rx_reload(sparx5, rx);
+       return counter;
+}
+
+static struct sparx5_tx_dcb_hw *sparx5_fdma_next_dcb(struct sparx5_tx *tx,
+                                                    struct sparx5_tx_dcb_hw *dcb)
+{
+       struct sparx5_tx_dcb_hw *next_dcb;
+
+       next_dcb = dcb;
+       next_dcb++;
+       /* Handle wrap-around */
+       if ((unsigned long)next_dcb >=
+           ((unsigned long)tx->first_entry + FDMA_DCB_MAX * sizeof(*dcb)))
+               next_dcb = tx->first_entry;
+       return next_dcb;
+}
+
+int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb)
+{
+       struct sparx5_tx_dcb_hw *next_dcb_hw;
+       struct sparx5_tx *tx = &sparx5->tx;
+       static bool first_time = true;
+       struct sparx5_db_hw *db_hw;
+       struct sparx5_db *db;
+
+       next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry);
+       db_hw = &next_dcb_hw->db[0];
+       if (!(db_hw->status & FDMA_DCB_STATUS_DONE))
+               tx->dropped++;
+       db = list_first_entry(&tx->db_list, struct sparx5_db, list);
+       list_move_tail(&db->list, &tx->db_list);
+       next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA;
+       tx->curr_entry->nextptr = tx->dma +
+               ((unsigned long)next_dcb_hw -
+                (unsigned long)tx->first_entry);
+       tx->curr_entry = next_dcb_hw;
+       memset(db->cpu_addr, 0, FDMA_XTR_BUFFER_SIZE);
+       memcpy(db->cpu_addr, ifh, IFH_LEN * 4);
+       memcpy(db->cpu_addr + IFH_LEN * 4, skb->data, skb->len);
+       db_hw->status = FDMA_DCB_STATUS_SOF |
+                       FDMA_DCB_STATUS_EOF |
+                       FDMA_DCB_STATUS_BLOCKO(0) |
+                       FDMA_DCB_STATUS_BLOCKL(skb->len + IFH_LEN * 4 + 4);
+       if (first_time) {
+               sparx5_fdma_tx_activate(sparx5, tx);
+               first_time = false;
+       } else {
+               sparx5_fdma_tx_reload(sparx5, tx);
+       }
+       return NETDEV_TX_OK;
+}
+
+static int sparx5_fdma_rx_alloc(struct sparx5 *sparx5)
+{
+       struct sparx5_rx *rx = &sparx5->rx;
+       struct sparx5_rx_dcb_hw *dcb;
+       int idx, jdx;
+       int size;
+
+       size = sizeof(struct sparx5_rx_dcb_hw) * FDMA_DCB_MAX;
+       size = ALIGN(size, PAGE_SIZE);
+       rx->dcb_entries = devm_kzalloc(sparx5->dev, size, GFP_KERNEL);
+       if (!rx->dcb_entries)
+               return -ENOMEM;
+       rx->dma = virt_to_phys(rx->dcb_entries);
+       rx->last_entry = rx->dcb_entries;
+       rx->db_index = 0;
+       rx->dcb_index = 0;
+       /* Now for each dcb allocate the db */
+       for (idx = 0; idx < FDMA_DCB_MAX; ++idx) {
+               dcb = &rx->dcb_entries[idx];
+               dcb->info = 0;
+               /* For each db allocate an skb and map skb data pointer to the DB
+                * dataptr. In this way when the frame is received the skb->data
+                * will contain the frame, so no memcpy is needed
+                */
+               for (jdx = 0; jdx < FDMA_RX_DCB_MAX_DBS; ++jdx) {
+                       struct sparx5_db_hw *db_hw = &dcb->db[jdx];
+                       dma_addr_t dma_addr;
+                       struct sk_buff *skb;
+
+                       skb = sparx5_fdma_rx_alloc_skb(rx);
+                       if (!skb)
+                               return -ENOMEM;
+
+                       dma_addr = virt_to_phys(skb->data);
+                       db_hw->dataptr = dma_addr;
+                       db_hw->status = 0;
+                       rx->skb[idx][jdx] = skb;
+               }
+               sparx5_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * idx);
+       }
+       netif_napi_add(rx->ndev, &rx->napi, sparx5_fdma_napi_callback, FDMA_WEIGHT);
+       napi_enable(&rx->napi);
+       sparx5_fdma_rx_activate(sparx5, rx);
+       return 0;
+}
+
+static int sparx5_fdma_tx_alloc(struct sparx5 *sparx5)
+{
+       struct sparx5_tx *tx = &sparx5->tx;
+       struct sparx5_tx_dcb_hw *dcb;
+       int idx, jdx;
+       int size;
+
+       size = sizeof(struct sparx5_tx_dcb_hw) * FDMA_DCB_MAX;
+       size = ALIGN(size, PAGE_SIZE);
+       tx->curr_entry = devm_kzalloc(sparx5->dev, size, GFP_KERNEL);
+       if (!tx->curr_entry)
+               return -ENOMEM;
+       tx->dma = virt_to_phys(tx->curr_entry);
+       tx->first_entry = tx->curr_entry;
+       INIT_LIST_HEAD(&tx->db_list);
+       /* Now for each dcb allocate the db */
+       for (idx = 0; idx < FDMA_DCB_MAX; ++idx) {
+               dcb = &tx->curr_entry[idx];
+               dcb->info = 0;
+               /* TX databuffers must be 16byte aligned */
+               for (jdx = 0; jdx < FDMA_TX_DCB_MAX_DBS; ++jdx) {
+                       struct sparx5_db_hw *db_hw = &dcb->db[jdx];
+                       struct sparx5_db *db;
+                       dma_addr_t phys;
+                       void *cpu_addr;
+
+                       cpu_addr = devm_kzalloc(sparx5->dev,
+                                               FDMA_XTR_BUFFER_SIZE,
+                                               GFP_KERNEL);
+                       if (!cpu_addr)
+                               return -ENOMEM;
+                       phys = virt_to_phys(cpu_addr);
+                       db_hw->dataptr = phys;
+                       db_hw->status = 0;
+                       db = devm_kzalloc(sparx5->dev, sizeof(*db), GFP_KERNEL);
+                       db->cpu_addr = cpu_addr;
+                       list_add_tail(&db->list, &tx->db_list);
+               }
+               sparx5_fdma_tx_add_dcb(tx, dcb, tx->dma + sizeof(*dcb) * idx);
+               /* Let the curr_entry to point to the last allocated entry */
+               if (idx == FDMA_DCB_MAX - 1)
+                       tx->curr_entry = dcb;
+       }
+       return 0;
+}
+
+static void sparx5_fdma_rx_init(struct sparx5 *sparx5,
+                               struct sparx5_rx *rx, int channel)
+{
+       int idx;
+
+       rx->channel_id = channel;
+       /* Fetch a netdev for SKB and NAPI use, any will do */
+       for (idx = 0; idx < SPX5_PORTS; ++idx) {
+               struct sparx5_port *port = sparx5->ports[idx];
+
+               if (port && port->ndev) {
+                       rx->ndev = port->ndev;
+                       break;
+               }
+       }
+}
+
+static void sparx5_fdma_tx_init(struct sparx5 *sparx5,
+                               struct sparx5_tx *tx, int channel)
+{
+       tx->channel_id = channel;
+}
+
+irqreturn_t sparx5_fdma_handler(int irq, void *args)
+{
+       struct sparx5 *sparx5 = args;
+       u32 db = 0, err = 0;
+
+       db = spx5_rd(sparx5, FDMA_INTR_DB);
+       err = spx5_rd(sparx5, FDMA_INTR_ERR);
+       /* Clear interrupt */
+       if (db) {
+               spx5_wr(0, sparx5, FDMA_INTR_DB_ENA);
+               spx5_wr(db, sparx5, FDMA_INTR_DB);
+               napi_schedule(&sparx5->rx.napi);
+       }
+       if (err) {
+               u32 err_type = spx5_rd(sparx5, FDMA_ERRORS);
+
+               dev_err_ratelimited(sparx5->dev,
+                                   "ERR: int: %#x, type: %#x\n",
+                                   err, err_type);
+               spx5_wr(err, sparx5, FDMA_INTR_ERR);
+               spx5_wr(err_type, sparx5, FDMA_ERRORS);
+       }
+       return IRQ_HANDLED;
+}
+
+static void sparx5_fdma_injection_mode(struct sparx5 *sparx5)
+{
+       const int byte_swap = 1;
+       int portno;
+       int urgency;
+
+       /* Change mode to fdma extraction and injection */
+       spx5_wr(QS_XTR_GRP_CFG_MODE_SET(2) |
+               QS_XTR_GRP_CFG_STATUS_WORD_POS_SET(1) |
+               QS_XTR_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+               sparx5, QS_XTR_GRP_CFG(XTR_QUEUE));
+       spx5_wr(QS_INJ_GRP_CFG_MODE_SET(2) |
+               QS_INJ_GRP_CFG_BYTE_SWAP_SET(byte_swap),
+               sparx5, QS_INJ_GRP_CFG(INJ_QUEUE));
+
+       /* CPU ports capture setup */
+       for (portno = SPX5_PORT_CPU_0; portno <= SPX5_PORT_CPU_1; portno++) {
+               /* ASM CPU port: No preamble, IFH, enable padding */
+               spx5_wr(ASM_PORT_CFG_PAD_ENA_SET(1) |
+                       ASM_PORT_CFG_NO_PREAMBLE_ENA_SET(1) |
+                       ASM_PORT_CFG_INJ_FORMAT_CFG_SET(1), /* 1 = IFH */
+                       sparx5, ASM_PORT_CFG(portno));
+
+               /* Reset WM cnt to unclog queued frames */
+               spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR_SET(1),
+                        DSM_DEV_TX_STOP_WM_CFG_DEV_TX_CNT_CLR,
+                        sparx5,
+                        DSM_DEV_TX_STOP_WM_CFG(portno));
+
+               /* Set Disassembler Stop Watermark level */
+               spx5_rmw(DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM_SET(100),
+                        DSM_DEV_TX_STOP_WM_CFG_DEV_TX_STOP_WM,
+                        sparx5,
+                        DSM_DEV_TX_STOP_WM_CFG(portno));
+
+               /* Enable port in queue system */
+               urgency = sparx5_port_fwd_urg(sparx5, SPEED_2500);
+               spx5_rmw(QFWD_SWITCH_PORT_MODE_PORT_ENA_SET(1) |
+                        QFWD_SWITCH_PORT_MODE_FWD_URGENCY_SET(urgency),
+                        QFWD_SWITCH_PORT_MODE_PORT_ENA |
+                        QFWD_SWITCH_PORT_MODE_FWD_URGENCY,
+                        sparx5,
+                        QFWD_SWITCH_PORT_MODE(portno));
+
+               /* Disable Disassembler buffer underrun watchdog
+                * to avoid truncated packets in XTR
+                */
+               spx5_rmw(DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS_SET(1),
+                        DSM_BUF_CFG_UNDERFLOW_WATCHDOG_DIS,
+                        sparx5,
+                        DSM_BUF_CFG(portno));
+
+               /* Disabling frame aging */
+               spx5_rmw(HSCH_PORT_MODE_AGE_DIS_SET(1),
+                        HSCH_PORT_MODE_AGE_DIS,
+                        sparx5,
+                        HSCH_PORT_MODE(portno));
+       }
+}
+
+int sparx5_fdma_start(struct sparx5 *sparx5)
+{
+       int err;
+
+       /* Reset FDMA state */
+       spx5_wr(FDMA_CTRL_NRESET_SET(0), sparx5, FDMA_CTRL);
+       spx5_wr(FDMA_CTRL_NRESET_SET(1), sparx5, FDMA_CTRL);
+
+       /* Force ACP caching but disable read/write allocation */
+       spx5_rmw(CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA_SET(1) |
+                CPU_PROC_CTRL_ACP_AWCACHE_SET(0) |
+                CPU_PROC_CTRL_ACP_ARCACHE_SET(0),
+                CPU_PROC_CTRL_ACP_CACHE_FORCE_ENA |
+                CPU_PROC_CTRL_ACP_AWCACHE |
+                CPU_PROC_CTRL_ACP_ARCACHE,
+                sparx5, CPU_PROC_CTRL);
+
+       sparx5_fdma_injection_mode(sparx5);
+       sparx5_fdma_rx_init(sparx5, &sparx5->rx, FDMA_XTR_CHANNEL);
+       sparx5_fdma_tx_init(sparx5, &sparx5->tx, FDMA_INJ_CHANNEL);
+       err = sparx5_fdma_rx_alloc(sparx5);
+       if (err) {
+               dev_err(sparx5->dev, "Could not allocate RX buffers: %d\n", err);
+               return err;
+       }
+       err = sparx5_fdma_tx_alloc(sparx5);
+       if (err) {
+               dev_err(sparx5->dev, "Could not allocate TX buffers: %d\n", err);
+               return err;
+       }
+       return err;
+}
+
+static u32 sparx5_fdma_port_ctrl(struct sparx5 *sparx5)
+{
+       return spx5_rd(sparx5, FDMA_PORT_CTRL(0));
+}
+
+int sparx5_fdma_stop(struct sparx5 *sparx5)
+{
+       u32 val;
+
+       napi_disable(&sparx5->rx.napi);
+       /* Stop the fdma and channel interrupts */
+       sparx5_fdma_rx_deactivate(sparx5, &sparx5->rx);
+       sparx5_fdma_tx_deactivate(sparx5, &sparx5->tx);
+       /* Wait for the RX channel to stop */
+       read_poll_timeout(sparx5_fdma_port_ctrl, val,
+                         FDMA_PORT_CTRL_XTR_BUF_IS_EMPTY_GET(val) == 0,
+                         500, 10000, 0, sparx5);
+       return 0;
+}
index f666133..cbece6e 100644 (file)
@@ -640,8 +640,23 @@ static int sparx5_start(struct sparx5 *sparx5)
        sparx5_board_init(sparx5);
        err = sparx5_register_notifier_blocks(sparx5);
 
-       /* Start register based INJ/XTR */
+       /* Start Frame DMA with fallback to register based INJ/XTR */
        err = -ENXIO;
+       if (sparx5->fdma_irq >= 0) {
+               if (GCB_CHIP_ID_REV_ID_GET(sparx5->chip_id) > 0)
+                       err = devm_request_threaded_irq(sparx5->dev,
+                                                       sparx5->fdma_irq,
+                                                       NULL,
+                                                       sparx5_fdma_handler,
+                                                       IRQF_ONESHOT,
+                                                       "sparx5-fdma", sparx5);
+               if (!err)
+                       err = sparx5_fdma_start(sparx5);
+               if (err)
+                       sparx5->fdma_irq = -ENXIO;
+       } else {
+               sparx5->fdma_irq = -ENXIO;
+       }
        if (err && sparx5->xtr_irq >= 0) {
                err = devm_request_irq(sparx5->dev, sparx5->xtr_irq,
                                       sparx5_xtr_handler, IRQF_SHARED,
@@ -766,6 +781,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev)
                sparx5->base_mac[5] = 0;
        }
 
+       sparx5->fdma_irq = platform_get_irq_byname(sparx5->pdev, "fdma");
        sparx5->xtr_irq = platform_get_irq_byname(sparx5->pdev, "xtr");
 
        /* Read chip ID to check CPU interface */
@@ -824,6 +840,11 @@ static int mchp_sparx5_remove(struct platform_device *pdev)
                disable_irq(sparx5->xtr_irq);
                sparx5->xtr_irq = -ENXIO;
        }
+       if (sparx5->fdma_irq) {
+               disable_irq(sparx5->fdma_irq);
+               sparx5->fdma_irq = -ENXIO;
+       }
+       sparx5_fdma_stop(sparx5);
        sparx5_cleanup_ports(sparx5);
        /* Unregister netdevs */
        sparx5_unregister_notifier_blocks(sparx5);
index 4d5f44c..a1acc9b 100644 (file)
@@ -73,8 +73,61 @@ enum sparx5_vlan_port_type {
 #define XTR_QUEUE     0
 #define INJ_QUEUE     0
 
+#define FDMA_DCB_MAX                   64
+#define FDMA_RX_DCB_MAX_DBS            15
+#define FDMA_TX_DCB_MAX_DBS            1
+
 struct sparx5;
 
+struct sparx5_db_hw {
+       u64 dataptr;
+       u64 status;
+};
+
+struct sparx5_rx_dcb_hw {
+       u64 nextptr;
+       u64 info;
+       struct sparx5_db_hw db[FDMA_RX_DCB_MAX_DBS];
+};
+
+struct sparx5_tx_dcb_hw {
+       u64 nextptr;
+       u64 info;
+       struct sparx5_db_hw db[FDMA_TX_DCB_MAX_DBS];
+};
+
+/* Frame DMA receive state:
+ * For each DB, there is a SKB, and the skb data pointer is mapped in
+ * the DB. Once a frame is received the skb is given to the upper layers
+ * and a new skb is added to the dcb.
+ * When the db_index reached FDMA_RX_DCB_MAX_DBS the DB is reused.
+ */
+struct sparx5_rx {
+       struct sparx5_rx_dcb_hw *dcb_entries;
+       struct sparx5_rx_dcb_hw *last_entry;
+       struct sk_buff *skb[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS];
+       int db_index;
+       int dcb_index;
+       dma_addr_t dma;
+       struct napi_struct napi;
+       u32 channel_id;
+       struct net_device *ndev;
+       u64 packets;
+};
+
+/* Frame DMA transmit state:
+ * DCBs are chained using the DCBs nextptr field.
+ */
+struct sparx5_tx {
+       struct sparx5_tx_dcb_hw *curr_entry;
+       struct sparx5_tx_dcb_hw *first_entry;
+       struct list_head db_list;
+       dma_addr_t dma;
+       u32 channel_id;
+       u64 packets;
+       u64 dropped;
+};
+
 struct sparx5_port_config {
        phy_interface_t portmode;
        u32 bandwidth;
@@ -167,6 +220,10 @@ struct sparx5 {
        bool sd_sgpio_remapping;
        /* Register based inj/xtr */
        int xtr_irq;
+       /* Frame DMA */
+       int fdma_irq;
+       struct sparx5_rx rx;
+       struct sparx5_tx tx;
 };
 
 /* sparx5_switchdev.c */
@@ -174,11 +231,23 @@ int sparx5_register_notifier_blocks(struct sparx5 *sparx5);
 void sparx5_unregister_notifier_blocks(struct sparx5 *sparx5);
 
 /* sparx5_packet.c */
+struct frame_info {
+       int src_port;
+};
+
+void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp);
+void sparx5_ifh_parse(u32 *ifh, struct frame_info *info);
 irqreturn_t sparx5_xtr_handler(int irq, void *_priv);
 int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev);
 int sparx5_manual_injection_mode(struct sparx5 *sparx5);
 void sparx5_port_inj_timer_setup(struct sparx5_port *port);
 
+/* sparx5_fdma.c */
+int sparx5_fdma_start(struct sparx5 *sparx5);
+int sparx5_fdma_stop(struct sparx5 *sparx5);
+int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb);
+irqreturn_t sparx5_fdma_handler(int irq, void *args);
+
 /* sparx5_mactable.c */
 void sparx5_mact_pull_work(struct work_struct *work);
 int sparx5_mact_learn(struct sparx5 *sparx5, int port,
index 09ca7a3..dc7e5ea 100644 (file)
 
 #define INJ_TIMEOUT_NS 50000
 
-struct frame_info {
-       int src_port;
-};
-
-static void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
+void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
 {
        /* Start flush */
        spx5_wr(QS_XTR_FLUSH_FLUSH_SET(BIT(grp)), sparx5, QS_XTR_FLUSH);
@@ -36,7 +32,7 @@ static void sparx5_xtr_flush(struct sparx5 *sparx5, u8 grp)
        spx5_wr(0, sparx5, QS_XTR_FLUSH);
 }
 
-static void sparx5_ifh_parse(u32 *ifh, struct frame_info *info)
+void sparx5_ifh_parse(u32 *ifh, struct frame_info *info)
 {
        u8 *xtr_hdr = (u8 *)ifh;
 
@@ -224,7 +220,10 @@ int sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev)
        struct sparx5 *sparx5 = port->sparx5;
        int ret;
 
-       ret = sparx5_inject(sparx5, port->ifh, skb, dev);
+       if (sparx5->fdma_irq > 0)
+               ret = sparx5_fdma_xmit(sparx5, port->ifh, skb);
+       else
+               ret = sparx5_inject(sparx5, port->ifh, skb, dev);
 
        if (ret == NETDEV_TX_OK) {
                stats->tx_bytes += skb->len;
index d2e3250..189a6a0 100644 (file)
@@ -596,7 +596,7 @@ static int sparx5_port_max_tags_set(struct sparx5 *sparx5,
        return 0;
 }
 
-static int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed)
+int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed)
 {
        u32 clk_period_ps = 1600; /* 625Mhz for now */
        u32 urg = 672000;
index fd05ab6..2f8043e 100644 (file)
@@ -89,5 +89,6 @@ int sparx5_get_port_status(struct sparx5 *sparx5,
                           struct sparx5_port_status *status);
 
 void sparx5_port_enable(struct sparx5_port *port, bool enable);
+int sparx5_port_fwd_urg(struct sparx5 *sparx5, u32 speed);
 
 #endif /* __SPARX5_PORT_H__ */
index a72e3b3..649ca60 100644 (file)
@@ -93,9 +93,12 @@ static int sparx5_port_attr_set(struct net_device *dev, const void *ctx,
 }
 
 static int sparx5_port_bridge_join(struct sparx5_port *port,
-                                  struct net_device *bridge)
+                                  struct net_device *bridge,
+                                  struct netlink_ext_ack *extack)
 {
        struct sparx5 *sparx5 = port->sparx5;
+       struct net_device *ndev = port->ndev;
+       int err;
 
        if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
                /* First bridged port */
@@ -109,12 +112,21 @@ static int sparx5_port_bridge_join(struct sparx5_port *port,
 
        set_bit(port->portno, sparx5->bridge_mask);
 
+       err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+                                           false, extack);
+       if (err)
+               goto err_switchdev_offload;
+
        /* Port enters in bridge mode therefor don't need to copy to CPU
         * frames for multicast in case the bridge is not requesting them
         */
-       __dev_mc_unsync(port->ndev, sparx5_mc_unsync);
+       __dev_mc_unsync(ndev, sparx5_mc_unsync);
 
        return 0;
+
+err_switchdev_offload:
+       clear_bit(port->portno, sparx5->bridge_mask);
+       return err;
 }
 
 static void sparx5_port_bridge_leave(struct sparx5_port *port,
@@ -122,6 +134,8 @@ static void sparx5_port_bridge_leave(struct sparx5_port *port,
 {
        struct sparx5 *sparx5 = port->sparx5;
 
+       switchdev_bridge_port_unoffload(port->ndev, NULL, NULL, NULL);
+
        clear_bit(port->portno, sparx5->bridge_mask);
        if (bitmap_empty(sparx5->bridge_mask, SPX5_PORTS))
                sparx5->hw_bridge_dev = NULL;
@@ -139,11 +153,15 @@ static int sparx5_port_changeupper(struct net_device *dev,
                                   struct netdev_notifier_changeupper_info *info)
 {
        struct sparx5_port *port = netdev_priv(dev);
+       struct netlink_ext_ack *extack;
        int err = 0;
 
+       extack = netdev_notifier_info_to_extack(&info->info);
+
        if (netif_is_bridge_master(info->upper_dev)) {
                if (info->linking)
-                       err = sparx5_port_bridge_join(port, info->upper_dev);
+                       err = sparx5_port_bridge_join(port, info->upper_dev,
+                                                     extack);
                else
                        sparx5_port_bridge_leave(port, info->upper_dev);
 
index 33e53d3..41ecd15 100644 (file)
@@ -239,10 +239,8 @@ struct gdma_event {
 
 struct gdma_queue;
 
-#define CQE_POLLING_BUFFER 512
 struct mana_eq {
        struct gdma_queue *eq;
-       struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
 };
 
 typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
@@ -291,11 +289,6 @@ struct gdma_queue {
                        unsigned int msix_index;
 
                        u32 log2_throttle_limit;
-
-                       /* NAPI data */
-                       struct napi_struct napi;
-                       int work_done;
-                       int budget;
                } eq;
 
                struct {
@@ -319,9 +312,6 @@ struct gdma_queue_spec {
                        void *context;
 
                        unsigned long log2_throttle_limit;
-
-                       /* Only used by the MANA device. */
-                       struct net_device *ndev;
                } eq;
 
                struct {
@@ -406,7 +396,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
 
 int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
 
-void mana_gd_arm_cq(struct gdma_queue *cq);
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
 
 struct gdma_wqe {
        u32 reserved    :24;
@@ -496,16 +486,28 @@ enum {
        GDMA_PROTOCOL_LAST      = GDMA_PROTOCOL_V1,
 };
 
+#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
+
+#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT
+
+#define GDMA_DRV_CAP_FLAGS2 0
+
+#define GDMA_DRV_CAP_FLAGS3 0
+
+#define GDMA_DRV_CAP_FLAGS4 0
+
 struct gdma_verify_ver_req {
        struct gdma_req_hdr hdr;
 
        /* Mandatory fields required for protocol establishment */
        u64 protocol_ver_min;
        u64 protocol_ver_max;
-       u64 drv_cap_flags1;
-       u64 drv_cap_flags2;
-       u64 drv_cap_flags3;
-       u64 drv_cap_flags4;
+
+       /* Gdma Driver Capability Flags */
+       u64 gd_drv_cap_flags1;
+       u64 gd_drv_cap_flags2;
+       u64 gd_drv_cap_flags3;
+       u64 gd_drv_cap_flags4;
 
        /* Advisory fields */
        u64 drv_ver;
index 2f87bf9..cee75b5 100644 (file)
@@ -67,6 +67,10 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev)
        if (gc->max_num_queues > resp.max_rq)
                gc->max_num_queues = resp.max_rq;
 
+       /* The Hardware Channel (HWC) used 1 MSI-X */
+       if (gc->max_num_queues > gc->num_msix_usable - 1)
+               gc->max_num_queues = gc->num_msix_usable - 1;
+
        return 0;
 }
 
@@ -267,7 +271,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
                              queue->id, queue->head * GDMA_WQE_BU_SIZE, 1);
 }
 
-void mana_gd_arm_cq(struct gdma_queue *cq)
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 {
        struct gdma_context *gc = cq->gdma_dev->gdma_context;
 
@@ -276,7 +280,7 @@ void mana_gd_arm_cq(struct gdma_queue *cq)
        u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS);
 
        mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
-                             head, SET_ARM_BIT);
+                             head, arm_bit);
 }
 
 static void mana_gd_process_eqe(struct gdma_queue *eq)
@@ -339,7 +343,6 @@ static void mana_gd_process_eq_events(void *arg)
        struct gdma_queue *eq = arg;
        struct gdma_context *gc;
        struct gdma_eqe *eqe;
-       unsigned int arm_bit;
        u32 head, num_eqe;
        int i;
 
@@ -370,92 +373,54 @@ static void mana_gd_process_eq_events(void *arg)
                eq->head++;
        }
 
-       /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */
-       if (mana_gd_is_hwc(eq->gdma_dev)) {
-               arm_bit = SET_ARM_BIT;
-       } else if (eq->eq.work_done < eq->eq.budget &&
-                  napi_complete_done(&eq->eq.napi, eq->eq.work_done)) {
-               arm_bit = SET_ARM_BIT;
-       } else {
-               arm_bit = 0;
-       }
-
        head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
 
        mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
-                             head, arm_bit);
-}
-
-static int mana_poll(struct napi_struct *napi, int budget)
-{
-       struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi);
-
-       eq->eq.work_done = 0;
-       eq->eq.budget = budget;
-
-       mana_gd_process_eq_events(eq);
-
-       return min(eq->eq.work_done, budget);
-}
-
-static void mana_gd_schedule_napi(void *arg)
-{
-       struct gdma_queue *eq = arg;
-       struct napi_struct *napi;
-
-       napi = &eq->eq.napi;
-       napi_schedule_irqoff(napi);
+                             head, SET_ARM_BIT);
 }
 
 static int mana_gd_register_irq(struct gdma_queue *queue,
                                const struct gdma_queue_spec *spec)
 {
        struct gdma_dev *gd = queue->gdma_dev;
-       bool is_mana = mana_gd_is_mana(gd);
        struct gdma_irq_context *gic;
        struct gdma_context *gc;
        struct gdma_resource *r;
        unsigned int msi_index;
        unsigned long flags;
-       int err;
+       struct device *dev;
+       int err = 0;
 
        gc = gd->gdma_context;
        r = &gc->msix_resource;
+       dev = gc->dev;
 
        spin_lock_irqsave(&r->lock, flags);
 
        msi_index = find_first_zero_bit(r->map, r->size);
-       if (msi_index >= r->size) {
+       if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
                err = -ENOSPC;
        } else {
                bitmap_set(r->map, msi_index, 1);
                queue->eq.msix_index = msi_index;
-               err = 0;
        }
 
        spin_unlock_irqrestore(&r->lock, flags);
 
-       if (err)
-               return err;
+       if (err) {
+               dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
+                       err, msi_index, r->size, gc->num_msix_usable);
 
-       WARN_ON(msi_index >= gc->num_msix_usable);
+               return err;
+       }
 
        gic = &gc->irq_contexts[msi_index];
 
-       if (is_mana) {
-               netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll,
-                              NAPI_POLL_WEIGHT);
-               napi_enable(&queue->eq.napi);
-       }
-
        WARN_ON(gic->handler || gic->arg);
 
        gic->arg = queue;
 
-       if (is_mana)
-               gic->handler = mana_gd_schedule_napi;
-       else
-               gic->handler = mana_gd_process_eq_events;
+       gic->handler = mana_gd_process_eq_events;
 
        return 0;
 }
@@ -549,11 +514,6 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 
        mana_gd_deregiser_irq(queue);
 
-       if (mana_gd_is_mana(queue->gdma_dev)) {
-               napi_disable(&queue->eq.napi);
-               netif_napi_del(&queue->eq.napi);
-       }
-
        if (queue->eq.disable_needed)
                mana_gd_disable_queue(queue);
 }
@@ -883,6 +843,11 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev)
        req.protocol_ver_min = GDMA_PROTOCOL_FIRST;
        req.protocol_ver_max = GDMA_PROTOCOL_LAST;
 
+       req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1;
+       req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2;
+       req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3;
+       req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4;
+
        err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
        if (err || resp.hdr.status) {
                dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n",
@@ -1128,7 +1093,7 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
 
        new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK;
        /* Return -1 if overflow detected. */
-       if (owner_bits != new_bits)
+       if (WARN_ON_ONCE(owner_bits != new_bits))
                return -1;
 
        comp->wq_num = cqe->cqe_info.wq_num;
@@ -1201,10 +1166,8 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
        if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
                max_queues_per_port = MANA_MAX_NUM_QUEUES;
 
-       max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV;
-
        /* Need 1 interrupt for the Hardware communication Channel (HWC) */
-       max_irqs++;
+       max_irqs = max_queues_per_port + 1;
 
        nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX);
        if (nvec < 0)
@@ -1291,6 +1254,9 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        int bar = 0;
        int err;
 
+       /* Each port has 2 CQs, each CQ has at most 1 EQE at a time */
+       BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE);
+
        err = pci_enable_device(pdev);
        if (err)
                return -ENXIO;
index 1a923fd..c1310ea 100644 (file)
@@ -304,7 +304,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
                                                &comp_data);
        }
 
-       mana_gd_arm_cq(q_self);
+       mana_gd_ring_cq(q_self, SET_ARM_BIT);
 }
 
 static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
index a2c3f82..fc98a5b 100644 (file)
@@ -46,7 +46,7 @@ enum TRI_STATE {
 #define EQ_SIZE (8 * PAGE_SIZE)
 #define LOG2_EQ_THROTTLE 3
 
-#define MAX_PORTS_IN_MANA_DEV 16
+#define MAX_PORTS_IN_MANA_DEV 256
 
 struct mana_stats {
        u64 packets;
@@ -225,6 +225,8 @@ struct mana_tx_comp_oob {
 
 struct mana_rxq;
 
+#define CQE_POLLING_BUFFER 512
+
 struct mana_cq {
        struct gdma_queue *gdma_cq;
 
@@ -244,8 +246,13 @@ struct mana_cq {
         */
        struct mana_txq *txq;
 
-       /* Pointer to a buffer which the CQ handler can copy the CQE's into. */
-       struct gdma_comp *gdma_comp_buf;
+       /* Buffer which the CQ handler can copy the CQE's into. */
+       struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER];
+
+       /* NAPI data */
+       struct napi_struct napi;
+       int work_done;
+       int budget;
 };
 
 #define GDMA_MAX_RQE_SGES 15
@@ -315,6 +322,8 @@ struct mana_context {
 
        u16 num_ports;
 
+       struct mana_eq *eqs;
+
        struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
 };
 
@@ -324,8 +333,6 @@ struct mana_port_context {
 
        u8 mac_addr[ETH_ALEN];
 
-       struct mana_eq *eqs;
-
        enum TRI_STATE rss_state;
 
        mana_handle_t default_rxobj;
@@ -395,11 +402,11 @@ enum mana_command_code {
 struct mana_query_device_cfg_req {
        struct gdma_req_hdr hdr;
 
-       /* Driver Capability flags */
-       u64 drv_cap_flags1;
-       u64 drv_cap_flags2;
-       u64 drv_cap_flags3;
-       u64 drv_cap_flags4;
+       /* MANA Nic Driver Capability flags */
+       u64 mn_drv_cap_flags1;
+       u64 mn_drv_cap_flags2;
+       u64 mn_drv_cap_flags3;
+       u64 mn_drv_cap_flags4;
 
        u32 proto_major_ver;
        u32 proto_minor_ver;
@@ -516,7 +523,7 @@ struct mana_cfg_rx_steer_resp {
        struct gdma_resp_hdr hdr;
 }; /* HW DATA */
 
-#define MANA_MAX_NUM_QUEUES 16
+#define MANA_MAX_NUM_QUEUES 64
 
 #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
 
index fff7890..1b21030 100644 (file)
@@ -696,66 +696,56 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
                           resp.hdr.status);
 }
 
-static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf)
-{
-       int i;
-
-       for (i = 0; i < CQE_POLLING_BUFFER; i++)
-               memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp));
-}
-
-static void mana_destroy_eq(struct gdma_context *gc,
-                           struct mana_port_context *apc)
+static void mana_destroy_eq(struct mana_context *ac)
 {
+       struct gdma_context *gc = ac->gdma_dev->gdma_context;
        struct gdma_queue *eq;
        int i;
 
-       if (!apc->eqs)
+       if (!ac->eqs)
                return;
 
-       for (i = 0; i < apc->num_queues; i++) {
-               eq = apc->eqs[i].eq;
+       for (i = 0; i < gc->max_num_queues; i++) {
+               eq = ac->eqs[i].eq;
                if (!eq)
                        continue;
 
                mana_gd_destroy_queue(gc, eq);
        }
 
-       kfree(apc->eqs);
-       apc->eqs = NULL;
+       kfree(ac->eqs);
+       ac->eqs = NULL;
 }
 
-static int mana_create_eq(struct mana_port_context *apc)
+static int mana_create_eq(struct mana_context *ac)
 {
-       struct gdma_dev *gd = apc->ac->gdma_dev;
+       struct gdma_dev *gd = ac->gdma_dev;
+       struct gdma_context *gc = gd->gdma_context;
        struct gdma_queue_spec spec = {};
        int err;
        int i;
 
-       apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq),
-                          GFP_KERNEL);
-       if (!apc->eqs)
+       ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
+                         GFP_KERNEL);
+       if (!ac->eqs)
                return -ENOMEM;
 
        spec.type = GDMA_EQ;
        spec.monitor_avl_buf = false;
        spec.queue_size = EQ_SIZE;
        spec.eq.callback = NULL;
-       spec.eq.context = apc->eqs;
+       spec.eq.context = ac->eqs;
        spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
-       spec.eq.ndev = apc->ndev;
-
-       for (i = 0; i < apc->num_queues; i++) {
-               mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll);
 
-               err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq);
+       for (i = 0; i < gc->max_num_queues; i++) {
+               err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
                if (err)
                        goto out;
        }
 
        return 0;
 out:
-       mana_destroy_eq(gd->gdma_context, apc);
+       mana_destroy_eq(ac);
        return err;
 }
 
@@ -790,7 +780,6 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
 
 static void mana_poll_tx_cq(struct mana_cq *cq)
 {
-       struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent;
        struct gdma_comp *completions = cq->gdma_comp_buf;
        struct gdma_posted_wqe_info *wqe_info;
        unsigned int pkt_transmitted = 0;
@@ -812,6 +801,9 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
        comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
                                    CQE_POLLING_BUFFER);
 
+       if (comp_read < 1)
+               return;
+
        for (i = 0; i < comp_read; i++) {
                struct mana_tx_comp_oob *cqe_oob;
 
@@ -861,7 +853,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
                mana_unmap_skb(skb, apc);
 
-               napi_consume_skb(skb, gdma_eq->eq.budget);
+               napi_consume_skb(skb, cq->budget);
 
                pkt_transmitted++;
        }
@@ -890,6 +882,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
 
        if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
                WARN_ON_ONCE(1);
+
+       cq->work_done = pkt_transmitted;
 }
 
 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -918,17 +912,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
        struct mana_stats *rx_stats = &rxq->stats;
        struct net_device *ndev = rxq->ndev;
        uint pkt_len = cqe->ppi[0].pkt_len;
-       struct mana_port_context *apc;
        u16 rxq_idx = rxq->rxq_idx;
        struct napi_struct *napi;
-       struct gdma_queue *eq;
        struct sk_buff *skb;
        u32 hash_value;
 
-       apc = netdev_priv(ndev);
-       eq = apc->eqs[rxq_idx].eq;
-       eq->eq.work_done++;
-       napi = &eq->eq.napi;
+       rxq->rx_cq.work_done++;
+       napi = &rxq->rx_cq.napi;
 
        if (!buf_va) {
                ++ndev->stats.rx_dropped;
@@ -1081,6 +1071,7 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
 {
        struct mana_cq *cq = context;
+       u8 arm_bit;
 
        WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
 
@@ -1089,7 +1080,33 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
        else
                mana_poll_tx_cq(cq);
 
-       mana_gd_arm_cq(gdma_queue);
+       if (cq->work_done < cq->budget &&
+           napi_complete_done(&cq->napi, cq->work_done)) {
+               arm_bit = SET_ARM_BIT;
+       } else {
+               arm_bit = 0;
+       }
+
+       mana_gd_ring_cq(gdma_queue, arm_bit);
+}
+
+static int mana_poll(struct napi_struct *napi, int budget)
+{
+       struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
+
+       cq->work_done = 0;
+       cq->budget = budget;
+
+       mana_cq_handler(cq, cq->gdma_cq);
+
+       return min(cq->work_done, budget);
+}
+
+static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
+{
+       struct mana_cq *cq = context;
+
+       napi_schedule_irqoff(&cq->napi);
 }
 
 static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
@@ -1114,12 +1131,18 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
 
 static void mana_destroy_txq(struct mana_port_context *apc)
 {
+       struct napi_struct *napi;
        int i;
 
        if (!apc->tx_qp)
                return;
 
        for (i = 0; i < apc->num_queues; i++) {
+               napi = &apc->tx_qp[i].tx_cq.napi;
+               napi_synchronize(napi);
+               napi_disable(napi);
+               netif_napi_del(napi);
+
                mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
 
                mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
@@ -1134,7 +1157,8 @@ static void mana_destroy_txq(struct mana_port_context *apc)
 static int mana_create_txq(struct mana_port_context *apc,
                           struct net_device *net)
 {
-       struct gdma_dev *gd = apc->ac->gdma_dev;
+       struct mana_context *ac = apc->ac;
+       struct gdma_dev *gd = ac->gdma_dev;
        struct mana_obj_spec wq_spec;
        struct mana_obj_spec cq_spec;
        struct gdma_queue_spec spec;
@@ -1186,7 +1210,6 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                /* Create SQ's CQ */
                cq = &apc->tx_qp[i].tx_cq;
-               cq->gdma_comp_buf = apc->eqs[i].cqe_poll;
                cq->type = MANA_CQ_TYPE_TX;
 
                cq->txq = txq;
@@ -1195,8 +1218,8 @@ static int mana_create_txq(struct mana_port_context *apc,
                spec.type = GDMA_CQ;
                spec.monitor_avl_buf = false;
                spec.queue_size = cq_size;
-               spec.cq.callback = mana_cq_handler;
-               spec.cq.parent_eq = apc->eqs[i].eq;
+               spec.cq.callback = mana_schedule_napi;
+               spec.cq.parent_eq = ac->eqs[i].eq;
                spec.cq.context = cq;
                err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
                if (err)
@@ -1237,7 +1260,10 @@ static int mana_create_txq(struct mana_port_context *apc,
 
                gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-               mana_gd_arm_cq(cq->gdma_cq);
+               netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT);
+               napi_enable(&cq->napi);
+
+               mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
        }
 
        return 0;
@@ -1246,21 +1272,6 @@ out:
        return err;
 }
 
-static void mana_napi_sync_for_rx(struct mana_rxq *rxq)
-{
-       struct net_device *ndev = rxq->ndev;
-       struct mana_port_context *apc;
-       u16 rxq_idx = rxq->rxq_idx;
-       struct napi_struct *napi;
-       struct gdma_queue *eq;
-
-       apc = netdev_priv(ndev);
-       eq = apc->eqs[rxq_idx].eq;
-       napi = &eq->eq.napi;
-
-       napi_synchronize(napi);
-}
-
 static void mana_destroy_rxq(struct mana_port_context *apc,
                             struct mana_rxq *rxq, bool validate_state)
 
@@ -1268,13 +1279,19 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
        struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
        struct mana_recv_buf_oob *rx_oob;
        struct device *dev = gc->dev;
+       struct napi_struct *napi;
        int i;
 
        if (!rxq)
                return;
 
+       napi = &rxq->rx_cq.napi;
+
        if (validate_state)
-               mana_napi_sync_for_rx(rxq);
+               napi_synchronize(napi);
+
+       napi_disable(napi);
+       netif_napi_del(napi);
 
        mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
 
@@ -1418,7 +1435,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
        /* Create RQ's CQ */
        cq = &rxq->rx_cq;
-       cq->gdma_comp_buf = eq->cqe_poll;
        cq->type = MANA_CQ_TYPE_RX;
        cq->rxq = rxq;
 
@@ -1426,7 +1442,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
        spec.type = GDMA_CQ;
        spec.monitor_avl_buf = false;
        spec.queue_size = cq_size;
-       spec.cq.callback = mana_cq_handler;
+       spec.cq.callback = mana_schedule_napi;
        spec.cq.parent_eq = eq->eq;
        spec.cq.context = cq;
        err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
@@ -1466,7 +1482,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 
        gc->cq_table[cq->gdma_id] = cq->gdma_cq;
 
-       mana_gd_arm_cq(cq->gdma_cq);
+       netif_napi_add(ndev, &cq->napi, mana_poll, 1);
+       napi_enable(&cq->napi);
+
+       mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
 out:
        if (!err)
                return rxq;
@@ -1484,12 +1503,13 @@ out:
 static int mana_add_rx_queues(struct mana_port_context *apc,
                              struct net_device *ndev)
 {
+       struct mana_context *ac = apc->ac;
        struct mana_rxq *rxq;
        int err = 0;
        int i;
 
        for (i = 0; i < apc->num_queues; i++) {
-               rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev);
+               rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
                if (!rxq) {
                        err = -ENOMEM;
                        goto out;
@@ -1601,16 +1621,11 @@ reset_apc:
 int mana_alloc_queues(struct net_device *ndev)
 {
        struct mana_port_context *apc = netdev_priv(ndev);
-       struct gdma_dev *gd = apc->ac->gdma_dev;
        int err;
 
-       err = mana_create_eq(apc);
-       if (err)
-               return err;
-
        err = mana_create_vport(apc, ndev);
        if (err)
-               goto destroy_eq;
+               return err;
 
        err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
        if (err)
@@ -1636,8 +1651,6 @@ int mana_alloc_queues(struct net_device *ndev)
 
 destroy_vport:
        mana_destroy_vport(apc);
-destroy_eq:
-       mana_destroy_eq(gd->gdma_context, apc);
        return err;
 }
 
@@ -1714,8 +1727,6 @@ static int mana_dealloc_queues(struct net_device *ndev)
 
        mana_destroy_vport(apc);
 
-       mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc);
-
        return 0;
 }
 
@@ -1768,7 +1779,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
        apc->ac = ac;
        apc->ndev = ndev;
        apc->max_queues = gc->max_num_queues;
-       apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES);
+       apc->num_queues = gc->max_num_queues;
        apc->port_handle = INVALID_MANA_HANDLE;
        apc->port_idx = port_idx;
 
@@ -1839,6 +1850,10 @@ int mana_probe(struct gdma_dev *gd)
        ac->num_ports = 1;
        gd->driver_data = ac;
 
+       err = mana_create_eq(ac);
+       if (err)
+               goto out;
+
        err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
                                    MANA_MICRO_VERSION, &ac->num_ports);
        if (err)
@@ -1888,6 +1903,9 @@ void mana_remove(struct gdma_dev *gd)
 
                free_netdev(ndev);
        }
+
+       mana_destroy_eq(ac);
+
 out:
        mana_gd_deregister_device(gd);
        gd->driver_data = NULL;
index 2d3157e..b6a73d1 100644 (file)
@@ -16,7 +16,7 @@ config MSCC_OCELOT_SWITCH_LIB
        select NET_DEVLINK
        select REGMAP_MMIO
        select PACKING
-       select PHYLIB
+       select PHYLINK
        tristate
        help
          This is a hardware support library for Ocelot network switches. It is
@@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB
 
 config MSCC_OCELOT_SWITCH
        tristate "Ocelot switch driver"
+       depends on PTP_1588_CLOCK_OPTIONAL
        depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        depends on HAS_IOMEM
index 2948d73..c581b95 100644 (file)
@@ -222,8 +222,35 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port,
                       ANA_PORT_DROP_CFG, port);
 }
 
+static int ocelot_vlan_member_set(struct ocelot *ocelot, u32 vlan_mask, u16 vid)
+{
+       int err;
+
+       err = ocelot_vlant_set_mask(ocelot, vid, vlan_mask);
+       if (err)
+               return err;
+
+       ocelot->vlan_mask[vid] = vlan_mask;
+
+       return 0;
+}
+
+static int ocelot_vlan_member_add(struct ocelot *ocelot, int port, u16 vid)
+{
+       return ocelot_vlan_member_set(ocelot,
+                                     ocelot->vlan_mask[vid] | BIT(port),
+                                     vid);
+}
+
+static int ocelot_vlan_member_del(struct ocelot *ocelot, int port, u16 vid)
+{
+       return ocelot_vlan_member_set(ocelot,
+                                     ocelot->vlan_mask[vid] & ~BIT(port),
+                                     vid);
+}
+
 int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
-                              bool vlan_aware)
+                              bool vlan_aware, struct netlink_ext_ack *extack)
 {
        struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1];
        struct ocelot_port *ocelot_port = ocelot->ports[port];
@@ -233,8 +260,8 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
        list_for_each_entry(filter, &block->rules, list) {
                if (filter->ingress_port_mask & BIT(port) &&
                    filter->action.vid_replace_ena) {
-                       dev_err(ocelot->dev,
-                               "Cannot change VLAN state with vlan modify rules active\n");
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Cannot change VLAN state with vlan modify rules active");
                        return -EBUSY;
                }
        }
@@ -259,16 +286,15 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port,
 EXPORT_SYMBOL(ocelot_port_vlan_filtering);
 
 int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
-                       bool untagged)
+                       bool untagged, struct netlink_ext_ack *extack)
 {
        struct ocelot_port *ocelot_port = ocelot->ports[port];
 
        /* Deny changing the native VLAN, but always permit deleting it */
        if (untagged && ocelot_port->native_vlan.vid != vid &&
            ocelot_port->native_vlan.valid) {
-               dev_err(ocelot->dev,
-                       "Port already has a native VLAN: %d\n",
-                       ocelot_port->native_vlan.vid);
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Port already has a native VLAN");
                return -EBUSY;
        }
 
@@ -279,13 +305,11 @@ EXPORT_SYMBOL(ocelot_vlan_prepare);
 int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
                    bool untagged)
 {
-       int ret;
+       int err;
 
-       /* Make the port a member of the VLAN */
-       ocelot->vlan_mask[vid] |= BIT(port);
-       ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
-       if (ret)
-               return ret;
+       err = ocelot_vlan_member_add(ocelot, port, vid);
+       if (err)
+               return err;
 
        /* Default ingress vlan classification */
        if (pvid) {
@@ -312,13 +336,11 @@ EXPORT_SYMBOL(ocelot_vlan_add);
 int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid)
 {
        struct ocelot_port *ocelot_port = ocelot->ports[port];
-       int ret;
+       int err;
 
-       /* Stop the port from being a member of the vlan */
-       ocelot->vlan_mask[vid] &= ~BIT(port);
-       ret = ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
-       if (ret)
-               return ret;
+       err = ocelot_vlan_member_del(ocelot, port, vid);
+       if (err)
+               return err;
 
        /* Ingress */
        if (ocelot_port->pvid_vlan.vid == vid) {
@@ -340,6 +362,7 @@ EXPORT_SYMBOL(ocelot_vlan_del);
 
 static void ocelot_vlan_init(struct ocelot *ocelot)
 {
+       unsigned long all_ports = GENMASK(ocelot->num_phys_ports - 1, 0);
        u16 port, vid;
 
        /* Clear VLAN table, by default all ports are members of all VLANs */
@@ -348,23 +371,19 @@ static void ocelot_vlan_init(struct ocelot *ocelot)
        ocelot_vlant_wait_for_completion(ocelot);
 
        /* Configure the port VLAN memberships */
-       for (vid = 1; vid < VLAN_N_VID; vid++) {
-               ocelot->vlan_mask[vid] = 0;
-               ocelot_vlant_set_mask(ocelot, vid, ocelot->vlan_mask[vid]);
-       }
+       for (vid = 1; vid < VLAN_N_VID; vid++)
+               ocelot_vlan_member_set(ocelot, 0, vid);
 
        /* Because VLAN filtering is enabled, we need VID 0 to get untagged
         * traffic.  It is added automatically if 8021q module is loaded, but
         * we can't rely on it since module may be not loaded.
         */
-       ocelot->vlan_mask[0] = GENMASK(ocelot->num_phys_ports - 1, 0);
-       ocelot_vlant_set_mask(ocelot, 0, ocelot->vlan_mask[0]);
+       ocelot_vlan_member_set(ocelot, all_ports, 0);
 
        /* Set vlan ingress filter mask to all ports but the CPU port by
         * default.
         */
-       ocelot_write(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0),
-                    ANA_VLANMASK);
+       ocelot_write(ocelot, all_ports, ANA_VLANMASK);
 
        for (port = 0; port < ocelot->num_phys_ports; port++) {
                ocelot_write_gix(ocelot, 0, REW_PORT_VLAN_CFG, port);
@@ -377,7 +396,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
        return ocelot_read_rix(ocelot, QSYS_SW_STATUS, port);
 }
 
-int ocelot_port_flush(struct ocelot *ocelot, int port)
+static int ocelot_port_flush(struct ocelot *ocelot, int port)
 {
        unsigned int pause_ena;
        int err, val;
@@ -429,63 +448,118 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
 
        return err;
 }
-EXPORT_SYMBOL(ocelot_port_flush);
 
-void ocelot_adjust_link(struct ocelot *ocelot, int port,
-                       struct phy_device *phydev)
+void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
+                                 unsigned int link_an_mode,
+                                 phy_interface_t interface,
+                                 unsigned long quirks)
 {
        struct ocelot_port *ocelot_port = ocelot->ports[port];
-       int speed, mode = 0;
+       int err;
+
+       ocelot_port_rmwl(ocelot_port, 0, DEV_MAC_ENA_CFG_RX_ENA,
+                        DEV_MAC_ENA_CFG);
+
+       ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
+
+       err = ocelot_port_flush(ocelot, port);
+       if (err)
+               dev_err(ocelot->dev, "failed to flush port %d: %d\n",
+                       port, err);
+
+       /* Put the port in reset. */
+       if (interface != PHY_INTERFACE_MODE_QSGMII ||
+           !(quirks & OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP))
+               ocelot_port_rmwl(ocelot_port,
+                                DEV_CLOCK_CFG_MAC_TX_RST |
+                                DEV_CLOCK_CFG_MAC_TX_RST,
+                                DEV_CLOCK_CFG_MAC_TX_RST |
+                                DEV_CLOCK_CFG_MAC_TX_RST,
+                                DEV_CLOCK_CFG);
+}
+EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_down);
+
+void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
+                               struct phy_device *phydev,
+                               unsigned int link_an_mode,
+                               phy_interface_t interface,
+                               int speed, int duplex,
+                               bool tx_pause, bool rx_pause,
+                               unsigned long quirks)
+{
+       struct ocelot_port *ocelot_port = ocelot->ports[port];
+       int mac_speed, mode = 0;
+       u32 mac_fc_cfg;
+
+       /* The MAC might be integrated in systems where the MAC speed is fixed
+        * and it's the PCS who is performing the rate adaptation, so we have
+        * to write "1000Mbps" into the LINK_SPEED field of DEV_CLOCK_CFG
+        * (which is also its default value).
+        */
+       if ((quirks & OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION) ||
+           speed == SPEED_1000) {
+               mac_speed = OCELOT_SPEED_1000;
+               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+       } else if (speed == SPEED_2500) {
+               mac_speed = OCELOT_SPEED_2500;
+               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+       } else if (speed == SPEED_100) {
+               mac_speed = OCELOT_SPEED_100;
+       } else {
+               mac_speed = OCELOT_SPEED_10;
+       }
 
-       switch (phydev->speed) {
+       if (duplex == DUPLEX_FULL)
+               mode |= DEV_MAC_MODE_CFG_FDX_ENA;
+
+       ocelot_port_writel(ocelot_port, mode, DEV_MAC_MODE_CFG);
+
+       /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
+        * PORT_RST bits in DEV_CLOCK_CFG.
+        */
+       ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(mac_speed),
+                          DEV_CLOCK_CFG);
+
+       switch (speed) {
        case SPEED_10:
-               speed = OCELOT_SPEED_10;
+               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_10);
                break;
        case SPEED_100:
-               speed = OCELOT_SPEED_100;
+               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_100);
                break;
        case SPEED_1000:
-               speed = OCELOT_SPEED_1000;
-               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
-               break;
        case SPEED_2500:
-               speed = OCELOT_SPEED_2500;
-               mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA;
+               mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(OCELOT_SPEED_1000);
                break;
        default:
-               dev_err(ocelot->dev, "Unsupported PHY speed on port %d: %d\n",
-                       port, phydev->speed);
+               dev_err(ocelot->dev, "Unsupported speed on port %d: %d\n",
+                       port, speed);
                return;
        }
 
-       phy_print_status(phydev);
-
-       if (!phydev->link)
-               return;
-
-       /* Only full duplex supported for now */
-       ocelot_port_writel(ocelot_port, DEV_MAC_MODE_CFG_FDX_ENA |
-                          mode, DEV_MAC_MODE_CFG);
-
-       /* Disable HDX fast control */
-       ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS,
-                          DEV_PORT_MISC);
+       /* Handle RX pause in all cases, with 2500base-X this is used for rate
+        * adaptation.
+        */
+       mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
 
-       /* SGMII only for now */
-       ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA,
-                          PCS1G_MODE_CFG);
-       ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
+       if (tx_pause)
+               mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
+                             SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
+                             SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
+                             SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
 
-       /* Enable PCS */
-       ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
+       /* Flow control. Link speed is only used here to evaluate the time
+        * specification in incoming pause frames.
+        */
+       ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
 
-       /* No aneg on SGMII */
-       ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG);
+       ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
 
-       /* No loopback */
-       ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG);
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, tx_pause);
 
-       /* Enable MAC module */
+       /* Undo the effects of ocelot_phylink_mac_link_down:
+        * enable MAC module
+        */
        ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
                           DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
 
@@ -502,39 +576,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port,
        /* Core: Enable port for frame transfer */
        ocelot_fields_write(ocelot, port,
                            QSYS_SWITCH_PORT_MODE_PORT_ENA, 1);
-
-       /* Flow control */
-       ocelot_write_rix(ocelot, SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
-                        SYS_MAC_FC_CFG_RX_FC_ENA | SYS_MAC_FC_CFG_TX_FC_ENA |
-                        SYS_MAC_FC_CFG_ZERO_PAUSE_ENA |
-                        SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
-                        SYS_MAC_FC_CFG_FC_LINK_SPEED(speed),
-                        SYS_MAC_FC_CFG, port);
-       ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
-}
-EXPORT_SYMBOL(ocelot_adjust_link);
-
-void ocelot_port_enable(struct ocelot *ocelot, int port,
-                       struct phy_device *phy)
-{
-       /* Enable receiving frames on the port, and activate auto-learning of
-        * MAC addresses.
-        */
-       ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
-                        ANA_PORT_PORT_CFG_RECV_ENA |
-                        ANA_PORT_PORT_CFG_PORTID_VAL(port),
-                        ANA_PORT_PORT_CFG, port);
 }
-EXPORT_SYMBOL(ocelot_port_enable);
-
-void ocelot_port_disable(struct ocelot *ocelot, int port)
-{
-       struct ocelot_port *ocelot_port = ocelot->ports[port];
-
-       ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
-       ocelot_fields_write(ocelot, port, QSYS_SWITCH_PORT_MODE_PORT_ENA, 0);
-}
-EXPORT_SYMBOL(ocelot_port_disable);
+EXPORT_SYMBOL_GPL(ocelot_phylink_mac_link_up);
 
 static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
                                         struct sk_buff *clone)
@@ -1957,6 +2000,15 @@ void ocelot_init_port(struct ocelot *ocelot, int port)
        /* Disable source address learning for standalone mode */
        ocelot_port_set_learning(ocelot, port, false);
 
+       /* Set the port's initial logical port ID value, enable receiving
+        * frames on it, and configure the MAC address learning type to
+        * automatic.
+        */
+       ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
+                        ANA_PORT_PORT_CFG_RECV_ENA |
+                        ANA_PORT_PORT_CFG_PORTID_VAL(port),
+                        ANA_PORT_PORT_CFG, port);
+
        /* Enable vcap lookups */
        ocelot_vcap_enable(ocelot, port);
 }
index db6b1a4..1952d6a 100644 (file)
@@ -12,8 +12,7 @@
 #include <linux/etherdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/net_tstamp.h>
-#include <linux/phy.h>
-#include <linux/phy/phy.h>
+#include <linux/phylink.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
@@ -42,11 +41,9 @@ struct ocelot_port_tc {
 struct ocelot_port_private {
        struct ocelot_port port;
        struct net_device *dev;
-       struct phy_device *phy;
+       struct phylink *phylink;
+       struct phylink_config phylink_config;
        u8 chip_port;
-
-       struct phy *serdes;
-
        struct ocelot_port_tc tc;
 };
 
@@ -107,7 +104,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg);
 void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg);
 
 int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
-                     struct phy_device *phy);
+                     struct device_node *portnp);
 void ocelot_release_port(struct ocelot_port *ocelot_port);
 int ocelot_devlink_init(struct ocelot *ocelot);
 void ocelot_devlink_teardown(struct ocelot *ocelot);
index e9d260d..c0c465a 100644 (file)
@@ -9,10 +9,14 @@
  */
 
 #include <linux/if_bridge.h>
+#include <linux/of_net.h>
+#include <linux/phy/phy.h>
 #include <net/pkt_cls.h>
 #include "ocelot.h"
 #include "ocelot_vcap.h"
 
+#define OCELOT_MAC_QUIRKS      OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP
+
 static struct ocelot *devlink_port_to_ocelot(struct devlink_port *dlp)
 {
        return devlink_priv(dlp->devlink);
@@ -160,6 +164,7 @@ int ocelot_port_devlink_init(struct ocelot *ocelot, int port,
        struct devlink *dl = ocelot->devlink;
        struct devlink_port_attrs attrs = {};
 
+       memset(dlp, 0, sizeof(*dlp));
        memcpy(attrs.switch_id.id, &ocelot->base_mac, id_len);
        attrs.switch_id.id_len = id_len;
        attrs.phys.port_number = port;
@@ -381,26 +386,6 @@ static int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
        return 0;
 }
 
-static void ocelot_port_adjust_link(struct net_device *dev)
-{
-       struct ocelot_port_private *priv = netdev_priv(dev);
-       struct ocelot *ocelot = priv->port.ocelot;
-       int port = priv->chip_port;
-
-       ocelot_adjust_link(ocelot, port, dev->phydev);
-}
-
-static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid,
-                                  bool untagged)
-{
-       struct ocelot_port_private *priv = netdev_priv(dev);
-       struct ocelot_port *ocelot_port = &priv->port;
-       struct ocelot *ocelot = ocelot_port->ocelot;
-       int port = priv->chip_port;
-
-       return ocelot_vlan_prepare(ocelot, port, vid, pvid, untagged);
-}
-
 static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid,
                               bool untagged)
 {
@@ -448,33 +433,8 @@ static int ocelot_vlan_vid_del(struct net_device *dev, u16 vid)
 static int ocelot_port_open(struct net_device *dev)
 {
        struct ocelot_port_private *priv = netdev_priv(dev);
-       struct ocelot_port *ocelot_port = &priv->port;
-       struct ocelot *ocelot = ocelot_port->ocelot;
-       int port = priv->chip_port;
-       int err;
 
-       if (priv->serdes) {
-               err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET,
-                                      ocelot_port->phy_mode);
-               if (err) {
-                       netdev_err(dev, "Could not set mode of SerDes\n");
-                       return err;
-               }
-       }
-
-       err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link,
-                                ocelot_port->phy_mode);
-       if (err) {
-               netdev_err(dev, "Could not attach to PHY\n");
-               return err;
-       }
-
-       dev->phydev = priv->phy;
-
-       phy_attached_info(priv->phy);
-       phy_start(priv->phy);
-
-       ocelot_port_enable(ocelot, port, priv->phy);
+       phylink_start(priv->phylink);
 
        return 0;
 }
@@ -482,14 +442,8 @@ static int ocelot_port_open(struct net_device *dev)
 static int ocelot_port_stop(struct net_device *dev)
 {
        struct ocelot_port_private *priv = netdev_priv(dev);
-       struct ocelot *ocelot = priv->port.ocelot;
-       int port = priv->chip_port;
-
-       phy_disconnect(priv->phy);
-
-       dev->phydev = NULL;
 
-       ocelot_port_disable(ocelot, port);
+       phylink_stop(priv->phylink);
 
        return 0;
 }
@@ -823,7 +777,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
        .ndo_vlan_rx_kill_vid           = ocelot_vlan_rx_kill_vid,
        .ndo_set_features               = ocelot_set_features,
        .ndo_setup_tc                   = ocelot_setup_tc,
-       .ndo_do_ioctl                   = ocelot_ioctl,
+       .ndo_eth_ioctl                  = ocelot_ioctl,
        .ndo_get_devlink_port           = ocelot_get_devlink_port,
 };
 
@@ -959,7 +913,8 @@ static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
                ocelot_port_attr_ageing_set(ocelot, port, attr->u.ageing_time);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-               ocelot_port_vlan_filtering(ocelot, port, attr->u.vlan_filtering);
+               ocelot_port_vlan_filtering(ocelot, port, attr->u.vlan_filtering,
+                                          extack);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
                ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled);
@@ -979,14 +934,26 @@ static int ocelot_port_attr_set(struct net_device *dev, const void *ctx,
        return err;
 }
 
+static int ocelot_vlan_vid_prepare(struct net_device *dev, u16 vid, bool pvid,
+                                  bool untagged, struct netlink_ext_ack *extack)
+{
+       struct ocelot_port_private *priv = netdev_priv(dev);
+       struct ocelot_port *ocelot_port = &priv->port;
+       struct ocelot *ocelot = ocelot_port->ocelot;
+       int port = priv->chip_port;
+
+       return ocelot_vlan_prepare(ocelot, port, vid, pvid, untagged, extack);
+}
+
 static int ocelot_port_obj_add_vlan(struct net_device *dev,
-                                   const struct switchdev_obj_port_vlan *vlan)
+                                   const struct switchdev_obj_port_vlan *vlan,
+                                   struct netlink_ext_ack *extack)
 {
        bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
        bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
        int ret;
 
-       ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged);
+       ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged, extack);
        if (ret)
                return ret;
 
@@ -1074,7 +1041,8 @@ static int ocelot_port_obj_add(struct net_device *dev, const void *ctx,
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
                ret = ocelot_port_obj_add_vlan(dev,
-                                              SWITCHDEV_OBJ_PORT_VLAN(obj));
+                                              SWITCHDEV_OBJ_PORT_VLAN(obj),
+                                              extack);
                break;
        case SWITCHDEV_OBJ_ID_PORT_MDB:
                ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
@@ -1154,45 +1122,27 @@ static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
                                 struct net_device *bridge_dev,
                                 struct netlink_ext_ack *extack)
 {
-       struct ocelot_port *ocelot_port = ocelot->ports[port];
-       struct ocelot_port_private *priv;
        clock_t ageing_time;
        u8 stp_state;
-       int err;
-
-       priv = container_of(ocelot_port, struct ocelot_port_private, port);
 
        ocelot_inherit_brport_flags(ocelot, port, brport_dev);
 
        stp_state = br_port_get_stp_state(brport_dev);
        ocelot_bridge_stp_state_set(ocelot, port, stp_state);
 
-       err = ocelot_port_vlan_filtering(ocelot, port,
-                                        br_vlan_enabled(bridge_dev));
-       if (err)
-               return err;
-
        ageing_time = br_get_ageing_time(bridge_dev);
        ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
 
-       err = br_mdb_replay(bridge_dev, brport_dev, priv, true,
-                           &ocelot_switchdev_blocking_nb, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       err = br_vlan_replay(bridge_dev, brport_dev, priv, true,
-                            &ocelot_switchdev_blocking_nb, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       return 0;
+       return ocelot_port_vlan_filtering(ocelot, port,
+                                         br_vlan_enabled(bridge_dev),
+                                         extack);
 }
 
 static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port)
 {
        int err;
 
-       err = ocelot_port_vlan_filtering(ocelot, port, false);
+       err = ocelot_port_vlan_filtering(ocelot, port, false, NULL);
        if (err)
                return err;
 
@@ -1216,6 +1166,13 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
 
        ocelot_port_bridge_join(ocelot, port, bridge);
 
+       err = switchdev_bridge_port_offload(brport_dev, dev, priv,
+                                           &ocelot_netdevice_nb,
+                                           &ocelot_switchdev_blocking_nb,
+                                           false, extack);
+       if (err)
+               goto err_switchdev_offload;
+
        err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack);
        if (err)
                goto err_switchdev_sync;
@@ -1223,10 +1180,24 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
        return 0;
 
 err_switchdev_sync:
+       switchdev_bridge_port_unoffload(brport_dev, priv,
+                                       &ocelot_netdevice_nb,
+                                       &ocelot_switchdev_blocking_nb);
+err_switchdev_offload:
        ocelot_port_bridge_leave(ocelot, port, bridge);
        return err;
 }
 
+static void ocelot_netdevice_pre_bridge_leave(struct net_device *dev,
+                                             struct net_device *brport_dev)
+{
+       struct ocelot_port_private *priv = netdev_priv(dev);
+
+       switchdev_bridge_port_unoffload(brport_dev, priv,
+                                       &ocelot_netdevice_nb,
+                                       &ocelot_switchdev_blocking_nb);
+}
+
 static int ocelot_netdevice_bridge_leave(struct net_device *dev,
                                         struct net_device *brport_dev,
                                         struct net_device *bridge)
@@ -1279,6 +1250,18 @@ err_bridge_join:
        return err;
 }
 
+static void ocelot_netdevice_pre_lag_leave(struct net_device *dev,
+                                          struct net_device *bond)
+{
+       struct net_device *bridge_dev;
+
+       bridge_dev = netdev_master_upper_dev_get(bond);
+       if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+               return;
+
+       ocelot_netdevice_pre_bridge_leave(dev, bond);
+}
+
 static int ocelot_netdevice_lag_leave(struct net_device *dev,
                                      struct net_device *bond)
 {
@@ -1355,6 +1338,43 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,
        return NOTIFY_DONE;
 }
 
+static int
+ocelot_netdevice_prechangeupper(struct net_device *dev,
+                               struct net_device *brport_dev,
+                               struct netdev_notifier_changeupper_info *info)
+{
+       if (netif_is_bridge_master(info->upper_dev) && !info->linking)
+               ocelot_netdevice_pre_bridge_leave(dev, brport_dev);
+
+       if (netif_is_lag_master(info->upper_dev) && !info->linking)
+               ocelot_netdevice_pre_lag_leave(dev, info->upper_dev);
+
+       return NOTIFY_DONE;
+}
+
+static int
+ocelot_netdevice_lag_prechangeupper(struct net_device *dev,
+                                   struct netdev_notifier_changeupper_info *info)
+{
+       struct net_device *lower;
+       struct list_head *iter;
+       int err = NOTIFY_DONE;
+
+       netdev_for_each_lower_dev(dev, lower, iter) {
+               struct ocelot_port_private *priv = netdev_priv(lower);
+               struct ocelot_port *ocelot_port = &priv->port;
+
+               if (ocelot_port->bond != dev)
+                       return NOTIFY_OK;
+
+               err = ocelot_netdevice_prechangeupper(dev, lower, info);
+               if (err)
+                       return err;
+       }
+
+       return NOTIFY_DONE;
+}
+
 static int
 ocelot_netdevice_changelowerstate(struct net_device *dev,
                                  struct netdev_lag_lower_state_info *info)
@@ -1382,6 +1402,17 @@ static int ocelot_netdevice_event(struct notifier_block *unused,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
        switch (event) {
+       case NETDEV_PRECHANGEUPPER: {
+               struct netdev_notifier_changeupper_info *info = ptr;
+
+               if (ocelot_netdevice_dev_check(dev))
+                       return ocelot_netdevice_prechangeupper(dev, dev, info);
+
+               if (netif_is_lag_master(dev))
+                       return ocelot_netdevice_lag_prechangeupper(dev, info);
+
+               break;
+       }
        case NETDEV_CHANGEUPPER: {
                struct netdev_notifier_changeupper_info *info = ptr;
 
@@ -1466,8 +1497,188 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
        .notifier_call = ocelot_switchdev_blocking_event,
 };
 
+static void vsc7514_phylink_validate(struct phylink_config *config,
+                                    unsigned long *supported,
+                                    struct phylink_link_state *state)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct ocelot_port_private *priv = netdev_priv(ndev);
+       struct ocelot_port *ocelot_port = &priv->port;
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = {};
+
+       if (state->interface != PHY_INTERFACE_MODE_NA &&
+           state->interface != ocelot_port->phy_mode) {
+               bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+               return;
+       }
+
+       phylink_set_port_modes(mask);
+
+       phylink_set(mask, Pause);
+       phylink_set(mask, Autoneg);
+       phylink_set(mask, Asym_Pause);
+       phylink_set(mask, 10baseT_Half);
+       phylink_set(mask, 10baseT_Full);
+       phylink_set(mask, 100baseT_Half);
+       phylink_set(mask, 100baseT_Full);
+       phylink_set(mask, 1000baseT_Half);
+       phylink_set(mask, 1000baseT_Full);
+       phylink_set(mask, 1000baseX_Full);
+       phylink_set(mask, 2500baseT_Full);
+       phylink_set(mask, 2500baseX_Full);
+
+       bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+       bitmap_and(state->advertising, state->advertising, mask,
+                  __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void vsc7514_phylink_mac_config(struct phylink_config *config,
+                                      unsigned int link_an_mode,
+                                      const struct phylink_link_state *state)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct ocelot_port_private *priv = netdev_priv(ndev);
+       struct ocelot_port *ocelot_port = &priv->port;
+
+       /* Disable HDX fast control */
+       ocelot_port_writel(ocelot_port, DEV_PORT_MISC_HDX_FAST_DIS,
+                          DEV_PORT_MISC);
+
+       /* SGMII only for now */
+       ocelot_port_writel(ocelot_port, PCS1G_MODE_CFG_SGMII_MODE_ENA,
+                          PCS1G_MODE_CFG);
+       ocelot_port_writel(ocelot_port, PCS1G_SD_CFG_SD_SEL, PCS1G_SD_CFG);
+
+       /* Enable PCS */
+       ocelot_port_writel(ocelot_port, PCS1G_CFG_PCS_ENA, PCS1G_CFG);
+
+       /* No aneg on SGMII */
+       ocelot_port_writel(ocelot_port, 0, PCS1G_ANEG_CFG);
+
+       /* No loopback */
+       ocelot_port_writel(ocelot_port, 0, PCS1G_LB_CFG);
+}
+
+static void vsc7514_phylink_mac_link_down(struct phylink_config *config,
+                                         unsigned int link_an_mode,
+                                         phy_interface_t interface)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct ocelot_port_private *priv = netdev_priv(ndev);
+       struct ocelot *ocelot = priv->port.ocelot;
+       int port = priv->chip_port;
+
+       ocelot_phylink_mac_link_down(ocelot, port, link_an_mode, interface,
+                                    OCELOT_MAC_QUIRKS);
+}
+
+static void vsc7514_phylink_mac_link_up(struct phylink_config *config,
+                                       struct phy_device *phydev,
+                                       unsigned int link_an_mode,
+                                       phy_interface_t interface,
+                                       int speed, int duplex,
+                                       bool tx_pause, bool rx_pause)
+{
+       struct net_device *ndev = to_net_dev(config->dev);
+       struct ocelot_port_private *priv = netdev_priv(ndev);
+       struct ocelot *ocelot = priv->port.ocelot;
+       int port = priv->chip_port;
+
+       ocelot_phylink_mac_link_up(ocelot, port, phydev, link_an_mode,
+                                  interface, speed, duplex,
+                                  tx_pause, rx_pause, OCELOT_MAC_QUIRKS);
+}
+
+static const struct phylink_mac_ops ocelot_phylink_ops = {
+       .validate               = vsc7514_phylink_validate,
+       .mac_config             = vsc7514_phylink_mac_config,
+       .mac_link_down          = vsc7514_phylink_mac_link_down,
+       .mac_link_up            = vsc7514_phylink_mac_link_up,
+};
+
+static int ocelot_port_phylink_create(struct ocelot *ocelot, int port,
+                                     struct device_node *portnp)
+{
+       struct ocelot_port *ocelot_port = ocelot->ports[port];
+       struct ocelot_port_private *priv;
+       struct device *dev = ocelot->dev;
+       phy_interface_t phy_mode;
+       struct phylink *phylink;
+       int err;
+
+       of_get_phy_mode(portnp, &phy_mode);
+       /* DT bindings of internal PHY ports are broken and don't
+        * specify a phy-mode
+        */
+       if (phy_mode == PHY_INTERFACE_MODE_NA)
+               phy_mode = PHY_INTERFACE_MODE_INTERNAL;
+
+       if (phy_mode != PHY_INTERFACE_MODE_SGMII &&
+           phy_mode != PHY_INTERFACE_MODE_QSGMII &&
+           phy_mode != PHY_INTERFACE_MODE_INTERNAL) {
+               dev_err(dev, "unsupported phy mode %s for port %d\n",
+                       phy_modes(phy_mode), port);
+               return -EINVAL;
+       }
+
+       /* Ensure clock signals and speed are set on all QSGMII links */
+       if (phy_mode == PHY_INTERFACE_MODE_QSGMII)
+               ocelot_port_rmwl(ocelot_port, 0,
+                                DEV_CLOCK_CFG_MAC_TX_RST |
+                                DEV_CLOCK_CFG_MAC_TX_RST,
+                                DEV_CLOCK_CFG);
+
+       ocelot_port->phy_mode = phy_mode;
+
+       if (phy_mode != PHY_INTERFACE_MODE_INTERNAL) {
+               struct phy *serdes = of_phy_get(portnp, NULL);
+
+               if (IS_ERR(serdes)) {
+                       err = PTR_ERR(serdes);
+                       dev_err_probe(dev, err,
+                                     "missing SerDes phys for port %d\n",
+                                     port);
+                       return err;
+               }
+
+               err = phy_set_mode_ext(serdes, PHY_MODE_ETHERNET, phy_mode);
+               of_phy_put(serdes);
+               if (err) {
+                       dev_err(dev, "Could not SerDes mode on port %d: %pe\n",
+                               port, ERR_PTR(err));
+                       return err;
+               }
+       }
+
+       priv = container_of(ocelot_port, struct ocelot_port_private, port);
+
+       priv->phylink_config.dev = &priv->dev->dev;
+       priv->phylink_config.type = PHYLINK_NETDEV;
+
+       phylink = phylink_create(&priv->phylink_config,
+                                of_fwnode_handle(portnp),
+                                phy_mode, &ocelot_phylink_ops);
+       if (IS_ERR(phylink)) {
+               err = PTR_ERR(phylink);
+               dev_err(dev, "Could not create phylink (%pe)\n", phylink);
+               return err;
+       }
+
+       priv->phylink = phylink;
+
+       err = phylink_of_phy_connect(phylink, portnp, 0);
+       if (err) {
+               dev_err(dev, "Could not connect to PHY: %pe\n", ERR_PTR(err));
+               phylink_destroy(phylink);
+               priv->phylink = NULL;
+               return err;
+       }
+
+       return 0;
+}
+
 int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
-                     struct phy_device *phy)
+                     struct device_node *portnp)
 {
        struct ocelot_port_private *priv;
        struct ocelot_port *ocelot_port;
@@ -1480,7 +1691,6 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
        SET_NETDEV_DEV(dev, ocelot->dev);
        priv = netdev_priv(dev);
        priv->dev = dev;
-       priv->phy = phy;
        priv->chip_port = port;
        ocelot_port = &priv->port;
        ocelot_port->ocelot = ocelot;
@@ -1501,15 +1711,23 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target,
 
        ocelot_init_port(ocelot, port);
 
+       err = ocelot_port_phylink_create(ocelot, port, portnp);
+       if (err)
+               goto out;
+
        err = register_netdev(dev);
        if (err) {
                dev_err(ocelot->dev, "register_netdev failed\n");
-               free_netdev(dev);
-               ocelot->ports[port] = NULL;
-               return err;
+               goto out;
        }
 
        return 0;
+
+out:
+       ocelot->ports[port] = NULL;
+       free_netdev(dev);
+
+       return err;
 }
 
 void ocelot_release_port(struct ocelot_port *ocelot_port)
@@ -1519,5 +1737,14 @@ void ocelot_release_port(struct ocelot_port *ocelot_port)
                                                port);
 
        unregister_netdev(priv->dev);
+
+       if (priv->phylink) {
+               rtnl_lock();
+               phylink_disconnect_phy(priv->phylink);
+               rtnl_unlock();
+
+               phylink_destroy(priv->phylink);
+       }
+
        free_netdev(priv->dev);
 }
index 4bd7e9d..291ae68 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/of_net.h>
 #include <linux/netdevice.h>
+#include <linux/phylink.h>
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/mfd/syscon.h>
@@ -945,13 +946,9 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev,
        for_each_available_child_of_node(ports, portnp) {
                struct ocelot_port_private *priv;
                struct ocelot_port *ocelot_port;
-               struct device_node *phy_node;
                struct devlink_port *dlp;
-               phy_interface_t phy_mode;
-               struct phy_device *phy;
                struct regmap *target;
                struct resource *res;
-               struct phy *serdes;
                char res_name[8];
 
                if (of_property_read_u32(portnp, "reg", &reg))
@@ -975,77 +972,26 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev,
                        goto out_teardown;
                }
 
-               phy_node = of_parse_phandle(portnp, "phy-handle", 0);
-               if (!phy_node)
-                       continue;
-
-               phy = of_phy_find_device(phy_node);
-               of_node_put(phy_node);
-               if (!phy)
-                       continue;
-
                err = ocelot_port_devlink_init(ocelot, port,
                                               DEVLINK_PORT_FLAVOUR_PHYSICAL);
                if (err) {
                        of_node_put(portnp);
                        goto out_teardown;
                }
-               devlink_ports_registered |= BIT(port);
 
-               err = ocelot_probe_port(ocelot, port, target, phy);
+               err = ocelot_probe_port(ocelot, port, target, portnp);
                if (err) {
-                       of_node_put(portnp);
-                       goto out_teardown;
+                       ocelot_port_devlink_teardown(ocelot, port);
+                       continue;
                }
 
+               devlink_ports_registered |= BIT(port);
+
                ocelot_port = ocelot->ports[port];
                priv = container_of(ocelot_port, struct ocelot_port_private,
                                    port);
                dlp = &ocelot->devlink_ports[port];
                devlink_port_type_eth_set(dlp, priv->dev);
-
-               of_get_phy_mode(portnp, &phy_mode);
-
-               ocelot_port->phy_mode = phy_mode;
-
-               switch (ocelot_port->phy_mode) {
-               case PHY_INTERFACE_MODE_NA:
-                       continue;
-               case PHY_INTERFACE_MODE_SGMII:
-                       break;
-               case PHY_INTERFACE_MODE_QSGMII:
-                       /* Ensure clock signals and speed is set on all
-                        * QSGMII links
-                        */
-                       ocelot_port_writel(ocelot_port,
-                                          DEV_CLOCK_CFG_LINK_SPEED
-                                          (OCELOT_SPEED_1000),
-                                          DEV_CLOCK_CFG);
-                       break;
-               default:
-                       dev_err(ocelot->dev,
-                               "invalid phy mode for port%d, (Q)SGMII only\n",
-                               port);
-                       of_node_put(portnp);
-                       err = -EINVAL;
-                       goto out_teardown;
-               }
-
-               serdes = devm_of_phy_get(ocelot->dev, portnp, NULL);
-               if (IS_ERR(serdes)) {
-                       err = PTR_ERR(serdes);
-                       if (err == -EPROBE_DEFER)
-                               dev_dbg(ocelot->dev, "deferring probe\n");
-                       else
-                               dev_err(ocelot->dev,
-                                       "missing SerDes phys for port%d\n",
-                                       port);
-
-                       of_node_put(portnp);
-                       goto out_teardown;
-               }
-
-               priv->serdes = serdes;
        }
 
        /* Initialize unused devlink ports at the end */
@@ -1103,7 +1049,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (!np && !pdev->dev.platform_data)
                return -ENODEV;
 
-       devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot));
+       devlink =
+               devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -1187,7 +1134,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (err)
                goto out_put_ports;
 
-       err = devlink_register(devlink, ocelot->dev);
+       err = devlink_register(devlink);
        if (err)
                goto out_ocelot_deinit;
 
index fc99ad8..c1a75b0 100644 (file)
@@ -850,9 +850,9 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
        dmatest_page = alloc_page(GFP_KERNEL);
        if (!dmatest_page)
                return -ENOMEM;
-       dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
-                                  DMA_BIDIRECTIONAL);
-       if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+       dmatest_bus = dma_map_page(&mgp->pdev->dev, dmatest_page, 0,
+                                  PAGE_SIZE, DMA_BIDIRECTIONAL);
+       if (unlikely(dma_mapping_error(&mgp->pdev->dev, dmatest_bus))) {
                __free_page(dmatest_page);
                return -ENOMEM;
        }
@@ -899,7 +899,8 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
            (cmd.data0 & 0xffff);
 
 abort:
-       pci_unmap_page(mgp->pdev, dmatest_bus, PAGE_SIZE, DMA_BIDIRECTIONAL);
+       dma_unmap_page(&mgp->pdev->dev, dmatest_bus, PAGE_SIZE,
+                      DMA_BIDIRECTIONAL);
        put_page(dmatest_page);
 
        if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
@@ -1205,10 +1206,10 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
                                return;
                        }
 
-                       bus = pci_map_page(mgp->pdev, page, 0,
+                       bus = dma_map_page(&mgp->pdev->dev, page, 0,
                                           MYRI10GE_ALLOC_SIZE,
-                                          PCI_DMA_FROMDEVICE);
-                       if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+                                          DMA_FROM_DEVICE);
+                       if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus))) {
                                __free_pages(page, MYRI10GE_ALLOC_ORDER);
                                if (rx->fill_cnt - rx->cnt < 16)
                                        rx->watchdog_needed = 1;
@@ -1256,9 +1257,9 @@ myri10ge_unmap_rx_page(struct pci_dev *pdev,
        /* unmap the recvd page if we're the only or last user of it */
        if (bytes >= MYRI10GE_ALLOC_SIZE / 2 ||
            (info->page_offset + 2 * bytes) > MYRI10GE_ALLOC_SIZE) {
-               pci_unmap_page(pdev, (dma_unmap_addr(info, bus)
-                                     & ~(MYRI10GE_ALLOC_SIZE - 1)),
-                              MYRI10GE_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
+               dma_unmap_page(&pdev->dev, (dma_unmap_addr(info, bus)
+                                           & ~(MYRI10GE_ALLOC_SIZE - 1)),
+                              MYRI10GE_ALLOC_SIZE, DMA_FROM_DEVICE);
        }
 }
 
@@ -1398,16 +1399,16 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
                        ss->stats.tx_packets++;
                        dev_consume_skb_irq(skb);
                        if (len)
-                               pci_unmap_single(pdev,
+                               dma_unmap_single(&pdev->dev,
                                                 dma_unmap_addr(&tx->info[idx],
                                                                bus), len,
-                                                PCI_DMA_TODEVICE);
+                                                DMA_TO_DEVICE);
                } else {
                        if (len)
-                               pci_unmap_page(pdev,
+                               dma_unmap_page(&pdev->dev,
                                               dma_unmap_addr(&tx->info[idx],
                                                              bus), len,
-                                              PCI_DMA_TODEVICE);
+                                              DMA_TO_DEVICE);
                }
        }
 
@@ -1651,8 +1652,10 @@ myri10ge_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info)
        strlcpy(info->bus_info, pci_name(mgp->pdev), sizeof(info->bus_info));
 }
 
-static int
-myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
+static int myri10ge_get_coalesce(struct net_device *netdev,
+                                struct ethtool_coalesce *coal,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct myri10ge_priv *mgp = netdev_priv(netdev);
 
@@ -1660,8 +1663,10 @@ myri10ge_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
        return 0;
 }
 
-static int
-myri10ge_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *coal)
+static int myri10ge_set_coalesce(struct net_device *netdev,
+                                struct ethtool_coalesce *coal,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct myri10ge_priv *mgp = netdev_priv(netdev);
 
@@ -2110,16 +2115,16 @@ static void myri10ge_free_rings(struct myri10ge_slice_state *ss)
                        ss->stats.tx_dropped++;
                        dev_kfree_skb_any(skb);
                        if (len)
-                               pci_unmap_single(mgp->pdev,
+                               dma_unmap_single(&mgp->pdev->dev,
                                                 dma_unmap_addr(&tx->info[idx],
                                                                bus), len,
-                                                PCI_DMA_TODEVICE);
+                                                DMA_TO_DEVICE);
                } else {
                        if (len)
-                               pci_unmap_page(mgp->pdev,
+                               dma_unmap_page(&mgp->pdev->dev,
                                               dma_unmap_addr(&tx->info[idx],
                                                              bus), len,
-                                              PCI_DMA_TODEVICE);
+                                              DMA_TO_DEVICE);
                }
        }
        kfree(ss->rx_big.info);
@@ -2584,15 +2589,15 @@ static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
                len = dma_unmap_len(&tx->info[idx], len);
                if (len) {
                        if (tx->info[idx].skb != NULL)
-                               pci_unmap_single(mgp->pdev,
+                               dma_unmap_single(&mgp->pdev->dev,
                                                 dma_unmap_addr(&tx->info[idx],
                                                                bus), len,
-                                                PCI_DMA_TODEVICE);
+                                                DMA_TO_DEVICE);
                        else
-                               pci_unmap_page(mgp->pdev,
+                               dma_unmap_page(&mgp->pdev->dev,
                                               dma_unmap_addr(&tx->info[idx],
                                                              bus), len,
-                                              PCI_DMA_TODEVICE);
+                                              DMA_TO_DEVICE);
                        dma_unmap_len_set(&tx->info[idx], len, 0);
                        tx->info[idx].skb = NULL;
                }
@@ -2715,8 +2720,8 @@ again:
 
        /* map the skb for DMA */
        len = skb_headlen(skb);
-       bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
-       if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+       bus = dma_map_single(&mgp->pdev->dev, skb->data, len, DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus)))
                goto drop;
 
        idx = tx->req & tx->mask;
@@ -2824,7 +2829,7 @@ again:
                len = skb_frag_size(frag);
                bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
                                       DMA_TO_DEVICE);
-               if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+               if (unlikely(dma_mapping_error(&mgp->pdev->dev, bus))) {
                        myri10ge_unmap_tx_dma(mgp, tx, idx);
                        goto drop;
                }
@@ -3776,19 +3781,17 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        myri10ge_mask_surprise_down(pdev);
        pci_set_master(pdev);
        dac_enabled = 1;
-       status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (status != 0) {
                dac_enabled = 0;
                dev_err(&pdev->dev,
-                       "64-bit pci address mask was refused, "
-                       "trying 32-bit\n");
-               status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+                       "64-bit pci address mask was refused, trying 32-bit\n");
+               status = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
        }
        if (status != 0) {
                dev_err(&pdev->dev, "Error %d setting DMA mask\n", status);
                goto abort_with_enabled;
        }
-       (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
        mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd),
                                      &mgp->cmd_bus, GFP_KERNEL);
        if (!mgp->cmd) {
index ce3eca5..d74a80f 100644 (file)
@@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       netdev_boot_setup_check(dev);
-
        dev->base_addr = res->start;
        dev->irq = platform_get_irq(pdev, 0);
        err = sonic_probe1(dev);
index 84f7dbe..3f98203 100644 (file)
@@ -790,7 +790,7 @@ static const struct net_device_ops natsemi_netdev_ops = {
        .ndo_get_stats          = get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_change_mtu         = natsemi_change_mtu,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_tx_timeout         = ns_tx_timeout,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 28d9e98..ca46860 100644 (file)
@@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev)
        lp->device = &pdev->dev;
        platform_set_drvdata(pdev, dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        dev->base_addr = resmem->start;
        dev->irq = resirq->start;
index 0b017d4..09c0e83 100644 (file)
@@ -7625,7 +7625,7 @@ static const struct net_device_ops s2io_netdev_ops = {
        .ndo_start_xmit         = s2io_xmit,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = s2io_ndo_set_multicast,
-       .ndo_do_ioctl           = s2io_ioctl,
+       .ndo_eth_ioctl          = s2io_ioctl,
        .ndo_set_mac_address    = s2io_set_mac_addr,
        .ndo_change_mtu         = s2io_change_mtu,
        .ndo_set_features       = s2io_set_features,
index 7abd13e..df4a3f3 100644 (file)
@@ -3339,7 +3339,7 @@ static const struct net_device_ops vxge_netdev_ops = {
        .ndo_start_xmit         = vxge_xmit,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = vxge_set_multicast,
-       .ndo_do_ioctl           = vxge_ioctl,
+       .ndo_eth_ioctl           = vxge_ioctl,
        .ndo_set_mac_address    = vxge_set_mac_addr,
        .ndo_change_mtu         = vxge_change_mtu,
        .ndo_fix_features       = vxge_fix_features,
index b82758d..8844d1a 100644 (file)
@@ -23,6 +23,7 @@ config NFP
        depends on TLS && TLS_DEVICE || TLS_DEVICE=n
        select NET_DEVLINK
        select CRC32
+       select DIMLIB
        help
          This driver supports the Netronome(R) NFP4000/NFP6000 based
          cards working as a advanced Ethernet NIC.  It works with both
index 1cbe2c9..2a432de 100644 (file)
@@ -262,10 +262,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
 }
 
 static bool
-nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx)
+nfp_flower_tun_is_gre(struct flow_rule *rule, int start_idx)
 {
-       struct flow_action_entry *act = flow->rule->action.entries;
-       int num_act = flow->rule->action.num_entries;
+       struct flow_action_entry *act = rule->action.entries;
+       int num_act = rule->action.num_entries;
        int act_idx;
 
        /* Preparse action list for next mirred or redirect action */
@@ -279,7 +279,7 @@ nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx)
 
 static enum nfp_flower_tun_type
 nfp_fl_get_tun_from_act(struct nfp_app *app,
-                       struct flow_cls_offload *flow,
+                       struct flow_rule *rule,
                        const struct flow_action_entry *act, int act_idx)
 {
        const struct ip_tunnel_info *tun = act->tunnel;
@@ -288,7 +288,7 @@ nfp_fl_get_tun_from_act(struct nfp_app *app,
        /* Determine the tunnel type based on the egress netdev
         * in the mirred action for tunnels without l4.
         */
-       if (nfp_flower_tun_is_gre(flow, act_idx))
+       if (nfp_flower_tun_is_gre(rule, act_idx))
                return NFP_FL_TUNNEL_GRE;
 
        switch (tun->key.tp_dst) {
@@ -788,11 +788,10 @@ struct nfp_flower_pedit_acts {
 };
 
 static int
-nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action,
+nfp_fl_commit_mangle(struct flow_rule *rule, char *nfp_action,
                     int *a_len, struct nfp_flower_pedit_acts *set_act,
                     u32 *csum_updated)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
        size_t act_size = 0;
        u8 ip_proto = 0;
 
@@ -890,7 +889,7 @@ nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action,
 
 static int
 nfp_fl_pedit(const struct flow_action_entry *act,
-            struct flow_cls_offload *flow, char *nfp_action, int *a_len,
+            char *nfp_action, int *a_len,
             u32 *csum_updated, struct nfp_flower_pedit_acts *set_act,
             struct netlink_ext_ack *extack)
 {
@@ -977,7 +976,7 @@ nfp_flower_output_action(struct nfp_app *app,
 
 static int
 nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
-                      struct flow_cls_offload *flow,
+                      struct flow_rule *rule,
                       struct nfp_fl_payload *nfp_fl, int *a_len,
                       struct net_device *netdev,
                       enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
@@ -1045,7 +1044,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
        case FLOW_ACTION_TUNNEL_ENCAP: {
                const struct ip_tunnel_info *ip_tun = act->tunnel;
 
-               *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx);
+               *tun_type = nfp_fl_get_tun_from_act(app, rule, act, act_idx);
                if (*tun_type == NFP_FL_TUNNEL_NONE) {
                        NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list");
                        return -EOPNOTSUPP;
@@ -1086,7 +1085,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
                /* Tunnel decap is handled by default so accept action. */
                return 0;
        case FLOW_ACTION_MANGLE:
-               if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len],
+               if (nfp_fl_pedit(act, &nfp_fl->action_data[*a_len],
                                 a_len, csum_updated, set_act, extack))
                        return -EOPNOTSUPP;
                break;
@@ -1195,7 +1194,7 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act,
 }
 
 int nfp_flower_compile_action(struct nfp_app *app,
-                             struct flow_cls_offload *flow,
+                             struct flow_rule *rule,
                              struct net_device *netdev,
                              struct nfp_fl_payload *nfp_flow,
                              struct netlink_ext_ack *extack)
@@ -1207,7 +1206,7 @@ int nfp_flower_compile_action(struct nfp_app *app,
        bool pkt_host = false;
        u32 csum_updated = 0;
 
-       if (!flow_action_hw_stats_check(&flow->rule->action, extack,
+       if (!flow_action_hw_stats_check(&rule->action, extack,
                                        FLOW_ACTION_HW_STATS_DELAYED_BIT))
                return -EOPNOTSUPP;
 
@@ -1219,18 +1218,18 @@ int nfp_flower_compile_action(struct nfp_app *app,
        tun_out_cnt = 0;
        out_cnt = 0;
 
-       flow_action_for_each(i, act, &flow->rule->action) {
-               if (nfp_fl_check_mangle_start(&flow->rule->action, i))
+       flow_action_for_each(i, act, &rule->action) {
+               if (nfp_fl_check_mangle_start(&rule->action, i))
                        memset(&set_act, 0, sizeof(set_act));
-               err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len,
+               err = nfp_flower_loop_action(app, act, rule, nfp_flow, &act_len,
                                             netdev, &tun_type, &tun_out_cnt,
                                             &out_cnt, &csum_updated,
                                             &set_act, &pkt_host, extack, i);
                if (err)
                        return err;
                act_cnt++;
-               if (nfp_fl_check_mangle_end(&flow->rule->action, i))
-                       nfp_fl_commit_mangle(flow,
+               if (nfp_fl_check_mangle_end(&rule->action, i))
+                       nfp_fl_commit_mangle(rule,
                                             &nfp_flow->action_data[act_len],
                                             &act_len, &set_act, &csum_updated);
        }
index 062bb2d..bfd7d1c 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2021 Corigine, Inc. */
 
 #include "conntrack.h"
+#include "../nfp_port.h"
 
 const struct rhashtable_params nfp_tc_ct_merge_params = {
        .head_offset            = offsetof(struct nfp_fl_ct_tc_merge,
@@ -407,15 +408,491 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry,
        return -EINVAL;
 }
 
+static int
+nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map)
+{
+       int key_size;
+
+       /* This field must always be present */
+       key_size = sizeof(struct nfp_flower_meta_tci);
+       map[FLOW_PAY_META_TCI] = 0;
+
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_EXT_META) {
+               map[FLOW_PAY_EXT_META] = key_size;
+               key_size += sizeof(struct nfp_flower_ext_meta);
+       }
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_PORT) {
+               map[FLOW_PAY_INPORT] = key_size;
+               key_size += sizeof(struct nfp_flower_in_port);
+       }
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_MAC) {
+               map[FLOW_PAY_MAC_MPLS] = key_size;
+               key_size += sizeof(struct nfp_flower_mac_mpls);
+       }
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_TP) {
+               map[FLOW_PAY_L4] = key_size;
+               key_size += sizeof(struct nfp_flower_tp_ports);
+       }
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV4) {
+               map[FLOW_PAY_IPV4] = key_size;
+               key_size += sizeof(struct nfp_flower_ipv4);
+       }
+       if (in_key_ls.key_layer & NFP_FLOWER_LAYER_IPV6) {
+               map[FLOW_PAY_IPV6] = key_size;
+               key_size += sizeof(struct nfp_flower_ipv6);
+       }
+
+       if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+               map[FLOW_PAY_GRE] = key_size;
+               if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
+                       key_size += sizeof(struct nfp_flower_ipv6_gre_tun);
+               else
+                       key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
+       }
+
+       if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
+               map[FLOW_PAY_QINQ] = key_size;
+               key_size += sizeof(struct nfp_flower_vlan);
+       }
+
+       if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
+           (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
+               map[FLOW_PAY_UDP_TUN] = key_size;
+               if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
+                       key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
+               else
+                       key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+       }
+
+       if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+               map[FLOW_PAY_GENEVE_OPT] = key_size;
+               key_size += sizeof(struct nfp_flower_geneve_options);
+       }
+
+       return key_size;
+}
+
+static int nfp_fl_merge_actions_offload(struct flow_rule **rules,
+                                       struct nfp_flower_priv *priv,
+                                       struct net_device *netdev,
+                                       struct nfp_fl_payload *flow_pay)
+{
+       struct flow_action_entry *a_in;
+       int i, j, num_actions, id;
+       struct flow_rule *a_rule;
+       int err = 0, offset = 0;
+
+       num_actions = rules[CT_TYPE_PRE_CT]->action.num_entries +
+                     rules[CT_TYPE_NFT]->action.num_entries +
+                     rules[CT_TYPE_POST_CT]->action.num_entries;
+
+       a_rule = flow_rule_alloc(num_actions);
+       if (!a_rule)
+               return -ENOMEM;
+
+       /* Actions need a BASIC dissector. */
+       a_rule->match = rules[CT_TYPE_PRE_CT]->match;
+
+       /* Copy actions */
+       for (j = 0; j < _CT_TYPE_MAX; j++) {
+               if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) {
+                       struct flow_match_basic match;
+
+                       /* ip_proto is the only field that needed in later compile_action,
+                        * needed to set the correct checksum flags. It doesn't really matter
+                        * which input rule's ip_proto field we take as the earlier merge checks
+                        * would have made sure that they don't conflict. We do not know which
+                        * of the subflows would have the ip_proto filled in, so we need to iterate
+                        * through the subflows and assign the proper subflow to a_rule
+                        */
+                       flow_rule_match_basic(rules[j], &match);
+                       if (match.mask->ip_proto)
+                               a_rule->match = rules[j]->match;
+               }
+
+               for (i = 0; i < rules[j]->action.num_entries; i++) {
+                       a_in = &rules[j]->action.entries[i];
+                       id = a_in->id;
+
+                       /* Ignore CT related actions as these would already have
+                        * been taken care of by previous checks, and we do not send
+                        * any CT actions to the firmware.
+                        */
+                       switch (id) {
+                       case FLOW_ACTION_CT:
+                       case FLOW_ACTION_GOTO:
+                       case FLOW_ACTION_CT_METADATA:
+                               continue;
+                       default:
+                               memcpy(&a_rule->action.entries[offset++],
+                                      a_in, sizeof(struct flow_action_entry));
+                               break;
+                       }
+               }
+       }
+
+       /* Some actions would have been ignored, so update the num_entries field */
+       a_rule->action.num_entries = offset;
+       err = nfp_flower_compile_action(priv->app, a_rule, netdev, flow_pay, NULL);
+       kfree(a_rule);
+
+       return err;
+}
+
 static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry)
 {
-       return 0;
+       enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+       struct nfp_fl_ct_zone_entry *zt = m_entry->zt;
+       struct nfp_fl_key_ls key_layer, tmp_layer;
+       struct nfp_flower_priv *priv = zt->priv;
+       u16 key_map[_FLOW_PAY_LAYERS_MAX];
+       struct nfp_fl_payload *flow_pay;
+
+       struct flow_rule *rules[_CT_TYPE_MAX];
+       u8 *key, *msk, *kdata, *mdata;
+       struct nfp_port *port = NULL;
+       struct net_device *netdev;
+       bool qinq_sup;
+       u32 port_id;
+       u16 offset;
+       int i, err;
+
+       netdev = m_entry->netdev;
+       qinq_sup = !!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ);
+
+       rules[CT_TYPE_PRE_CT] = m_entry->tc_m_parent->pre_ct_parent->rule;
+       rules[CT_TYPE_NFT] = m_entry->nft_parent->rule;
+       rules[CT_TYPE_POST_CT] = m_entry->tc_m_parent->post_ct_parent->rule;
+
+       memset(&key_layer, 0, sizeof(struct nfp_fl_key_ls));
+       memset(&key_map, 0, sizeof(key_map));
+
+       /* Calculate the resultant key layer and size for offload */
+       for (i = 0; i < _CT_TYPE_MAX; i++) {
+               err = nfp_flower_calculate_key_layers(priv->app,
+                                                     m_entry->netdev,
+                                                     &tmp_layer, rules[i],
+                                                     &tun_type, NULL);
+               if (err)
+                       return err;
+
+               key_layer.key_layer |= tmp_layer.key_layer;
+               key_layer.key_layer_two |= tmp_layer.key_layer_two;
+       }
+       key_layer.key_size = nfp_fl_calc_key_layers_sz(key_layer, key_map);
+
+       flow_pay = nfp_flower_allocate_new(&key_layer);
+       if (!flow_pay)
+               return -ENOMEM;
+
+       memset(flow_pay->unmasked_data, 0, key_layer.key_size);
+       memset(flow_pay->mask_data, 0, key_layer.key_size);
+
+       kdata = flow_pay->unmasked_data;
+       mdata = flow_pay->mask_data;
+
+       offset = key_map[FLOW_PAY_META_TCI];
+       key = kdata + offset;
+       msk = mdata + offset;
+       nfp_flower_compile_meta((struct nfp_flower_meta_tci *)key,
+                               (struct nfp_flower_meta_tci *)msk,
+                               key_layer.key_layer);
+
+       if (NFP_FLOWER_LAYER_EXT_META & key_layer.key_layer) {
+               offset =  key_map[FLOW_PAY_EXT_META];
+               key = kdata + offset;
+               msk = mdata + offset;
+               nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)key,
+                                           key_layer.key_layer_two);
+               nfp_flower_compile_ext_meta((struct nfp_flower_ext_meta *)msk,
+                                           key_layer.key_layer_two);
+       }
+
+       /* Using in_port from the -trk rule. The tc merge checks should already
+        * be checking that the ingress netdevs are the same
+        */
+       port_id = nfp_flower_get_port_id_from_netdev(priv->app, netdev);
+       offset = key_map[FLOW_PAY_INPORT];
+       key = kdata + offset;
+       msk = mdata + offset;
+       err = nfp_flower_compile_port((struct nfp_flower_in_port *)key,
+                                     port_id, false, tun_type, NULL);
+       if (err)
+               goto ct_offload_err;
+       err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
+                                     port_id, true, tun_type, NULL);
+       if (err)
+               goto ct_offload_err;
+
+       /* This following part works on the assumption that previous checks has
+        * already filtered out flows that has different values for the different
+        * layers. Here we iterate through all three rules and merge their respective
+        * masked value(cared bits), basic method is:
+        * final_key = (r1_key & r1_mask) | (r2_key & r2_mask) | (r3_key & r3_mask)
+        * final_mask = r1_mask | r2_mask | r3_mask
+        * If none of the rules contains a match that is also fine, that simply means
+        * that the layer is not present.
+        */
+       if (!qinq_sup) {
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       offset = key_map[FLOW_PAY_META_TCI];
+                       key = kdata + offset;
+                       msk = mdata + offset;
+                       nfp_flower_compile_tci((struct nfp_flower_meta_tci *)key,
+                                              (struct nfp_flower_meta_tci *)msk,
+                                              rules[i]);
+               }
+       }
+
+       if (NFP_FLOWER_LAYER_MAC & key_layer.key_layer) {
+               offset = key_map[FLOW_PAY_MAC_MPLS];
+               key = kdata + offset;
+               msk = mdata + offset;
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)key,
+                                              (struct nfp_flower_mac_mpls *)msk,
+                                              rules[i]);
+                       err = nfp_flower_compile_mpls((struct nfp_flower_mac_mpls *)key,
+                                                     (struct nfp_flower_mac_mpls *)msk,
+                                                     rules[i], NULL);
+                       if (err)
+                               goto ct_offload_err;
+               }
+       }
+
+       if (NFP_FLOWER_LAYER_IPV4 & key_layer.key_layer) {
+               offset = key_map[FLOW_PAY_IPV4];
+               key = kdata + offset;
+               msk = mdata + offset;
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)key,
+                                               (struct nfp_flower_ipv4 *)msk,
+                                               rules[i]);
+               }
+       }
+
+       if (NFP_FLOWER_LAYER_IPV6 & key_layer.key_layer) {
+               offset = key_map[FLOW_PAY_IPV6];
+               key = kdata + offset;
+               msk = mdata + offset;
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)key,
+                                               (struct nfp_flower_ipv6 *)msk,
+                                               rules[i]);
+               }
+       }
+
+       if (NFP_FLOWER_LAYER_TP & key_layer.key_layer) {
+               offset = key_map[FLOW_PAY_L4];
+               key = kdata + offset;
+               msk = mdata + offset;
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       nfp_flower_compile_tport((struct nfp_flower_tp_ports *)key,
+                                                (struct nfp_flower_tp_ports *)msk,
+                                                rules[i]);
+               }
+       }
+
+       if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
+               offset = key_map[FLOW_PAY_GRE];
+               key = kdata + offset;
+               msk = mdata + offset;
+               if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+                       struct nfp_flower_ipv6_gre_tun *gre_match;
+                       struct nfp_ipv6_addr_entry *entry;
+                       struct in6_addr *dst;
+
+                       for (i = 0; i < _CT_TYPE_MAX; i++) {
+                               nfp_flower_compile_ipv6_gre_tun((void *)key,
+                                                               (void *)msk, rules[i]);
+                       }
+                       gre_match = (struct nfp_flower_ipv6_gre_tun *)key;
+                       dst = &gre_match->ipv6.dst;
+
+                       entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
+                       if (!entry) {
+                               err = -ENOMEM;
+                               goto ct_offload_err;
+                       }
+
+                       flow_pay->nfp_tun_ipv6 = entry;
+               } else {
+                       __be32 dst;
+
+                       for (i = 0; i < _CT_TYPE_MAX; i++) {
+                               nfp_flower_compile_ipv4_gre_tun((void *)key,
+                                                               (void *)msk, rules[i]);
+                       }
+                       dst = ((struct nfp_flower_ipv4_gre_tun *)key)->ipv4.dst;
+
+                       /* Store the tunnel destination in the rule data.
+                        * This must be present and be an exact match.
+                        */
+                       flow_pay->nfp_tun_ipv4_addr = dst;
+                       nfp_tunnel_add_ipv4_off(priv->app, dst);
+               }
+       }
+
+       if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
+               offset = key_map[FLOW_PAY_QINQ];
+               key = kdata + offset;
+               msk = mdata + offset;
+               for (i = 0; i < _CT_TYPE_MAX; i++) {
+                       nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
+                                               (struct nfp_flower_vlan *)msk,
+                                               rules[i]);
+               }
+       }
+
+       if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
+           key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
+               offset = key_map[FLOW_PAY_UDP_TUN];
+               key = kdata + offset;
+               msk = mdata + offset;
+               if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+                       struct nfp_flower_ipv6_udp_tun *udp_match;
+                       struct nfp_ipv6_addr_entry *entry;
+                       struct in6_addr *dst;
+
+                       for (i = 0; i < _CT_TYPE_MAX; i++) {
+                               nfp_flower_compile_ipv6_udp_tun((void *)key,
+                                                               (void *)msk, rules[i]);
+                       }
+                       udp_match = (struct nfp_flower_ipv6_udp_tun *)key;
+                       dst = &udp_match->ipv6.dst;
+
+                       entry = nfp_tunnel_add_ipv6_off(priv->app, dst);
+                       if (!entry) {
+                               err = -ENOMEM;
+                               goto ct_offload_err;
+                       }
+
+                       flow_pay->nfp_tun_ipv6 = entry;
+               } else {
+                       __be32 dst;
+
+                       for (i = 0; i < _CT_TYPE_MAX; i++) {
+                               nfp_flower_compile_ipv4_udp_tun((void *)key,
+                                                               (void *)msk, rules[i]);
+                       }
+                       dst = ((struct nfp_flower_ipv4_udp_tun *)key)->ipv4.dst;
+
+                       /* Store the tunnel destination in the rule data.
+                        * This must be present and be an exact match.
+                        */
+                       flow_pay->nfp_tun_ipv4_addr = dst;
+                       nfp_tunnel_add_ipv4_off(priv->app, dst);
+               }
+
+               if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
+                       offset = key_map[FLOW_PAY_GENEVE_OPT];
+                       key = kdata + offset;
+                       msk = mdata + offset;
+                       for (i = 0; i < _CT_TYPE_MAX; i++)
+                               nfp_flower_compile_geneve_opt(key, msk, rules[i]);
+               }
+       }
+
+       /* Merge actions into flow_pay */
+       err = nfp_fl_merge_actions_offload(rules, priv, netdev, flow_pay);
+       if (err)
+               goto ct_offload_err;
+
+       /* Use the pointer address as the cookie, but set the last bit to 1.
+        * This is to avoid the 'is_merge_flow' check from detecting this as
+        * an already merged flow. This works since address alignment means
+        * that the last bit for pointer addresses will be 0.
+        */
+       flow_pay->tc_flower_cookie = ((unsigned long)flow_pay) | 0x1;
+       err = nfp_compile_flow_metadata(priv->app, flow_pay->tc_flower_cookie,
+                                       flow_pay, netdev, NULL);
+       if (err)
+               goto ct_offload_err;
+
+       if (nfp_netdev_is_nfp_repr(netdev))
+               port = nfp_port_from_netdev(netdev);
+
+       err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
+                                    nfp_flower_table_params);
+       if (err)
+               goto ct_release_offload_meta_err;
+
+       err = nfp_flower_xmit_flow(priv->app, flow_pay,
+                                  NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+       if (err)
+               goto ct_remove_rhash_err;
+
+       m_entry->tc_flower_cookie = flow_pay->tc_flower_cookie;
+       m_entry->flow_pay = flow_pay;
+
+       if (port)
+               port->tc_offload_cnt++;
+
+       return err;
+
+ct_remove_rhash_err:
+       WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+                                           &flow_pay->fl_node,
+                                           nfp_flower_table_params));
+ct_release_offload_meta_err:
+       nfp_modify_flow_metadata(priv->app, flow_pay);
+ct_offload_err:
+       if (flow_pay->nfp_tun_ipv4_addr)
+               nfp_tunnel_del_ipv4_off(priv->app, flow_pay->nfp_tun_ipv4_addr);
+       if (flow_pay->nfp_tun_ipv6)
+               nfp_tunnel_put_ipv6_off(priv->app, flow_pay->nfp_tun_ipv6);
+       kfree(flow_pay->action_data);
+       kfree(flow_pay->mask_data);
+       kfree(flow_pay->unmasked_data);
+       kfree(flow_pay);
+       return err;
 }
 
 static int nfp_fl_ct_del_offload(struct nfp_app *app, unsigned long cookie,
                                 struct net_device *netdev)
 {
-       return 0;
+       struct nfp_flower_priv *priv = app->priv;
+       struct nfp_fl_payload *flow_pay;
+       struct nfp_port *port = NULL;
+       int err = 0;
+
+       if (nfp_netdev_is_nfp_repr(netdev))
+               port = nfp_port_from_netdev(netdev);
+
+       flow_pay = nfp_flower_search_fl_table(app, cookie, netdev);
+       if (!flow_pay)
+               return -ENOENT;
+
+       err = nfp_modify_flow_metadata(app, flow_pay);
+       if (err)
+               goto err_free_merge_flow;
+
+       if (flow_pay->nfp_tun_ipv4_addr)
+               nfp_tunnel_del_ipv4_off(app, flow_pay->nfp_tun_ipv4_addr);
+
+       if (flow_pay->nfp_tun_ipv6)
+               nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6);
+
+       if (!flow_pay->in_hw) {
+               err = 0;
+               goto err_free_merge_flow;
+       }
+
+       err = nfp_flower_xmit_flow(app, flow_pay,
+                                  NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
+
+err_free_merge_flow:
+       nfp_flower_del_linked_merge_flows(app, flow_pay);
+       if (port)
+               port->tc_offload_cnt--;
+       kfree(flow_pay->action_data);
+       kfree(flow_pay->mask_data);
+       kfree(flow_pay->unmasked_data);
+       WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+                                           &flow_pay->fl_node,
+                                           nfp_flower_table_params));
+       kfree_rcu(flow_pay, rcu);
+       return err;
 }
 
 static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt,
@@ -1048,6 +1525,139 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv,
        return 0;
 }
 
+static void
+nfp_fl_ct_sub_stats(struct nfp_fl_nft_tc_merge *nft_merge,
+                   enum ct_entry_type type, u64 *m_pkts,
+                   u64 *m_bytes, u64 *m_used)
+{
+       struct nfp_flower_priv *priv = nft_merge->zt->priv;
+       struct nfp_fl_payload *nfp_flow;
+       u32 ctx_id;
+
+       nfp_flow = nft_merge->flow_pay;
+       if (!nfp_flow)
+               return;
+
+       ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
+       *m_pkts += priv->stats[ctx_id].pkts;
+       *m_bytes += priv->stats[ctx_id].bytes;
+       *m_used = max_t(u64, *m_used, priv->stats[ctx_id].used);
+
+       /* If request is for a sub_flow which is part of a tunnel merged
+        * flow then update stats from tunnel merged flows first.
+        */
+       if (!list_empty(&nfp_flow->linked_flows))
+               nfp_flower_update_merge_stats(priv->app, nfp_flow);
+
+       if (type != CT_TYPE_NFT) {
+               /* Update nft cached stats */
+               flow_stats_update(&nft_merge->nft_parent->stats,
+                                 priv->stats[ctx_id].bytes,
+                                 priv->stats[ctx_id].pkts,
+                                 0, priv->stats[ctx_id].used,
+                                 FLOW_ACTION_HW_STATS_DELAYED);
+       } else {
+               /* Update pre_ct cached stats */
+               flow_stats_update(&nft_merge->tc_m_parent->pre_ct_parent->stats,
+                                 priv->stats[ctx_id].bytes,
+                                 priv->stats[ctx_id].pkts,
+                                 0, priv->stats[ctx_id].used,
+                                 FLOW_ACTION_HW_STATS_DELAYED);
+               /* Update post_ct cached stats */
+               flow_stats_update(&nft_merge->tc_m_parent->post_ct_parent->stats,
+                                 priv->stats[ctx_id].bytes,
+                                 priv->stats[ctx_id].pkts,
+                                 0, priv->stats[ctx_id].used,
+                                 FLOW_ACTION_HW_STATS_DELAYED);
+       }
+       /* Reset stats from the nfp */
+       priv->stats[ctx_id].pkts = 0;
+       priv->stats[ctx_id].bytes = 0;
+}
+
+int nfp_fl_ct_stats(struct flow_cls_offload *flow,
+                   struct nfp_fl_ct_map_entry *ct_map_ent)
+{
+       struct nfp_fl_ct_flow_entry *ct_entry = ct_map_ent->ct_entry;
+       struct nfp_fl_nft_tc_merge *nft_merge, *nft_m_tmp;
+       struct nfp_fl_ct_tc_merge *tc_merge, *tc_m_tmp;
+
+       u64 pkts = 0, bytes = 0, used = 0;
+       u64 m_pkts, m_bytes, m_used;
+
+       spin_lock_bh(&ct_entry->zt->priv->stats_lock);
+
+       if (ct_entry->type == CT_TYPE_PRE_CT) {
+               /* Iterate tc_merge entries associated with this flow */
+               list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
+                                        pre_ct_list) {
+                       m_pkts = 0;
+                       m_bytes = 0;
+                       m_used = 0;
+                       /* Iterate nft_merge entries associated with this tc_merge flow */
+                       list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
+                                                tc_merge_list) {
+                               nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_PRE_CT,
+                                                   &m_pkts, &m_bytes, &m_used);
+                       }
+                       pkts += m_pkts;
+                       bytes += m_bytes;
+                       used = max_t(u64, used, m_used);
+                       /* Update post_ct partner */
+                       flow_stats_update(&tc_merge->post_ct_parent->stats,
+                                         m_bytes, m_pkts, 0, m_used,
+                                         FLOW_ACTION_HW_STATS_DELAYED);
+               }
+       } else if (ct_entry->type == CT_TYPE_POST_CT) {
+               /* Iterate tc_merge entries associated with this flow */
+               list_for_each_entry_safe(tc_merge, tc_m_tmp, &ct_entry->children,
+                                        post_ct_list) {
+                       m_pkts = 0;
+                       m_bytes = 0;
+                       m_used = 0;
+                       /* Iterate nft_merge entries associated with this tc_merge flow */
+                       list_for_each_entry_safe(nft_merge, nft_m_tmp, &tc_merge->children,
+                                                tc_merge_list) {
+                               nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_POST_CT,
+                                                   &m_pkts, &m_bytes, &m_used);
+                       }
+                       pkts += m_pkts;
+                       bytes += m_bytes;
+                       used = max_t(u64, used, m_used);
+                       /* Update pre_ct partner */
+                       flow_stats_update(&tc_merge->pre_ct_parent->stats,
+                                         m_bytes, m_pkts, 0, m_used,
+                                         FLOW_ACTION_HW_STATS_DELAYED);
+               }
+       } else  {
+               /* Iterate nft_merge entries associated with this nft flow */
+               list_for_each_entry_safe(nft_merge, nft_m_tmp, &ct_entry->children,
+                                        nft_flow_list) {
+                       nfp_fl_ct_sub_stats(nft_merge, CT_TYPE_NFT,
+                                           &pkts, &bytes, &used);
+               }
+       }
+
+       /* Add stats from this request to stats potentially cached by
+        * previous requests.
+        */
+       flow_stats_update(&ct_entry->stats, bytes, pkts, 0, used,
+                         FLOW_ACTION_HW_STATS_DELAYED);
+       /* Finally update the flow stats from the original stats request */
+       flow_stats_update(&flow->stats, ct_entry->stats.bytes,
+                         ct_entry->stats.pkts, 0,
+                         ct_entry->stats.lastused,
+                         FLOW_ACTION_HW_STATS_DELAYED);
+       /* Stats has been synced to original flow, can now clear
+        * the cache.
+        */
+       ct_entry->stats.pkts = 0;
+       ct_entry->stats.bytes = 0;
+       spin_unlock_bh(&ct_entry->zt->priv->stats_lock);
+
+       return 0;
+}
+
 static int
 nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
 {
@@ -1080,7 +1690,11 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
                                                    nfp_ct_map_params);
                return nfp_fl_ct_del_flow(ct_map_ent);
        case FLOW_CLS_STATS:
-               return 0;
+               ct_map_ent = rhashtable_lookup_fast(&zt->priv->ct_map_table, &flow->cookie,
+                                                   nfp_ct_map_params);
+               if (ct_map_ent)
+                       return nfp_fl_ct_stats(flow, ct_map_ent);
+               break;
        default:
                break;
        }
index 170b6cd..beb6cce 100644 (file)
@@ -83,6 +83,24 @@ enum ct_entry_type {
        CT_TYPE_PRE_CT,
        CT_TYPE_NFT,
        CT_TYPE_POST_CT,
+       _CT_TYPE_MAX,
+};
+
+enum nfp_nfp_layer_name {
+       FLOW_PAY_META_TCI =    0,
+       FLOW_PAY_INPORT,
+       FLOW_PAY_EXT_META,
+       FLOW_PAY_MAC_MPLS,
+       FLOW_PAY_L4,
+       FLOW_PAY_IPV4,
+       FLOW_PAY_IPV6,
+       FLOW_PAY_CT,
+       FLOW_PAY_GRE,
+       FLOW_PAY_QINQ,
+       FLOW_PAY_UDP_TUN,
+       FLOW_PAY_GENEVE_OPT,
+
+       _FLOW_PAY_LAYERS_MAX
 };
 
 /**
@@ -228,4 +246,12 @@ int nfp_fl_ct_del_flow(struct nfp_fl_ct_map_entry *ct_map_ent);
  */
 int nfp_fl_ct_handle_nft_flow(enum tc_setup_type type, void *type_data,
                              void *cb_priv);
+
+/**
+ * nfp_fl_ct_stats() - Handle flower stats callbacks for ct flows
+ * @flow:      TC flower classifier offload structure.
+ * @ct_map_ent:        ct map entry for the flow that needs deleting
+ */
+int nfp_fl_ct_stats(struct flow_cls_offload *flow,
+                   struct nfp_fl_ct_map_entry *ct_map_ent);
 #endif
index 0fbd682..917c450 100644 (file)
@@ -413,20 +413,73 @@ int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
 int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
                                     struct nfp_fl_payload *sub_flow1,
                                     struct nfp_fl_payload *sub_flow2);
+void
+nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext,
+                       struct nfp_flower_meta_tci *msk, u8 key_type);
+void
+nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext,
+                      struct nfp_flower_meta_tci *msk,
+                      struct flow_rule *rule);
+void
+nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext);
+int
+nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
+                       bool mask_version, enum nfp_flower_tun_type tun_type,
+                       struct netlink_ext_ack *extack);
+void
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
+                      struct nfp_flower_mac_mpls *msk,
+                      struct flow_rule *rule);
+int
+nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext,
+                       struct nfp_flower_mac_mpls *msk,
+                       struct flow_rule *rule,
+                       struct netlink_ext_ack *extack);
+void
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
+                        struct nfp_flower_tp_ports *msk,
+                        struct flow_rule *rule);
+void
+nfp_flower_compile_vlan(struct nfp_flower_vlan *ext,
+                       struct nfp_flower_vlan *msk,
+                       struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
+                       struct nfp_flower_ipv4 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
+                       struct nfp_flower_ipv6 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
+                               struct nfp_flower_ipv4_gre_tun *msk,
+                               struct flow_rule *rule);
+void
+nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
+                               struct nfp_flower_ipv4_udp_tun *msk,
+                               struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext,
+                               struct nfp_flower_ipv6_udp_tun *msk,
+                               struct flow_rule *rule);
+void
+nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
+                               struct nfp_flower_ipv6_gre_tun *msk,
+                               struct flow_rule *rule);
 int nfp_flower_compile_flow_match(struct nfp_app *app,
-                                 struct flow_cls_offload *flow,
+                                 struct flow_rule *rule,
                                  struct nfp_fl_key_ls *key_ls,
                                  struct net_device *netdev,
                                  struct nfp_fl_payload *nfp_flow,
                                  enum nfp_flower_tun_type tun_type,
                                  struct netlink_ext_ack *extack);
 int nfp_flower_compile_action(struct nfp_app *app,
-                             struct flow_cls_offload *flow,
+                             struct flow_rule *rule,
                              struct net_device *netdev,
                              struct nfp_fl_payload *nfp_flow,
                              struct netlink_ext_ack *extack);
-int nfp_compile_flow_metadata(struct nfp_app *app,
-                             struct flow_cls_offload *flow,
+int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie,
                              struct nfp_fl_payload *nfp_flow,
                              struct net_device *netdev,
                              struct netlink_ext_ack *extack);
@@ -498,4 +551,22 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
                                 struct nfp_fl_payload *flow);
 int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app,
                                     struct nfp_fl_payload *flow);
+
+struct nfp_fl_payload *
+nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer);
+int nfp_flower_calculate_key_layers(struct nfp_app *app,
+                                   struct net_device *netdev,
+                                   struct nfp_fl_key_ls *ret_key_ls,
+                                   struct flow_rule *flow,
+                                   enum nfp_flower_tun_type *tun_type,
+                                   struct netlink_ext_ack *extack);
+void
+nfp_flower_del_linked_merge_flows(struct nfp_app *app,
+                                 struct nfp_fl_payload *sub_flow);
+int
+nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
+                    u8 mtype);
+void
+nfp_flower_update_merge_stats(struct nfp_app *app,
+                             struct nfp_fl_payload *sub_flow);
 #endif
index 255a4df..9d86eea 100644 (file)
@@ -7,51 +7,68 @@
 #include "cmsg.h"
 #include "main.h"
 
-static void
-nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
-                           struct nfp_flower_meta_tci *msk,
-                           struct flow_rule *rule, u8 key_type, bool qinq_sup)
+void
+nfp_flower_compile_meta(struct nfp_flower_meta_tci *ext,
+                       struct nfp_flower_meta_tci *msk, u8 key_type)
 {
-       u16 tmp_tci;
-
-       memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
-       memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
-
        /* Populate the metadata frame. */
        ext->nfp_flow_key_layer = key_type;
        ext->mask_id = ~0;
 
        msk->nfp_flow_key_layer = key_type;
        msk->mask_id = ~0;
+}
 
-       if (!qinq_sup && flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+void
+nfp_flower_compile_tci(struct nfp_flower_meta_tci *ext,
+                      struct nfp_flower_meta_tci *msk,
+                      struct flow_rule *rule)
+{
+       u16 msk_tci, key_tci;
+
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
                struct flow_match_vlan match;
 
                flow_rule_match_vlan(rule, &match);
                /* Populate the tci field. */
-               tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
-               tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+               key_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+               key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
                                      match.key->vlan_priority) |
                           FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
                                      match.key->vlan_id);
-               ext->tci = cpu_to_be16(tmp_tci);
 
-               tmp_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
-               tmp_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+               msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+               msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
                                      match.mask->vlan_priority) |
                           FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
                                      match.mask->vlan_id);
-               msk->tci = cpu_to_be16(tmp_tci);
+
+               ext->tci |= cpu_to_be16((key_tci & msk_tci));
+               msk->tci |= cpu_to_be16(msk_tci);
        }
 }
 
 static void
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
+                           struct nfp_flower_meta_tci *msk,
+                           struct flow_rule *rule, u8 key_type, bool qinq_sup)
+{
+       memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
+       memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
+
+       nfp_flower_compile_meta(ext, msk, key_type);
+
+       if (!qinq_sup)
+               nfp_flower_compile_tci(ext, msk, rule);
+}
+
+void
 nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext)
 {
        frame->nfp_flow_key_layer2 = cpu_to_be32(key_ext);
 }
 
-static int
+int
 nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
                        bool mask_version, enum nfp_flower_tun_type tun_type,
                        struct netlink_ext_ack *extack)
@@ -74,28 +91,37 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
        return 0;
 }
 
-static int
+void
 nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
-                      struct nfp_flower_mac_mpls *msk, struct flow_rule *rule,
-                      struct netlink_ext_ack *extack)
+                      struct nfp_flower_mac_mpls *msk,
+                      struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
-       memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
-
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
                struct flow_match_eth_addrs match;
+               int i;
 
                flow_rule_match_eth_addrs(rule, &match);
                /* Populate mac frame. */
-               ether_addr_copy(ext->mac_dst, &match.key->dst[0]);
-               ether_addr_copy(ext->mac_src, &match.key->src[0]);
-               ether_addr_copy(msk->mac_dst, &match.mask->dst[0]);
-               ether_addr_copy(msk->mac_src, &match.mask->src[0]);
+               for (i = 0; i < ETH_ALEN; i++) {
+                       ext->mac_dst[i] |= match.key->dst[i] &
+                                          match.mask->dst[i];
+                       msk->mac_dst[i] |= match.mask->dst[i];
+                       ext->mac_src[i] |= match.key->src[i] &
+                                          match.mask->src[i];
+                       msk->mac_src[i] |= match.mask->src[i];
+               }
        }
+}
 
+int
+nfp_flower_compile_mpls(struct nfp_flower_mac_mpls *ext,
+                       struct nfp_flower_mac_mpls *msk,
+                       struct flow_rule *rule,
+                       struct netlink_ext_ack *extack)
+{
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
                struct flow_match_mpls match;
-               u32 t_mpls;
+               u32 key_mpls, msk_mpls;
 
                flow_rule_match_mpls(rule, &match);
 
@@ -106,22 +132,24 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
                        return -EOPNOTSUPP;
                }
 
-               t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
-                                   match.key->ls[0].mpls_label) |
-                        FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
-                                   match.key->ls[0].mpls_tc) |
-                        FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
-                                   match.key->ls[0].mpls_bos) |
-                        NFP_FLOWER_MASK_MPLS_Q;
-               ext->mpls_lse = cpu_to_be32(t_mpls);
-               t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
-                                   match.mask->ls[0].mpls_label) |
-                        FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
-                                   match.mask->ls[0].mpls_tc) |
-                        FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
-                                   match.mask->ls[0].mpls_bos) |
-                        NFP_FLOWER_MASK_MPLS_Q;
-               msk->mpls_lse = cpu_to_be32(t_mpls);
+               key_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+                                     match.key->ls[0].mpls_label) |
+                          FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+                                     match.key->ls[0].mpls_tc) |
+                          FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+                                     match.key->ls[0].mpls_bos) |
+                          NFP_FLOWER_MASK_MPLS_Q;
+
+               msk_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+                                     match.mask->ls[0].mpls_label) |
+                          FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+                                     match.mask->ls[0].mpls_tc) |
+                          FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+                                     match.mask->ls[0].mpls_bos) |
+                          NFP_FLOWER_MASK_MPLS_Q;
+
+               ext->mpls_lse |= cpu_to_be32((key_mpls & msk_mpls));
+               msk->mpls_lse |= cpu_to_be32(msk_mpls);
        } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
                 * bit, which indicates an mpls ether type but without any
@@ -132,30 +160,41 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
                flow_rule_match_basic(rule, &match);
                if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
                    match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) {
-                       ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
-                       msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+                       ext->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+                       msk->mpls_lse |= cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
                }
        }
 
        return 0;
 }
 
-static void
+static int
+nfp_flower_compile_mac_mpls(struct nfp_flower_mac_mpls *ext,
+                           struct nfp_flower_mac_mpls *msk,
+                           struct flow_rule *rule,
+                           struct netlink_ext_ack *extack)
+{
+       memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
+       memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
+
+       nfp_flower_compile_mac(ext, msk, rule);
+
+       return nfp_flower_compile_mpls(ext, msk, rule, extack);
+}
+
+void
 nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
                         struct nfp_flower_tp_ports *msk,
                         struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_tp_ports));
-       memset(msk, 0, sizeof(struct nfp_flower_tp_ports));
-
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;
 
                flow_rule_match_ports(rule, &match);
-               ext->port_src = match.key->src;
-               ext->port_dst = match.key->dst;
-               msk->port_src = match.mask->src;
-               msk->port_dst = match.mask->dst;
+               ext->port_src |= match.key->src & match.mask->src;
+               ext->port_dst |= match.key->dst & match.mask->dst;
+               msk->port_src |= match.mask->src;
+               msk->port_dst |= match.mask->dst;
        }
 }
 
@@ -167,18 +206,18 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
                struct flow_match_basic match;
 
                flow_rule_match_basic(rule, &match);
-               ext->proto = match.key->ip_proto;
-               msk->proto = match.mask->ip_proto;
+               ext->proto |= match.key->ip_proto & match.mask->ip_proto;
+               msk->proto |= match.mask->ip_proto;
        }
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
                struct flow_match_ip match;
 
                flow_rule_match_ip(rule, &match);
-               ext->tos = match.key->tos;
-               ext->ttl = match.key->ttl;
-               msk->tos = match.mask->tos;
-               msk->ttl = match.mask->ttl;
+               ext->tos |= match.key->tos & match.mask->tos;
+               ext->ttl |= match.key->ttl & match.mask->ttl;
+               msk->tos |= match.mask->tos;
+               msk->ttl |= match.mask->ttl;
        }
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
@@ -231,99 +270,108 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
 }
 
 static void
-nfp_flower_fill_vlan(struct flow_dissector_key_vlan *key,
-                    struct nfp_flower_vlan *frame,
-                    bool outer_vlan)
+nfp_flower_fill_vlan(struct flow_match_vlan *match,
+                    struct nfp_flower_vlan *ext,
+                    struct nfp_flower_vlan *msk, bool outer_vlan)
 {
-       u16 tci;
-
-       tci = NFP_FLOWER_MASK_VLAN_PRESENT;
-       tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
-                         key->vlan_priority) |
-              FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
-                         key->vlan_id);
+       struct flow_dissector_key_vlan *mask = match->mask;
+       struct flow_dissector_key_vlan *key = match->key;
+       u16 msk_tci, key_tci;
+
+       key_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+       key_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+                             key->vlan_priority) |
+                  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+                             key->vlan_id);
+       msk_tci = NFP_FLOWER_MASK_VLAN_PRESENT;
+       msk_tci |= FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+                             mask->vlan_priority) |
+                  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+                             mask->vlan_id);
 
        if (outer_vlan) {
-               frame->outer_tci = cpu_to_be16(tci);
-               frame->outer_tpid = key->vlan_tpid;
+               ext->outer_tci |= cpu_to_be16((key_tci & msk_tci));
+               ext->outer_tpid |= key->vlan_tpid & mask->vlan_tpid;
+               msk->outer_tci |= cpu_to_be16(msk_tci);
+               msk->outer_tpid |= mask->vlan_tpid;
        } else {
-               frame->inner_tci = cpu_to_be16(tci);
-               frame->inner_tpid = key->vlan_tpid;
+               ext->inner_tci |= cpu_to_be16((key_tci & msk_tci));
+               ext->inner_tpid |= key->vlan_tpid & mask->vlan_tpid;
+               msk->inner_tci |= cpu_to_be16(msk_tci);
+               msk->inner_tpid |= mask->vlan_tpid;
        }
 }
 
-static void
+void
 nfp_flower_compile_vlan(struct nfp_flower_vlan *ext,
                        struct nfp_flower_vlan *msk,
                        struct flow_rule *rule)
 {
        struct flow_match_vlan match;
 
-       memset(ext, 0, sizeof(struct nfp_flower_vlan));
-       memset(msk, 0, sizeof(struct nfp_flower_vlan));
-
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
                flow_rule_match_vlan(rule, &match);
-               nfp_flower_fill_vlan(match.key, ext, true);
-               nfp_flower_fill_vlan(match.mask, msk, true);
+               nfp_flower_fill_vlan(&match, ext, msk, true);
        }
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
                flow_rule_match_cvlan(rule, &match);
-               nfp_flower_fill_vlan(match.key, ext, false);
-               nfp_flower_fill_vlan(match.mask, msk, false);
+               nfp_flower_fill_vlan(&match, ext, msk, false);
        }
 }
 
-static void
+void
 nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
                        struct nfp_flower_ipv4 *msk, struct flow_rule *rule)
 {
-       struct flow_match_ipv4_addrs match;
-
-       memset(ext, 0, sizeof(struct nfp_flower_ipv4));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv4));
-
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+               struct flow_match_ipv4_addrs match;
+
                flow_rule_match_ipv4_addrs(rule, &match);
-               ext->ipv4_src = match.key->src;
-               ext->ipv4_dst = match.key->dst;
-               msk->ipv4_src = match.mask->src;
-               msk->ipv4_dst = match.mask->dst;
+               ext->ipv4_src |= match.key->src & match.mask->src;
+               ext->ipv4_dst |= match.key->dst & match.mask->dst;
+               msk->ipv4_src |= match.mask->src;
+               msk->ipv4_dst |= match.mask->dst;
        }
 
        nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
 }
 
-static void
+void
 nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
                        struct nfp_flower_ipv6 *msk, struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_ipv6));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv6));
-
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
                struct flow_match_ipv6_addrs match;
+               int i;
 
                flow_rule_match_ipv6_addrs(rule, &match);
-               ext->ipv6_src = match.key->src;
-               ext->ipv6_dst = match.key->dst;
-               msk->ipv6_src = match.mask->src;
-               msk->ipv6_dst = match.mask->dst;
+               for (i = 0; i < sizeof(ext->ipv6_src); i++) {
+                       ext->ipv6_src.s6_addr[i] |= match.key->src.s6_addr[i] &
+                                                   match.mask->src.s6_addr[i];
+                       ext->ipv6_dst.s6_addr[i] |= match.key->dst.s6_addr[i] &
+                                                   match.mask->dst.s6_addr[i];
+                       msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i];
+                       msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i];
+               }
        }
 
        nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
 }
 
-static int
-nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule)
+void
+nfp_flower_compile_geneve_opt(u8 *ext, u8 *msk, struct flow_rule *rule)
 {
        struct flow_match_enc_opts match;
+       int i;
 
-       flow_rule_match_enc_opts(rule, &match);
-       memcpy(ext, match.key->data, match.key->len);
-       memcpy(msk, match.mask->data, match.mask->len);
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+               flow_rule_match_enc_opts(rule, &match);
 
-       return 0;
+               for (i = 0; i < match.mask->len; i++) {
+                       ext[i] |= match.key->data[i] & match.mask->data[i];
+                       msk[i] |= match.mask->data[i];
+               }
+       }
 }
 
 static void
@@ -335,10 +383,10 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
                struct flow_match_ipv4_addrs match;
 
                flow_rule_match_enc_ipv4_addrs(rule, &match);
-               ext->src = match.key->src;
-               ext->dst = match.key->dst;
-               msk->src = match.mask->src;
-               msk->dst = match.mask->dst;
+               ext->src |= match.key->src & match.mask->src;
+               ext->dst |= match.key->dst & match.mask->dst;
+               msk->src |= match.mask->src;
+               msk->dst |= match.mask->dst;
        }
 }
 
@@ -349,12 +397,17 @@ nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext,
 {
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
                struct flow_match_ipv6_addrs match;
+               int i;
 
                flow_rule_match_enc_ipv6_addrs(rule, &match);
-               ext->src = match.key->src;
-               ext->dst = match.key->dst;
-               msk->src = match.mask->src;
-               msk->dst = match.mask->dst;
+               for (i = 0; i < sizeof(ext->src); i++) {
+                       ext->src.s6_addr[i] |= match.key->src.s6_addr[i] &
+                                              match.mask->src.s6_addr[i];
+                       ext->dst.s6_addr[i] |= match.key->dst.s6_addr[i] &
+                                              match.mask->dst.s6_addr[i];
+                       msk->src.s6_addr[i] |= match.mask->src.s6_addr[i];
+                       msk->dst.s6_addr[i] |= match.mask->dst.s6_addr[i];
+               }
        }
 }
 
@@ -367,10 +420,10 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
                struct flow_match_ip match;
 
                flow_rule_match_enc_ip(rule, &match);
-               ext->tos = match.key->tos;
-               ext->ttl = match.key->ttl;
-               msk->tos = match.mask->tos;
-               msk->ttl = match.mask->ttl;
+               ext->tos |= match.key->tos & match.mask->tos;
+               ext->ttl |= match.key->ttl & match.mask->ttl;
+               msk->tos |= match.mask->tos;
+               msk->ttl |= match.mask->ttl;
        }
 }
 
@@ -383,10 +436,11 @@ nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk,
                u32 vni;
 
                flow_rule_match_enc_keyid(rule, &match);
-               vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
-               *key = cpu_to_be32(vni);
+               vni = be32_to_cpu((match.key->keyid & match.mask->keyid)) <<
+                     NFP_FL_TUN_VNI_OFFSET;
+               *key |= cpu_to_be32(vni);
                vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
-               *key_msk = cpu_to_be32(vni);
+               *key_msk |= cpu_to_be32(vni);
        }
 }
 
@@ -398,22 +452,19 @@ nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags,
                struct flow_match_enc_keyid match;
 
                flow_rule_match_enc_keyid(rule, &match);
-               *key = match.key->keyid;
-               *key_msk = match.mask->keyid;
+               *key |= match.key->keyid & match.mask->keyid;
+               *key_msk |= match.mask->keyid;
 
                *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
                *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
        }
 }
 
-static void
+void
 nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
                                struct nfp_flower_ipv4_gre_tun *msk,
                                struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
-
        /* NVGRE is the only supported GRE tunnel type */
        ext->ethertype = cpu_to_be16(ETH_P_TEB);
        msk->ethertype = cpu_to_be16(~0);
@@ -424,40 +475,31 @@ nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
                                       &ext->tun_flags, &msk->tun_flags, rule);
 }
 
-static void
+void
 nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
                                struct nfp_flower_ipv4_udp_tun *msk,
                                struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
-
        nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule);
        nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
        nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
 }
 
-static void
+void
 nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext,
                                struct nfp_flower_ipv6_udp_tun *msk,
                                struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
-
        nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule);
        nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
        nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
 }
 
-static void
+void
 nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
                                struct nfp_flower_ipv6_gre_tun *msk,
                                struct flow_rule *rule)
 {
-       memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
-       memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
-
        /* NVGRE is the only supported GRE tunnel type */
        ext->ethertype = cpu_to_be16(ETH_P_TEB);
        msk->ethertype = cpu_to_be16(~0);
@@ -469,14 +511,13 @@ nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
 }
 
 int nfp_flower_compile_flow_match(struct nfp_app *app,
-                                 struct flow_cls_offload *flow,
+                                 struct flow_rule *rule,
                                  struct nfp_fl_key_ls *key_ls,
                                  struct net_device *netdev,
                                  struct nfp_fl_payload *nfp_flow,
                                  enum nfp_flower_tun_type tun_type,
                                  struct netlink_ext_ack *extack)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
        struct nfp_flower_priv *priv = app->priv;
        bool qinq_sup;
        u32 port_id;
@@ -527,9 +568,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
        msk += sizeof(struct nfp_flower_in_port);
 
        if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
-               err = nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
-                                            (struct nfp_flower_mac_mpls *)msk,
-                                            rule, extack);
+               err = nfp_flower_compile_mac_mpls((struct nfp_flower_mac_mpls *)ext,
+                                                 (struct nfp_flower_mac_mpls *)msk,
+                                                 rule, extack);
                if (err)
                        return err;
 
@@ -640,9 +681,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
                }
 
                if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
-                       err = nfp_flower_compile_geneve_opt(ext, msk, rule);
-                       if (err)
-                               return err;
+                       nfp_flower_compile_geneve_opt(ext, msk, rule);
                }
        }
 
index 6211136..2af9fae 100644 (file)
@@ -290,8 +290,7 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len,
        return true;
 }
 
-int nfp_compile_flow_metadata(struct nfp_app *app,
-                             struct flow_cls_offload *flow,
+int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie,
                              struct nfp_fl_payload *nfp_flow,
                              struct net_device *netdev,
                              struct netlink_ext_ack *extack)
@@ -310,7 +309,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
        }
 
        nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt);
-       nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie);
+       nfp_flow->meta.host_cookie = cpu_to_be64(cookie);
        nfp_flow->ingress_dev = netdev;
 
        ctx_entry = kzalloc(sizeof(*ctx_entry), GFP_KERNEL);
@@ -357,7 +356,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
        priv->stats[stats_cxt].bytes = 0;
        priv->stats[stats_cxt].used = jiffies;
 
-       check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev);
+       check_entry = nfp_flower_search_fl_table(app, cookie, netdev);
        if (check_entry) {
                NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry");
                if (nfp_release_stats_entry(app, stats_cxt)) {
index 2406d33..556c349 100644 (file)
@@ -41,6 +41,8 @@
         BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | \
         BIT(FLOW_DISSECTOR_KEY_ENC_IP) | \
         BIT(FLOW_DISSECTOR_KEY_MPLS) | \
+        BIT(FLOW_DISSECTOR_KEY_CT) | \
+        BIT(FLOW_DISSECTOR_KEY_META) | \
         BIT(FLOW_DISSECTOR_KEY_IP))
 
 #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
@@ -89,7 +91,7 @@ struct nfp_flower_merge_check {
        };
 };
 
-static int
+int
 nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
                     u8 mtype)
 {
@@ -134,20 +136,16 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
        return 0;
 }
 
-static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f)
+static bool nfp_flower_check_higher_than_mac(struct flow_rule *rule)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(f);
-
        return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
               flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
               flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
               flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
 }
 
-static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f)
+static bool nfp_flower_check_higher_than_l3(struct flow_rule *rule)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(f);
-
        return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
               flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
 }
@@ -236,15 +234,14 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
        return 0;
 }
 
-static int
+int
 nfp_flower_calculate_key_layers(struct nfp_app *app,
                                struct net_device *netdev,
                                struct nfp_fl_key_ls *ret_key_ls,
-                               struct flow_cls_offload *flow,
+                               struct flow_rule *rule,
                                enum nfp_flower_tun_type *tun_type,
                                struct netlink_ext_ack *extack)
 {
-       struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
        struct flow_dissector *dissector = rule->match.dissector;
        struct flow_match_basic basic = { NULL, NULL};
        struct nfp_flower_priv *priv = app->priv;
@@ -452,7 +449,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
                        NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on given EtherType is not supported");
                        return -EOPNOTSUPP;
                }
-       } else if (nfp_flower_check_higher_than_mac(flow)) {
+       } else if (nfp_flower_check_higher_than_mac(rule)) {
                NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match above L2 without specified EtherType");
                return -EOPNOTSUPP;
        }
@@ -471,7 +468,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
        }
 
        if (!(key_layer & NFP_FLOWER_LAYER_TP) &&
-           nfp_flower_check_higher_than_l3(flow)) {
+           nfp_flower_check_higher_than_l3(rule)) {
                NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot match on L4 information without specified IP protocol type");
                return -EOPNOTSUPP;
        }
@@ -543,7 +540,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
        return 0;
 }
 
-static struct nfp_fl_payload *
+struct nfp_fl_payload *
 nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
 {
        struct nfp_fl_payload *flow_pay;
@@ -1005,9 +1002,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
                                     struct nfp_fl_payload *sub_flow1,
                                     struct nfp_fl_payload *sub_flow2)
 {
-       struct flow_cls_offload merge_tc_off;
        struct nfp_flower_priv *priv = app->priv;
-       struct netlink_ext_ack *extack = NULL;
        struct nfp_fl_payload *merge_flow;
        struct nfp_fl_key_ls merge_key_ls;
        struct nfp_merge_info *merge_info;
@@ -1016,7 +1011,6 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
 
        ASSERT_RTNL();
 
-       extack = merge_tc_off.common.extack;
        if (sub_flow1 == sub_flow2 ||
            nfp_flower_is_merge_flow(sub_flow1) ||
            nfp_flower_is_merge_flow(sub_flow2))
@@ -1061,9 +1055,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app,
        if (err)
                goto err_unlink_sub_flow1;
 
-       merge_tc_off.cookie = merge_flow->tc_flower_cookie;
-       err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow,
-                                       merge_flow->ingress_dev, extack);
+       err = nfp_compile_flow_metadata(app, merge_flow->tc_flower_cookie, merge_flow,
+                                       merge_flow->ingress_dev, NULL);
        if (err)
                goto err_unlink_sub_flow2;
 
@@ -1305,6 +1298,7 @@ static int
 nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
                       struct flow_cls_offload *flow)
 {
+       struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
        enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
        struct nfp_flower_priv *priv = app->priv;
        struct netlink_ext_ack *extack = NULL;
@@ -1330,7 +1324,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
        if (!key_layer)
                return -ENOMEM;
 
-       err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow,
+       err = nfp_flower_calculate_key_layers(app, netdev, key_layer, rule,
                                              &tun_type, extack);
        if (err)
                goto err_free_key_ls;
@@ -1341,12 +1335,12 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
                goto err_free_key_ls;
        }
 
-       err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev,
+       err = nfp_flower_compile_flow_match(app, rule, key_layer, netdev,
                                            flow_pay, tun_type, extack);
        if (err)
                goto err_destroy_flow;
 
-       err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack);
+       err = nfp_flower_compile_action(app, rule, netdev, flow_pay, extack);
        if (err)
                goto err_destroy_flow;
 
@@ -1356,7 +1350,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
                        goto err_destroy_flow;
        }
 
-       err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack);
+       err = nfp_compile_flow_metadata(app, flow->cookie, flow_pay, netdev, extack);
        if (err)
                goto err_destroy_flow;
 
@@ -1476,7 +1470,7 @@ err_free_links:
        kfree_rcu(merge_flow, rcu);
 }
 
-static void
+void
 nfp_flower_del_linked_merge_flows(struct nfp_app *app,
                                  struct nfp_fl_payload *sub_flow)
 {
@@ -1601,7 +1595,7 @@ __nfp_flower_update_merge_stats(struct nfp_app *app,
        }
 }
 
-static void
+void
 nfp_flower_update_merge_stats(struct nfp_app *app,
                              struct nfp_fl_payload *sub_flow)
 {
@@ -1628,10 +1622,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
                     struct flow_cls_offload *flow)
 {
        struct nfp_flower_priv *priv = app->priv;
+       struct nfp_fl_ct_map_entry *ct_map_ent;
        struct netlink_ext_ack *extack = NULL;
        struct nfp_fl_payload *nfp_flow;
        u32 ctx_id;
 
+       /* Check ct_map table first */
+       ct_map_ent = rhashtable_lookup_fast(&priv->ct_map_table, &flow->cookie,
+                                           nfp_ct_map_params);
+       if (ct_map_ent)
+               return nfp_fl_ct_stats(flow, ct_map_ent);
+
        extack = flow->common.extack;
        nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev);
        if (!nfp_flow) {
index 742a420..bb3b8a7 100644 (file)
@@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
                goto err_pci_disable;
        }
 
-       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf));
+       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev);
        if (!devlink) {
                err = -ENOMEM;
                goto err_rel_regions;
index df5b748..df20373 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
+#include <linux/dim.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/semaphore.h>
 #include <linux/workqueue.h>
@@ -360,6 +361,9 @@ struct nfp_net_rx_ring {
  * @rx_ring:        Pointer to RX ring
  * @xdp_ring:      Pointer to an extra TX ring for XDP
  * @irq_entry:      MSI-X table entry (use for talking to the device)
+ * @event_ctr:     Number of interrupt
+ * @rx_dim:        Dynamic interrupt moderation structure for RX
+ * @tx_dim:        Dynamic interrupt moderation structure for TX
  * @rx_sync:       Seqlock for atomic updates of RX stats
  * @rx_pkts:        Number of received packets
  * @rx_bytes:      Number of received bytes
@@ -410,6 +414,10 @@ struct nfp_net_r_vector {
 
        u16 irq_entry;
 
+       u16 event_ctr;
+       struct dim rx_dim;
+       struct dim tx_dim;
+
        struct u64_stats_sync rx_sync;
        u64 rx_pkts;
        u64 rx_bytes;
@@ -571,6 +579,8 @@ struct nfp_net_dp {
  *                     mailbox area, crypto TLV
  * @link_up:            Is the link up?
  * @link_status_lock:  Protects @link_* and ensures atomicity with BAR reading
+ * @rx_coalesce_adapt_on:   Is RX interrupt moderation adaptive?
+ * @tx_coalesce_adapt_on:   Is TX interrupt moderation adaptive?
  * @rx_coalesce_usecs:      RX interrupt moderation usecs delay parameter
  * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter
  * @tx_coalesce_usecs:      TX interrupt moderation usecs delay parameter
@@ -654,6 +664,8 @@ struct nfp_net {
 
        struct semaphore bar_lock;
 
+       bool rx_coalesce_adapt_on;
+       bool tx_coalesce_adapt_on;
        u32 rx_coalesce_usecs;
        u32 rx_coalesce_max_frames;
        u32 tx_coalesce_usecs;
@@ -919,6 +931,14 @@ static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev)
        return netdev->netdev_ops == &nfp_net_netdev_ops;
 }
 
+static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts)
+{
+       if ((usecs >= ((1 << 16) - 1)) || (pkts >= ((1 << 16) - 1)))
+               return -EINVAL;
+
+       return 0;
+}
+
 /* Prototypes */
 void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
                            void __iomem *ctrl_bar);
index 5dfa479..5bfa22a 100644 (file)
@@ -474,6 +474,12 @@ static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
 {
        struct nfp_net_r_vector *r_vec = data;
 
+       /* Currently we cannot tell if it's a rx or tx interrupt,
+        * since dim does not need accurate event_ctr to calculate,
+        * we just use this counter for both rx and tx dim.
+        */
+       r_vec->event_ctr++;
+
        napi_schedule_irqoff(&r_vec->napi);
 
        /* The FW auto-masks any interrupt, either via the MASK bit in
@@ -1697,7 +1703,7 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
                case NFP_NET_META_RESYNC_INFO:
                        if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
                                                      pkt_len))
-                               return NULL;
+                               return false;
                        data += sizeof(struct nfp_net_tls_resync_req);
                        break;
                default:
@@ -2061,6 +2067,36 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
                if (napi_complete_done(napi, pkts_polled))
                        nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
 
+       if (r_vec->nfp_net->rx_coalesce_adapt_on) {
+               struct dim_sample dim_sample = {};
+               unsigned int start;
+               u64 pkts, bytes;
+
+               do {
+                       start = u64_stats_fetch_begin(&r_vec->rx_sync);
+                       pkts = r_vec->rx_pkts;
+                       bytes = r_vec->rx_bytes;
+               } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
+
+               dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
+               net_dim(&r_vec->rx_dim, dim_sample);
+       }
+
+       if (r_vec->nfp_net->tx_coalesce_adapt_on) {
+               struct dim_sample dim_sample = {};
+               unsigned int start;
+               u64 pkts, bytes;
+
+               do {
+                       start = u64_stats_fetch_begin(&r_vec->tx_sync);
+                       pkts = r_vec->tx_pkts;
+                       bytes = r_vec->tx_bytes;
+               } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
+
+               dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
+               net_dim(&r_vec->tx_dim, dim_sample);
+       }
+
        return pkts_polled;
 }
 
@@ -2873,6 +2909,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn)
  */
 static void nfp_net_close_stack(struct nfp_net *nn)
 {
+       struct nfp_net_r_vector *r_vec;
        unsigned int r;
 
        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
@@ -2880,8 +2917,16 @@ static void nfp_net_close_stack(struct nfp_net *nn)
        nn->link_up = false;
 
        for (r = 0; r < nn->dp.num_r_vecs; r++) {
-               disable_irq(nn->r_vecs[r].irq_vector);
-               napi_disable(&nn->r_vecs[r].napi);
+               r_vec = &nn->r_vecs[r];
+
+               disable_irq(r_vec->irq_vector);
+               napi_disable(&r_vec->napi);
+
+               if (r_vec->rx_ring)
+                       cancel_work_sync(&r_vec->rx_dim.work);
+
+               if (r_vec->tx_ring)
+                       cancel_work_sync(&r_vec->tx_dim.work);
        }
 
        netif_tx_disable(nn->dp.netdev);
@@ -2948,17 +2993,92 @@ void nfp_ctrl_close(struct nfp_net *nn)
        rtnl_unlock();
 }
 
+static void nfp_net_rx_dim_work(struct work_struct *work)
+{
+       struct nfp_net_r_vector *r_vec;
+       unsigned int factor, value;
+       struct dim_cq_moder moder;
+       struct nfp_net *nn;
+       struct dim *dim;
+
+       dim = container_of(work, struct dim, work);
+       moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
+       r_vec = container_of(dim, struct nfp_net_r_vector, rx_dim);
+       nn = r_vec->nfp_net;
+
+       /* Compute factor used to convert coalesce '_usecs' parameters to
+        * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
+        * count.
+        */
+       factor = nn->tlv_caps.me_freq_mhz / 16;
+       if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts))
+               return;
+
+       /* copy RX interrupt coalesce parameters */
+       value = (moder.pkts << 16) | (factor * moder.usec);
+       rtnl_lock();
+       nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(r_vec->rx_ring->idx), value);
+       (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+       rtnl_unlock();
+
+       dim->state = DIM_START_MEASURE;
+}
+
+static void nfp_net_tx_dim_work(struct work_struct *work)
+{
+       struct nfp_net_r_vector *r_vec;
+       unsigned int factor, value;
+       struct dim_cq_moder moder;
+       struct nfp_net *nn;
+       struct dim *dim;
+
+       dim = container_of(work, struct dim, work);
+       moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
+       r_vec = container_of(dim, struct nfp_net_r_vector, tx_dim);
+       nn = r_vec->nfp_net;
+
+       /* Compute factor used to convert coalesce '_usecs' parameters to
+        * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
+        * count.
+        */
+       factor = nn->tlv_caps.me_freq_mhz / 16;
+       if (nfp_net_coalesce_para_check(factor * moder.usec, moder.pkts))
+               return;
+
+       /* copy TX interrupt coalesce parameters */
+       value = (moder.pkts << 16) | (factor * moder.usec);
+       rtnl_lock();
+       nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(r_vec->tx_ring->idx), value);
+       (void)nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
+       rtnl_unlock();
+
+       dim->state = DIM_START_MEASURE;
+}
+
 /**
  * nfp_net_open_stack() - Start the device from stack's perspective
  * @nn:      NFP Net device to reconfigure
  */
 static void nfp_net_open_stack(struct nfp_net *nn)
 {
+       struct nfp_net_r_vector *r_vec;
        unsigned int r;
 
        for (r = 0; r < nn->dp.num_r_vecs; r++) {
-               napi_enable(&nn->r_vecs[r].napi);
-               enable_irq(nn->r_vecs[r].irq_vector);
+               r_vec = &nn->r_vecs[r];
+
+               if (r_vec->rx_ring) {
+                       INIT_WORK(&r_vec->rx_dim.work, nfp_net_rx_dim_work);
+                       r_vec->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+               }
+
+               if (r_vec->tx_ring) {
+                       INIT_WORK(&r_vec->tx_dim.work, nfp_net_tx_dim_work);
+                       r_vec->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+               }
+
+               napi_enable(&r_vec->napi);
+               enable_irq(r_vec->irq_vector);
        }
 
        netif_tx_wake_all_queues(nn->dp.netdev);
@@ -3161,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
        for (r = 0; r < nn->max_r_vecs; r++)
                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
 
-       err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
+       err = netif_set_real_num_queues(nn->dp.netdev,
+                                       nn->dp.num_stack_tx_rings,
+                                       nn->dp.num_rx_rings);
        if (err)
                return err;
 
-       if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
-               err = netif_set_real_num_tx_queues(nn->dp.netdev,
-                                                  nn->dp.num_stack_tx_rings);
-               if (err)
-                       return err;
-       }
-
        return nfp_net_set_config_and_enable(nn);
 }
 
@@ -3893,6 +4008,9 @@ static void nfp_net_irqmod_init(struct nfp_net *nn)
        nn->rx_coalesce_max_frames = 64;
        nn->tx_coalesce_usecs      = 50;
        nn->tx_coalesce_max_frames = 64;
+
+       nn->rx_coalesce_adapt_on   = true;
+       nn->tx_coalesce_adapt_on   = true;
 }
 
 static void nfp_net_netdev_init(struct nfp_net *nn)
index 8803faa..0685ece 100644 (file)
@@ -1078,13 +1078,18 @@ static void nfp_net_get_regs(struct net_device *netdev,
 }
 
 static int nfp_net_get_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct nfp_net *nn = netdev_priv(netdev);
 
        if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD))
                return -EINVAL;
 
+       ec->use_adaptive_rx_coalesce = nn->rx_coalesce_adapt_on;
+       ec->use_adaptive_tx_coalesce = nn->tx_coalesce_adapt_on;
+
        ec->rx_coalesce_usecs       = nn->rx_coalesce_usecs;
        ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames;
        ec->tx_coalesce_usecs       = nn->tx_coalesce_usecs;
@@ -1327,7 +1332,9 @@ exit_close_nsp:
 }
 
 static int nfp_net_set_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct nfp_net *nn = netdev_priv(netdev);
        unsigned int factor;
@@ -1361,19 +1368,18 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
        if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames)
                return -EINVAL;
 
-       if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1))
-               return -EINVAL;
-
-       if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1))
+       if (nfp_net_coalesce_para_check(ec->rx_coalesce_usecs * factor,
+                                       ec->rx_max_coalesced_frames))
                return -EINVAL;
 
-       if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1))
-               return -EINVAL;
-
-       if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1))
+       if (nfp_net_coalesce_para_check(ec->tx_coalesce_usecs * factor,
+                                       ec->tx_max_coalesced_frames))
                return -EINVAL;
 
        /* configuration is valid */
+       nn->rx_coalesce_adapt_on = !!ec->use_adaptive_rx_coalesce;
+       nn->tx_coalesce_adapt_on = !!ec->use_adaptive_tx_coalesce;
+
        nn->rx_coalesce_usecs      = ec->rx_coalesce_usecs;
        nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames;
        nn->tx_coalesce_usecs      = ec->tx_coalesce_usecs;
@@ -1445,7 +1451,8 @@ static int nfp_net_set_channels(struct net_device *netdev,
 
 static const struct ethtool_ops nfp_net_ethtool_ops = {
        .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-                                    ETHTOOL_COALESCE_MAX_FRAMES,
+                                    ETHTOOL_COALESCE_MAX_FRAMES |
+                                    ETHTOOL_COALESCE_USE_ADAPTIVE,
        .get_drvinfo            = nfp_net_get_drvinfo,
        .get_link               = ethtool_op_get_link,
        .get_ringparam          = nfp_net_get_ringparam,
index 921db40..d10a938 100644 (file)
@@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
        if (err)
                goto err_unmap;
 
-       err = devlink_register(devlink, &pf->pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_app_clean;
 
index 2d097dc..346145d 100644 (file)
@@ -993,8 +993,11 @@ static void nixge_ethtools_get_drvinfo(struct net_device *ndev,
        strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info));
 }
 
-static int nixge_ethtools_get_coalesce(struct net_device *ndev,
-                                      struct ethtool_coalesce *ecoalesce)
+static int
+nixge_ethtools_get_coalesce(struct net_device *ndev,
+                           struct ethtool_coalesce *ecoalesce,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct nixge_priv *priv = netdev_priv(ndev);
        u32 regval = 0;
@@ -1008,8 +1011,11 @@ static int nixge_ethtools_get_coalesce(struct net_device *ndev,
        return 0;
 }
 
-static int nixge_ethtools_set_coalesce(struct net_device *ndev,
-                                      struct ethtool_coalesce *ecoalesce)
+static int
+nixge_ethtools_set_coalesce(struct net_device *ndev,
+                           struct ethtool_coalesce *ecoalesce,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct nixge_priv *priv = netdev_priv(ndev);
 
@@ -1223,7 +1229,6 @@ static int nixge_of_get_resources(struct platform_device *pdev)
 {
        const struct of_device_id *of_id;
        enum nixge_version version;
-       struct resource *ctrlres;
        struct net_device *ndev;
        struct nixge_priv *priv;
 
@@ -1242,13 +1247,10 @@ static int nixge_of_get_resources(struct platform_device *pdev)
                netdev_err(ndev, "failed to map dma regs\n");
                return PTR_ERR(priv->dma_regs);
        }
-       if (version <= NIXGE_V2) {
+       if (version <= NIXGE_V2)
                priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET;
-       } else {
-               ctrlres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                                      "ctrl");
-               priv->ctrl_regs = devm_ioremap_resource(&pdev->dev, ctrlres);
-       }
+       else
+               priv->ctrl_regs = devm_platform_ioremap_resource_byname(pdev, "ctrl");
        if (IS_ERR(priv->ctrl_regs)) {
                netdev_err(ndev, "failed to map ctrl regs\n");
                return PTR_ERR(priv->ctrl_regs);
index 8724d6a..ef3fb4c 100644 (file)
@@ -5782,15 +5782,11 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
                np->desc_ver = DESC_VER_3;
                np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
                if (dma_64bit) {
-                       if (pci_set_dma_mask(pci_dev, DMA_BIT_MASK(39)))
+                       if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(39)))
                                dev_info(&pci_dev->dev,
                                         "64-bit DMA failed, using 32-bit addressing\n");
                        else
                                dev->features |= NETIF_F_HIGHDMA;
-                       if (pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(39))) {
-                               dev_info(&pci_dev->dev,
-                                        "64-bit DMA (consistent) failed, using 32-bit ring buffers\n");
-                       }
                }
        } else if (id->driver_data & DEV_HAS_LARGEDESC) {
                /* packet format 2: supports jumbo frames */
index 64c6842..d29fe56 100644 (file)
@@ -1219,7 +1219,7 @@ static const struct net_device_ops lpc_netdev_ops = {
        .ndo_stop               = lpc_eth_close,
        .ndo_start_xmit         = lpc_eth_hard_start_xmit,
        .ndo_set_rx_mode        = lpc_eth_set_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_mac_address    = lpc_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
 };
index af84f72..4e18b64 100644 (file)
@@ -6,6 +6,7 @@
 config PCH_GBE
        tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
        depends on PCI && (X86_32 || COMPILE_TEST)
+       depends on PTP_1588_CLOCK
        select MII
        select PTP_1588_CLOCK_PCH
        select NET_PTP_CLASSIFY
index e351f3d..ec3e558 100644 (file)
@@ -1031,13 +1031,7 @@ static void pch_gbe_watchdog(struct timer_list *t)
                struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
                netdev->tx_queue_len = adapter->tx_queue_len;
                /* mii library handles link maintenance tasks */
-               if (mii_ethtool_gset(&adapter->mii, &cmd)) {
-                       netdev_err(netdev, "ethtool get setting Error\n");
-                       mod_timer(&adapter->watchdog_timer,
-                                 round_jiffies(jiffies +
-                                               PCH_GBE_WATCHDOG_PERIOD));
-                       return;
-               }
+               mii_ethtool_gset(&adapter->mii, &cmd);
                hw->mac.link_speed = ethtool_cmd_speed(&cmd);
                hw->mac.link_duplex = cmd.duplex;
                /* Set the RGMII control. */
@@ -2333,7 +2327,7 @@ static const struct net_device_ops pch_gbe_netdev_ops = {
        .ndo_tx_timeout = pch_gbe_tx_timeout,
        .ndo_change_mtu = pch_gbe_change_mtu,
        .ndo_set_features = pch_gbe_set_features,
-       .ndo_do_ioctl = pch_gbe_ioctl,
+       .ndo_eth_ioctl = pch_gbe_ioctl,
        .ndo_set_rx_mode = pch_gbe_set_multi,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = pch_gbe_netpoll,
index ed83204..3426f6f 100644 (file)
@@ -301,9 +301,7 @@ void pch_gbe_phy_init_setting(struct pch_gbe_hw *hw)
        int ret;
        u16 mii_reg;
 
-       ret = mii_ethtool_gset(&adapter->mii, &cmd);
-       if (ret)
-               netdev_err(adapter->netdev, "Error: mii_ethtool_gset\n");
+       mii_ethtool_gset(&adapter->mii, &cmd);
 
        ethtool_cmd_speed_set(&cmd, hw->mac.link_speed);
        cmd.duplex = hw->mac.link_duplex;
index d058a63..1a6336a 100644 (file)
@@ -546,7 +546,9 @@ static int read_eeprom(void __iomem *ioaddr, int location);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
 static int hamachi_open(struct net_device *dev);
-static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                                 void __user *data, int cmd);
 static void hamachi_timer(struct timer_list *t);
 static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue);
 static void hamachi_init_ring(struct net_device *dev);
@@ -571,7 +573,8 @@ static const struct net_device_ops hamachi_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_tx_timeout         = hamachi_tx_timeout,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = hamachi_ioctl,
+       .ndo_siocdevprivate     = hamachi_siocdevprivate,
 };
 
 
@@ -1867,7 +1870,36 @@ static const struct ethtool_ops ethtool_ops_no_mii = {
        .get_drvinfo = hamachi_get_drvinfo,
 };
 
-static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+/* private ioctl: set rx,tx intr params */
+static int hamachi_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                                 void __user *data, int cmd)
+{
+       struct hamachi_private *np = netdev_priv(dev);
+       u32 *d = (u32 *)&rq->ifr_ifru;
+
+       if (!netif_running(dev))
+               return -EINVAL;
+
+       if (cmd != SIOCDEVPRIVATE + 3)
+               return -EOPNOTSUPP;
+
+       /* Should add this check here or an ordinary user can do nasty
+        * things. -KDU
+        *
+        * TODO: Shut down the Rx and Tx engines while doing this.
+        */
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+       writel(d[0], np->base + TxIntrCtrl);
+       writel(d[1], np->base + RxIntrCtrl);
+       printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name,
+              (u32)readl(np->base + TxIntrCtrl),
+              (u32)readl(np->base + RxIntrCtrl));
+
+       return 0;
+}
+
+static int hamachi_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        struct hamachi_private *np = netdev_priv(dev);
        struct mii_ioctl_data *data = if_mii(rq);
@@ -1876,28 +1908,9 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        if (!netif_running(dev))
                return -EINVAL;
 
-       if (cmd == (SIOCDEVPRIVATE+3)) { /* set rx,tx intr params */
-               u32 *d = (u32 *)&rq->ifr_ifru;
-               /* Should add this check here or an ordinary user can do nasty
-                * things. -KDU
-                *
-                * TODO: Shut down the Rx and Tx engines while doing this.
-                */
-               if (!capable(CAP_NET_ADMIN))
-                       return -EPERM;
-               writel(d[0], np->base + TxIntrCtrl);
-               writel(d[1], np->base + RxIntrCtrl);
-               printk(KERN_NOTICE "%s: tx %08x, rx %08x intr\n", dev->name,
-                 (u32) readl(np->base + TxIntrCtrl),
-                 (u32) readl(np->base + RxIntrCtrl));
-               rc = 0;
-       }
-
-       else {
-               spin_lock_irq(&np->lock);
-               rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL);
-               spin_unlock_irq(&np->lock);
-       }
+       spin_lock_irq(&np->lock);
+       rc = generic_mii_ioctl(&np->mii_if, data, cmd, NULL);
+       spin_unlock_irq(&np->lock);
 
        return rc;
 }
index d1dd9bc..f5cd8f5 100644 (file)
@@ -362,7 +362,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_tx_timeout         = yellowfin_tx_timeout,
 };
 
index 040a15a..7e096b2 100644 (file)
@@ -247,12 +247,13 @@ static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
        int f;
        struct pci_dev *pdev = mac->dma_pdev;
 
-       pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE);
+       dma_unmap_single(&pdev->dev, dmas[0], skb_headlen(skb), DMA_TO_DEVICE);
 
        for (f = 0; f < nfrags; f++) {
                const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
 
-               pci_unmap_page(pdev, dmas[f+1], skb_frag_size(frag), PCI_DMA_TODEVICE);
+               dma_unmap_page(&pdev->dev, dmas[f + 1], skb_frag_size(frag),
+                              DMA_TO_DEVICE);
        }
        dev_kfree_skb_irq(skb);
 
@@ -548,10 +549,8 @@ static void pasemi_mac_free_rx_buffers(struct pasemi_mac *mac)
        for (i = 0; i < RX_RING_SIZE; i++) {
                info = &RX_DESC_INFO(rx, i);
                if (info->skb && info->dma) {
-                       pci_unmap_single(mac->dma_pdev,
-                                        info->dma,
-                                        info->skb->len,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&mac->dma_pdev->dev, info->dma,
+                                        info->skb->len, DMA_FROM_DEVICE);
                        dev_kfree_skb_any(info->skb);
                }
                info->dma = 0;
@@ -600,11 +599,11 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev,
                if (unlikely(!skb))
                        break;
 
-               dma = pci_map_single(mac->dma_pdev, skb->data,
+               dma = dma_map_single(&mac->dma_pdev->dev, skb->data,
                                     mac->bufsz - LOCAL_SKB_ALIGN,
-                                    PCI_DMA_FROMDEVICE);
+                                    DMA_FROM_DEVICE);
 
-               if (unlikely(pci_dma_mapping_error(mac->dma_pdev, dma))) {
+               if (dma_mapping_error(&mac->dma_pdev->dev, dma)) {
                        dev_kfree_skb_irq(info->skb);
                        break;
                }
@@ -741,8 +740,9 @@ static int pasemi_mac_clean_rx(struct pasemi_mac_rxring *rx,
 
                len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
 
-               pci_unmap_single(pdev, dma, mac->bufsz - LOCAL_SKB_ALIGN,
-                                PCI_DMA_FROMDEVICE);
+               dma_unmap_single(&pdev->dev, dma,
+                                mac->bufsz - LOCAL_SKB_ALIGN,
+                                DMA_FROM_DEVICE);
 
                if (macrx & XCT_MACRX_CRC) {
                        /* CRC error flagged */
@@ -1444,10 +1444,10 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 
        nfrags = skb_shinfo(skb)->nr_frags;
 
-       map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb),
-                               PCI_DMA_TODEVICE);
+       map[0] = dma_map_single(&mac->dma_pdev->dev, skb->data,
+                               skb_headlen(skb), DMA_TO_DEVICE);
        map_size[0] = skb_headlen(skb);
-       if (pci_dma_mapping_error(mac->dma_pdev, map[0]))
+       if (dma_mapping_error(&mac->dma_pdev->dev, map[0]))
                goto out_err_nolock;
 
        for (i = 0; i < nfrags; i++) {
@@ -1534,8 +1534,8 @@ out_err:
        spin_unlock_irqrestore(&txring->lock, flags);
 out_err_nolock:
        while (nfrags--)
-               pci_unmap_single(mac->dma_pdev, map[nfrags], map_size[nfrags],
-                                PCI_DMA_TODEVICE);
+               dma_unmap_single(&mac->dma_pdev->dev, map[nfrags],
+                                map_size[nfrags], DMA_TO_DEVICE);
 
        return NETDEV_TX_BUSY;
 }
index 202973a..3f7519e 100644 (file)
@@ -20,7 +20,7 @@ if NET_VENDOR_PENSANDO
 config IONIC
        tristate "Pensando Ethernet IONIC Support"
        depends on 64BIT && PCI
-       depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        select NET_DEVLINK
        select DIMLIB
        help
index e4a5416..7e296fa 100644 (file)
@@ -165,10 +165,10 @@ static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
                        goto out;
                }
 
+               ionic->num_vfs++;
                /* ignore failures from older FW, we just won't get stats */
                (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
                                          (u8 *)&v->stats_pa);
-               ionic->num_vfs++;
        }
 
 out:
@@ -373,9 +373,6 @@ static void ionic_remove(struct pci_dev *pdev)
 {
        struct ionic *ionic = pci_get_drvdata(pdev);
 
-       if (!ionic)
-               return;
-
        del_timer_sync(&ionic->watchdog_timer);
 
        if (ionic->lif) {
index 1dfe962..0d6858a 100644 (file)
@@ -15,6 +15,7 @@ static void ionic_watchdog_cb(struct timer_list *t)
 {
        struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
        struct ionic_lif *lif = ionic->lif;
+       struct ionic_deferred_work *work;
        int hb;
 
        mod_timer(&ionic->watchdog_timer,
@@ -31,6 +32,18 @@ static void ionic_watchdog_cb(struct timer_list *t)
        if (hb >= 0 &&
            !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
+
+       if (test_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state)) {
+               work = kzalloc(sizeof(*work), GFP_ATOMIC);
+               if (!work) {
+                       netdev_err(lif->netdev, "rxmode change dropped\n");
+                       return;
+               }
+
+               work->type = IONIC_DW_TYPE_RX_MODE;
+               netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+               ionic_lif_deferred_enqueue(&lif->deferred, work);
+       }
 }
 
 void ionic_init_devinfo(struct ionic *ionic)
@@ -106,6 +119,8 @@ int ionic_dev_setup(struct ionic *ionic)
        idev->last_fw_hb = 0;
        idev->fw_hb_ready = true;
        idev->fw_status_ready = true;
+       idev->fw_generation = IONIC_FW_STS_F_GENERATION &
+                             ioread8(&idev->dev_info_regs->fw_status);
 
        mod_timer(&ionic->watchdog_timer,
                  round_jiffies(jiffies + ionic->watchdog_period));
@@ -121,7 +136,9 @@ int ionic_heartbeat_check(struct ionic *ionic)
 {
        struct ionic_dev *idev = &ionic->idev;
        unsigned long check_time, last_check_time;
-       bool fw_status_ready, fw_hb_ready;
+       bool fw_status_ready = true;
+       bool fw_hb_ready;
+       u8 fw_generation;
        u8 fw_status;
        u32 fw_hb;
 
@@ -140,9 +157,29 @@ do_check_time:
 
        /* firmware is useful only if the running bit is set and
         * fw_status != 0xff (bad PCI read)
+        * If fw_status is not ready don't bother with the generation.
         */
        fw_status = ioread8(&idev->dev_info_regs->fw_status);
-       fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
+
+       if (fw_status == 0xff || !(fw_status & IONIC_FW_STS_F_RUNNING)) {
+               fw_status_ready = false;
+       } else {
+               fw_generation = fw_status & IONIC_FW_STS_F_GENERATION;
+               if (idev->fw_generation != fw_generation) {
+                       dev_info(ionic->dev, "FW generation 0x%02x -> 0x%02x\n",
+                                idev->fw_generation, fw_generation);
+
+                       idev->fw_generation = fw_generation;
+
+                       /* If the generation changed, the fw status is not
+                        * ready so we need to trigger a fw-down cycle.  After
+                        * the down, the next watchdog will see the fw is up
+                        * and the generation value stable, so will trigger
+                        * the fw-up activity.
+                        */
+                       fw_status_ready = false;
+               }
+       }
 
        /* is this a transition? */
        if (fw_status_ready != idev->fw_status_ready) {
index c25cf9b..8311086 100644 (file)
@@ -143,6 +143,7 @@ struct ionic_dev {
        u32 last_fw_hb;
        bool fw_hb_ready;
        bool fw_status_ready;
+       u8 fw_generation;
 
        u64 __iomem *db_pages;
        dma_addr_t phy_db_pages;
@@ -160,8 +161,6 @@ struct ionic_dev {
 struct ionic_cq_info {
        union {
                void *cq_desc;
-               struct ionic_txq_comp *txcq;
-               struct ionic_rxq_comp *rxcq;
                struct ionic_admin_comp *admincq;
                struct ionic_notifyq_event *notifyq;
        };
index b41301a..c7d0e19 100644 (file)
@@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
 
        return devlink_priv(dl);
 }
@@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic)
        struct devlink_port_attrs attrs = {};
        int err;
 
-       err = devlink_register(dl, ionic->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
                return err;
@@ -91,20 +91,20 @@ int ionic_devlink_register(struct ionic *ionic)
        attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
        devlink_port_attrs_set(&ionic->dl_port, &attrs);
        err = devlink_port_register(dl, &ionic->dl_port, 0);
-       if (err)
+       if (err) {
                dev_err(ionic->dev, "devlink_port_register failed: %d\n", err);
-       else
-               devlink_port_type_eth_set(&ionic->dl_port,
-                                         ionic->lif->netdev);
+               devlink_unregister(dl);
+               return err;
+       }
 
-       return err;
+       devlink_port_type_eth_set(&ionic->dl_port, ionic->lif->netdev);
+       return 0;
 }
 
 void ionic_devlink_unregister(struct ionic *ionic)
 {
        struct devlink *dl = priv_to_devlink(ionic);
 
-       if (ionic->dl_port.registered)
-               devlink_port_unregister(&ionic->dl_port);
+       devlink_port_unregister(&ionic->dl_port);
        devlink_unregister(dl);
 }
index 6583be5..e91b487 100644 (file)
@@ -32,6 +32,9 @@ static void ionic_get_stats(struct net_device *netdev,
        struct ionic_lif *lif = netdev_priv(netdev);
        u32 i;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return;
+
        memset(buf, 0, stats->n_stats * sizeof(*buf));
        for (i = 0; i < ionic_num_stats_grps; i++)
                ionic_stats_groups[i].get_values(lif, &buf);
@@ -274,6 +277,9 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
        struct ionic *ionic = lif->ionic;
        int err = 0;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        /* set autoneg */
        if (ks->base.autoneg != idev->port_info->config.an_enable) {
                mutex_lock(&ionic->dev_cmd_lock);
@@ -320,6 +326,9 @@ static int ionic_set_pauseparam(struct net_device *netdev,
        u32 requested_pause;
        int err;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        if (pause->autoneg)
                return -EOPNOTSUPP;
 
@@ -372,6 +381,9 @@ static int ionic_set_fecparam(struct net_device *netdev,
        u8 fec_type;
        int ret = 0;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        if (lif->ionic->idev.port_info->config.an_enable) {
                netdev_err(netdev, "FEC request not allowed while autoneg is enabled\n");
                return -EINVAL;
@@ -408,7 +420,9 @@ static int ionic_set_fecparam(struct net_device *netdev,
 }
 
 static int ionic_get_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coalesce)
+                             struct ethtool_coalesce *coalesce,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ionic_lif *lif = netdev_priv(netdev);
 
@@ -426,7 +440,9 @@ static int ionic_get_coalesce(struct net_device *netdev,
 }
 
 static int ionic_set_coalesce(struct net_device *netdev,
-                             struct ethtool_coalesce *coalesce)
+                             struct ethtool_coalesce *coalesce,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic_identity *ident;
@@ -528,6 +544,9 @@ static int ionic_set_ringparam(struct net_device *netdev,
        struct ionic_queue_params qparam;
        int err;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        ionic_init_queue_params(lif, &qparam);
 
        if (ring->rx_mini_pending || ring->rx_jumbo_pending) {
@@ -597,6 +616,9 @@ static int ionic_set_channels(struct net_device *netdev,
        int max_cnt;
        int err;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        ionic_init_queue_params(lif, &qparam);
 
        if (ch->rx_count != ch->tx_count) {
@@ -947,6 +969,9 @@ static int ionic_nway_reset(struct net_device *netdev)
        struct ionic *ionic = lif->ionic;
        int err = 0;
 
+       if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+               return -EBUSY;
+
        /* flap the link to force auto-negotiation */
 
        mutex_lock(&ionic->dev_cmd_lock);
index 0478b48..278610e 100644 (file)
@@ -2936,6 +2936,8 @@ struct ionic_hwstamp_regs {
  * @asic_type:       Asic type
  * @asic_rev:        Asic revision
  * @fw_status:       Firmware status
+ *                     bit 0   - 1 = fw running
+ *                     bit 4-7 - 4 bit generation number, changes on fw restart
  * @fw_heartbeat:    Firmware heartbeat counter
  * @serial_num:      Serial number
  * @fw_version:      Firmware version
@@ -2949,7 +2951,8 @@ union ionic_dev_info_regs {
                u8     version;
                u8     asic_type;
                u8     asic_rev;
-#define IONIC_FW_STS_F_RUNNING 0x1
+#define IONIC_FW_STS_F_RUNNING         0x01
+#define IONIC_FW_STS_F_GENERATION      0xF0
                u8     fw_status;
                u32    fw_heartbeat;
                char   fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
index e795fa6..23c9e19 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/cpumask.h>
+#include <linux/crash_dump.h>
 
 #include "ionic.h"
 #include "ionic_bus.h"
@@ -29,9 +30,6 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
                                      */
 };
 
-static void ionic_lif_rx_mode(struct ionic_lif *lif);
-static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
-static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
 static void ionic_link_status_check(struct ionic_lif *lif);
 static void ionic_lif_handle_fw_down(struct ionic_lif *lif);
 static void ionic_lif_handle_fw_up(struct ionic_lif *lif);
@@ -91,20 +89,21 @@ static void ionic_lif_deferred_work(struct work_struct *work)
                case IONIC_DW_TYPE_RX_MODE:
                        ionic_lif_rx_mode(lif);
                        break;
-               case IONIC_DW_TYPE_RX_ADDR_ADD:
-                       ionic_lif_addr_add(lif, w->addr);
-                       break;
-               case IONIC_DW_TYPE_RX_ADDR_DEL:
-                       ionic_lif_addr_del(lif, w->addr);
-                       break;
                case IONIC_DW_TYPE_LINK_STATUS:
                        ionic_link_status_check(lif);
                        break;
                case IONIC_DW_TYPE_LIF_RESET:
-                       if (w->fw_status)
+                       if (w->fw_status) {
                                ionic_lif_handle_fw_up(lif);
-                       else
+                       } else {
                                ionic_lif_handle_fw_down(lif);
+
+                               /* Fire off another watchdog to see
+                                * if the FW is already back rather than
+                                * waiting another whole cycle
+                                */
+                               mod_timer(&lif->ionic->watchdog_timer, jiffies + 1);
+                       }
                        break;
                default:
                        break;
@@ -850,10 +849,8 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
        u64 features;
        int err;
 
-       mutex_lock(&lif->queue_lock);
-
        if (lif->hwstamp_txq)
-               goto out;
+               return 0;
 
        features = IONIC_Q_F_2X_CQ_DESC | IONIC_TXQ_F_HWSTAMP;
 
@@ -895,9 +892,6 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
                }
        }
 
-out:
-       mutex_unlock(&lif->queue_lock);
-
        return 0;
 
 err_qcq_enable:
@@ -908,7 +902,6 @@ err_qcq_init:
        ionic_qcq_free(lif, txq);
        devm_kfree(lif->ionic->dev, txq);
 err_qcq_alloc:
-       mutex_unlock(&lif->queue_lock);
        return err;
 }
 
@@ -920,10 +913,8 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
        u64 features;
        int err;
 
-       mutex_lock(&lif->queue_lock);
-
        if (lif->hwstamp_rxq)
-               goto out;
+               return 0;
 
        features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP;
 
@@ -961,9 +952,6 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
                }
        }
 
-out:
-       mutex_unlock(&lif->queue_lock);
-
        return 0;
 
 err_qcq_enable:
@@ -974,7 +962,6 @@ err_qcq_init:
        ionic_qcq_free(lif, rxq);
        devm_kfree(lif->ionic->dev, rxq);
 err_qcq_alloc:
-       mutex_unlock(&lif->queue_lock);
        return err;
 }
 
@@ -1077,7 +1064,11 @@ static int ionic_lif_add_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
        if (err && err != -EEXIST)
                return err;
 
-       return ionic_rx_filter_save(lif, 0, qid, 0, &ctx);
+       spin_lock_bh(&lif->rx_filters.lock);
+       err = ionic_rx_filter_save(lif, 0, qid, 0, &ctx, IONIC_FILTER_STATE_SYNCED);
+       spin_unlock_bh(&lif->rx_filters.lock);
+
+       return err;
 }
 
 int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
@@ -1250,7 +1241,7 @@ void ionic_get_stats64(struct net_device *netdev,
        ns->tx_errors = ns->tx_aborted_errors;
 }
 
-static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
+int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
 {
        struct ionic_admin_ctx ctx = {
                .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
@@ -1260,27 +1251,83 @@ static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr)
                        .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
                },
        };
+       int nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
+       bool mc = is_multicast_ether_addr(addr);
        struct ionic_rx_filter *f;
-       int err;
+       int err = 0;
+
+       memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
 
-       /* don't bother if we already have it */
        spin_lock_bh(&lif->rx_filters.lock);
        f = ionic_rx_filter_by_addr(lif, addr);
+       if (f) {
+               /* don't bother if we already have it and it is sync'd */
+               if (f->state == IONIC_FILTER_STATE_SYNCED) {
+                       spin_unlock_bh(&lif->rx_filters.lock);
+                       return 0;
+               }
+
+               /* mark preemptively as sync'd to block any parallel attempts */
+               f->state = IONIC_FILTER_STATE_SYNCED;
+       } else {
+               /* save as SYNCED to catch any DEL requests while processing */
+               err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+                                          IONIC_FILTER_STATE_SYNCED);
+       }
        spin_unlock_bh(&lif->rx_filters.lock);
-       if (f)
-               return 0;
+       if (err)
+               return err;
 
        netdev_dbg(lif->netdev, "rx_filter add ADDR %pM\n", addr);
 
-       memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
-       err = ionic_adminq_post_wait(lif, &ctx);
-       if (err && err != -EEXIST)
-               return err;
+       /* Don't bother with the write to FW if we know there's no room,
+        * we can try again on the next sync attempt.
+        */
+       if ((lif->nucast + lif->nmcast) >= nfilters)
+               err = -ENOSPC;
+       else
+               err = ionic_adminq_post_wait(lif, &ctx);
+
+       spin_lock_bh(&lif->rx_filters.lock);
+       if (err && err != -EEXIST) {
+               /* set the state back to NEW so we can try again later */
+               f = ionic_rx_filter_by_addr(lif, addr);
+               if (f && f->state == IONIC_FILTER_STATE_SYNCED)
+                       f->state = IONIC_FILTER_STATE_NEW;
+
+               spin_unlock_bh(&lif->rx_filters.lock);
+
+               if (err == -ENOSPC)
+                       return 0;
+               else
+                       return err;
+       }
 
-       return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+       if (mc)
+               lif->nmcast++;
+       else
+               lif->nucast++;
+
+       f = ionic_rx_filter_by_addr(lif, addr);
+       if (f && f->state == IONIC_FILTER_STATE_OLD) {
+               /* Someone requested a delete while we were adding
+                * so update the filter info with the results from the add
+                * and the data will be there for the delete on the next
+                * sync cycle.
+                */
+               err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+                                          IONIC_FILTER_STATE_OLD);
+       } else {
+               err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+                                          IONIC_FILTER_STATE_SYNCED);
+       }
+
+       spin_unlock_bh(&lif->rx_filters.lock);
+
+       return err;
 }
 
-static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
+int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
 {
        struct ionic_admin_ctx ctx = {
                .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
@@ -1290,6 +1337,7 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
                },
        };
        struct ionic_rx_filter *f;
+       int state;
        int err;
 
        spin_lock_bh(&lif->rx_filters.lock);
@@ -1302,65 +1350,37 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
        netdev_dbg(lif->netdev, "rx_filter del ADDR %pM (id %d)\n",
                   addr, f->filter_id);
 
+       state = f->state;
        ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(f->filter_id);
        ionic_rx_filter_free(lif, f);
-       spin_unlock_bh(&lif->rx_filters.lock);
-
-       err = ionic_adminq_post_wait(lif, &ctx);
-       if (err && err != -EEXIST)
-               return err;
 
-       return 0;
-}
+       if (is_multicast_ether_addr(addr) && lif->nmcast)
+               lif->nmcast--;
+       else if (!is_multicast_ether_addr(addr) && lif->nucast)
+               lif->nucast--;
 
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
-{
-       unsigned int nmfilters;
-       unsigned int nufilters;
+       spin_unlock_bh(&lif->rx_filters.lock);
 
-       if (add) {
-               /* Do we have space for this filter?  We test the counters
-                * here before checking the need for deferral so that we
-                * can return an overflow error to the stack.
-                */
-               nmfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
-               nufilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
-
-               if ((is_multicast_ether_addr(addr) && lif->nmcast < nmfilters))
-                       lif->nmcast++;
-               else if (!is_multicast_ether_addr(addr) &&
-                        lif->nucast < nufilters)
-                       lif->nucast++;
-               else
-                       return -ENOSPC;
-       } else {
-               if (is_multicast_ether_addr(addr) && lif->nmcast)
-                       lif->nmcast--;
-               else if (!is_multicast_ether_addr(addr) && lif->nucast)
-                       lif->nucast--;
+       if (state != IONIC_FILTER_STATE_NEW) {
+               err = ionic_adminq_post_wait(lif, &ctx);
+               if (err && err != -EEXIST)
+                       return err;
        }
 
-       netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
-                  add ? "add" : "del", addr);
-       if (add)
-               return ionic_lif_addr_add(lif, addr);
-       else
-               return ionic_lif_addr_del(lif, addr);
-
        return 0;
 }
 
 static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
+       return ionic_lif_list_addr(netdev_priv(netdev), addr, ADD_ADDR);
 }
 
 static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
+       return ionic_lif_list_addr(netdev_priv(netdev), addr, DEL_ADDR);
 }
 
-static void ionic_lif_rx_mode(struct ionic_lif *lif)
+void ionic_lif_rx_mode(struct ionic_lif *lif)
 {
        struct net_device *netdev = lif->netdev;
        unsigned int nfilters;
@@ -1381,32 +1401,26 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif)
        rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
        rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
 
-       /* sync unicast addresses
-        * next check to see if we're in an overflow state
+       /* sync the mac filters */
+       ionic_rx_filter_sync(lif);
+
+       /* check for overflow state
         *    if so, we track that we overflowed and enable NIC PROMISC
         *    else if the overflow is set and not needed
         *       we remove our overflow flag and check the netdev flags
         *       to see if we can disable NIC PROMISC
         */
-       __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
        nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
-       if (netdev_uc_count(netdev) + 1 > nfilters) {
+       if ((lif->nucast + lif->nmcast) >= nfilters) {
                rx_mode |= IONIC_RX_MODE_F_PROMISC;
+               rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
                lif->uc_overflow = true;
+               lif->mc_overflow = true;
        } else if (lif->uc_overflow) {
                lif->uc_overflow = false;
+               lif->mc_overflow = false;
                if (!(nd_flags & IFF_PROMISC))
                        rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
-       }
-
-       /* same for multicast */
-       __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
-       nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
-       if (netdev_mc_count(netdev) > nfilters) {
-               rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
-               lif->mc_overflow = true;
-       } else if (lif->mc_overflow) {
-               lif->mc_overflow = false;
                if (!(nd_flags & IFF_ALLMULTI))
                        rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
        }
@@ -1449,28 +1463,26 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif)
        mutex_unlock(&lif->config_lock);
 }
 
-static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
+static void ionic_ndo_set_rx_mode(struct net_device *netdev)
 {
        struct ionic_lif *lif = netdev_priv(netdev);
        struct ionic_deferred_work *work;
 
-       if (!can_sleep) {
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work) {
-                       netdev_err(lif->netdev, "rxmode change dropped\n");
-                       return;
-               }
-               work->type = IONIC_DW_TYPE_RX_MODE;
-               netdev_dbg(lif->netdev, "deferred: rx_mode\n");
-               ionic_lif_deferred_enqueue(&lif->deferred, work);
-       } else {
-               ionic_lif_rx_mode(lif);
-       }
-}
+       /* Sync the kernel filter list with the driver filter list */
+       __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
+       __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
 
-static void ionic_ndo_set_rx_mode(struct net_device *netdev)
-{
-       ionic_set_rx_mode(netdev, CAN_NOT_SLEEP);
+       /* Shove off the rest of the rxmode work to the work task
+        * which will include syncing the filters to the firmware.
+        */
+       work = kzalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work) {
+               netdev_err(lif->netdev, "rxmode change dropped\n");
+               return;
+       }
+       work->type = IONIC_DW_TYPE_RX_MODE;
+       netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+       ionic_lif_deferred_enqueue(&lif->deferred, work);
 }
 
 static __le64 ionic_netdev_features_to_nic(netdev_features_t features)
@@ -1599,7 +1611,6 @@ static int ionic_init_nic_features(struct ionic_lif *lif)
        features = NETIF_F_HW_VLAN_CTAG_TX |
                   NETIF_F_HW_VLAN_CTAG_RX |
                   NETIF_F_HW_VLAN_CTAG_FILTER |
-                  NETIF_F_RXHASH |
                   NETIF_F_SG |
                   NETIF_F_HW_CSUM |
                   NETIF_F_RXCSUM |
@@ -1607,6 +1618,9 @@ static int ionic_init_nic_features(struct ionic_lif *lif)
                   NETIF_F_TSO6 |
                   NETIF_F_TSO_ECN;
 
+       if (lif->nxqs > 1)
+               features |= NETIF_F_RXHASH;
+
        err = ionic_set_nic_features(lif, features);
        if (err)
                return err;
@@ -1689,13 +1703,13 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
        if (!is_zero_ether_addr(netdev->dev_addr)) {
                netdev_info(netdev, "deleting mac addr %pM\n",
                            netdev->dev_addr);
-               ionic_addr_del(netdev, netdev->dev_addr);
+               ionic_lif_addr_del(netdev_priv(netdev), netdev->dev_addr);
        }
 
        eth_commit_mac_addr_change(netdev, addr);
        netdev_info(netdev, "updating mac addr %pM\n", mac);
 
-       return ionic_addr_add(netdev, mac);
+       return ionic_lif_addr_add(netdev_priv(netdev), mac);
 }
 
 static void ionic_stop_queues_reconfig(struct ionic_lif *lif)
@@ -1801,7 +1815,12 @@ static int ionic_vlan_rx_add_vid(struct net_device *netdev, __be16 proto,
        if (err)
                return err;
 
-       return ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx);
+       spin_lock_bh(&lif->rx_filters.lock);
+       err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+                                  IONIC_FILTER_STATE_SYNCED);
+       spin_unlock_bh(&lif->rx_filters.lock);
+
+       return err;
 }
 
 static int ionic_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto,
@@ -2104,7 +2123,7 @@ static int ionic_txrx_init(struct ionic_lif *lif)
        if (lif->netdev->features & NETIF_F_RXHASH)
                ionic_lif_rss_init(lif);
 
-       ionic_set_rx_mode(lif->netdev, CAN_SLEEP);
+       ionic_lif_rx_mode(lif);
 
        return 0;
 
@@ -2202,9 +2221,11 @@ static int ionic_open(struct net_device *netdev)
        if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
                netdev_info(netdev, "clearing broken state\n");
 
+       mutex_lock(&lif->queue_lock);
+
        err = ionic_txrx_alloc(lif);
        if (err)
-               return err;
+               goto err_unlock;
 
        err = ionic_txrx_init(lif);
        if (err)
@@ -2225,12 +2246,21 @@ static int ionic_open(struct net_device *netdev)
                        goto err_txrx_deinit;
        }
 
+       /* If hardware timestamping is enabled, but the queues were freed by
+        * ionic_stop, those need to be reallocated and initialized, too.
+        */
+       ionic_lif_hwstamp_recreate_queues(lif);
+
+       mutex_unlock(&lif->queue_lock);
+
        return 0;
 
 err_txrx_deinit:
        ionic_txrx_deinit(lif);
 err_txrx_free:
        ionic_txrx_free(lif);
+err_unlock:
+       mutex_unlock(&lif->queue_lock);
        return err;
 }
 
@@ -2250,14 +2280,16 @@ static int ionic_stop(struct net_device *netdev)
        if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                return 0;
 
+       mutex_lock(&lif->queue_lock);
        ionic_stop_queues(lif);
        ionic_txrx_deinit(lif);
        ionic_txrx_free(lif);
+       mutex_unlock(&lif->queue_lock);
 
        return 0;
 }
 
-static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+static int ionic_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 {
        struct ionic_lif *lif = netdev_priv(netdev);
 
@@ -2519,7 +2551,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 static const struct net_device_ops ionic_netdev_ops = {
        .ndo_open               = ionic_open,
        .ndo_stop               = ionic_stop,
-       .ndo_do_ioctl           = ionic_do_ioctl,
+       .ndo_eth_ioctl          = ionic_eth_ioctl,
        .ndo_start_xmit         = ionic_start_xmit,
        .ndo_get_stats64        = ionic_get_stats64,
        .ndo_set_rx_mode        = ionic_ndo_set_rx_mode,
@@ -2580,22 +2612,26 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
        struct ionic_qcq **tx_qcqs = NULL;
        struct ionic_qcq **rx_qcqs = NULL;
        unsigned int flags, i;
-       int err = -ENOMEM;
+       int err = 0;
 
        /* allocate temporary qcq arrays to hold new queue structs */
        if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) {
                tx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->ntxqs_per_lif,
                                       sizeof(struct ionic_qcq *), GFP_KERNEL);
-               if (!tx_qcqs)
+               if (!tx_qcqs) {
+                       err = -ENOMEM;
                        goto err_out;
+               }
        }
        if (qparam->nxqs != lif->nxqs ||
            qparam->nrxq_descs != lif->nrxq_descs ||
            qparam->rxq_features != lif->rxq_features) {
                rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif,
                                       sizeof(struct ionic_qcq *), GFP_KERNEL);
-               if (!rx_qcqs)
+               if (!rx_qcqs) {
+                       err = -ENOMEM;
                        goto err_out;
+               }
        }
 
        /* allocate new desc_info and rings, but leave the interrupt setup
@@ -2774,6 +2810,9 @@ err_out:
                ionic_qcq_free(lif, lif->rxqcqs[i]);
        }
 
+       if (err)
+               netdev_info(lif->netdev, "%s: failed %d\n", __func__, err);
+
        return err;
 }
 
@@ -2827,8 +2866,14 @@ int ionic_lif_alloc(struct ionic *ionic)
 
        lif->ionic = ionic;
        lif->index = 0;
-       lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
-       lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+
+       if (is_kdump_kernel()) {
+               lif->ntxq_descs = IONIC_MIN_TXRX_DESC;
+               lif->nrxq_descs = IONIC_MIN_TXRX_DESC;
+       } else {
+               lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
+               lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
+       }
 
        /* Convert the default coalesce value to actual hw resolution */
        lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
@@ -3179,7 +3224,7 @@ static int ionic_station_set(struct ionic_lif *lif)
                 */
                if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
                                      netdev->dev_addr))
-                       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
+                       ionic_lif_addr_add(lif, netdev->dev_addr);
        } else {
                /* Update the netdev mac with the device's mac */
                memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
@@ -3196,7 +3241,7 @@ static int ionic_station_set(struct ionic_lif *lif)
 
        netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
                   netdev->dev_addr);
-       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
+       ionic_lif_addr_add(lif, netdev->dev_addr);
 
        return 0;
 }
@@ -3514,6 +3559,7 @@ int ionic_lif_size(struct ionic *ionic)
        unsigned int min_intrs;
        int err;
 
+       /* retrieve basic values from FW */
        lc = &ident->lif.eth.config;
        dev_nintrs = le32_to_cpu(ident->dev.nintrs);
        neqs_per_lif = le32_to_cpu(ident->lif.rdma.eq_qtype.qid_count);
@@ -3521,6 +3567,15 @@ int ionic_lif_size(struct ionic *ionic)
        ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]);
        nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]);
 
+       /* limit values to play nice with kdump */
+       if (is_kdump_kernel()) {
+               dev_nintrs = 2;
+               neqs_per_lif = 0;
+               nnqs_per_lif = 0;
+               ntxqs_per_lif = 1;
+               nrxqs_per_lif = 1;
+       }
+
        /* reserve last queue id for hardware timestamping */
        if (lc->features & cpu_to_le64(IONIC_ETH_HW_TIMESTAMP)) {
                if (ntxqs_per_lif <= 1 || nrxqs_per_lif <= 1) {
index 69ab59f..4915184 100644 (file)
@@ -98,8 +98,6 @@ struct ionic_qcq {
 
 enum ionic_deferred_work_type {
        IONIC_DW_TYPE_RX_MODE,
-       IONIC_DW_TYPE_RX_ADDR_ADD,
-       IONIC_DW_TYPE_RX_ADDR_DEL,
        IONIC_DW_TYPE_LINK_STATUS,
        IONIC_DW_TYPE_LIF_RESET,
 };
@@ -147,6 +145,7 @@ enum ionic_lif_state_flags {
        IONIC_LIF_F_SW_DEBUG_STATS,
        IONIC_LIF_F_UP,
        IONIC_LIF_F_LINK_CHECK_REQUESTED,
+       IONIC_LIF_F_FILTER_SYNC_NEEDED,
        IONIC_LIF_F_FW_RESET,
        IONIC_LIF_F_SPLIT_INTR,
        IONIC_LIF_F_BROKEN,
@@ -295,6 +294,10 @@ int ionic_lif_alloc(struct ionic *ionic);
 int ionic_lif_init(struct ionic_lif *lif);
 void ionic_lif_free(struct ionic_lif *lif);
 void ionic_lif_deinit(struct ionic_lif *lif);
+
+int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
+int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
+
 int ionic_lif_register(struct ionic_lif *lif);
 void ionic_lif_unregister(struct ionic_lif *lif);
 int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
@@ -303,6 +306,7 @@ int ionic_lif_size(struct ionic *ionic);
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif);
 int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
 ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -312,6 +316,7 @@ void ionic_lif_alloc_phc(struct ionic_lif *lif);
 void ionic_lif_free_phc(struct ionic_lif *lif);
 #else
 static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
+static inline void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif) {}
 
 static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 {
@@ -342,6 +347,7 @@ int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class);
 
 int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
                         const u8 *key, const u32 *indir);
+void ionic_lif_rx_mode(struct ionic_lif *lif);
 int ionic_reconfigure_queues(struct ionic_lif *lif,
                             struct ionic_queue_params *qparam);
 
index 61cfe21..6f07bf5 100644 (file)
@@ -375,8 +375,8 @@ try_again:
                 * heartbeat check but is still alive and will process this
                 * request, so don't clean the dev_cmd in this case.
                 */
-               dev_warn(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
-                        ionic_opcode_to_str(opcode), opcode);
+               dev_dbg(ionic->dev, "DEVCMD %s (%d) failed - FW halted\n",
+                       ionic_opcode_to_str(opcode), opcode);
                return -ENXIO;
        }
 
@@ -450,6 +450,8 @@ int ionic_identify(struct ionic *ionic)
        }
        mutex_unlock(&ionic->dev_cmd_lock);
 
+       dev_info(ionic->dev, "FW: %s\n", idev->dev_info.fw_version);
+
        if (err) {
                dev_err(ionic->dev, "Cannot identify ionic: %dn", err);
                goto err_out;
index 6e2403c..eed2db6 100644 (file)
@@ -119,8 +119,8 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
                config->rx_filter = HWTSTAMP_FILTER_ALL;
        }
 
-       dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n",
-               config->rx_filter, rx_filt, rx_all);
+       dev_dbg(ionic->dev, "%s: config_rx_filter %d rx_filt %#llx rx_all %d\n",
+               __func__, config->rx_filter, rx_filt, rx_all);
 
        if (tx_mode) {
                err = ionic_lif_create_hwstamp_txq(lif);
@@ -194,7 +194,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
                return -EFAULT;
 
+       mutex_lock(&lif->queue_lock);
        err = ionic_lif_hwstamp_set_ts_config(lif, &config);
+       mutex_unlock(&lif->queue_lock);
        if (err) {
                netdev_info(lif->netdev, "hwstamp set failed: %d\n", err);
                return err;
@@ -213,11 +215,37 @@ void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
        if (!lif->phc || !lif->phc->ptp)
                return;
 
+       mutex_lock(&lif->queue_lock);
        err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
+       mutex_unlock(&lif->queue_lock);
        if (err)
                netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
 }
 
+void ionic_lif_hwstamp_recreate_queues(struct ionic_lif *lif)
+{
+       int err;
+
+       if (!lif->phc || !lif->phc->ptp)
+               return;
+
+       mutex_lock(&lif->phc->config_lock);
+
+       if (lif->phc->ts_config_tx_mode) {
+               err = ionic_lif_create_hwstamp_txq(lif);
+               if (err)
+                       netdev_info(lif->netdev, "hwstamp recreate txq failed: %d\n", err);
+       }
+
+       if (lif->phc->ts_config_rx_filt) {
+               err = ionic_lif_create_hwstamp_rxq(lif);
+               if (err)
+                       netdev_info(lif->netdev, "hwstamp recreate rxq failed: %d\n", err);
+       }
+
+       mutex_unlock(&lif->phc->config_lock);
+}
+
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
 {
        struct hwtstamp_config config;
index d71316d..7e3a563 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/netdevice.h>
 #include <linux/dynamic_debug.h>
 #include <linux/etherdevice.h>
+#include <linux/list.h>
 
 #include "ionic.h"
 #include "ionic_lif.h"
@@ -120,11 +121,12 @@ void ionic_rx_filters_deinit(struct ionic_lif *lif)
 }
 
 int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
-                        u32 hash, struct ionic_admin_ctx *ctx)
+                        u32 hash, struct ionic_admin_ctx *ctx,
+                        enum ionic_filter_state state)
 {
        struct device *dev = lif->ionic->dev;
        struct ionic_rx_filter_add_cmd *ac;
-       struct ionic_rx_filter *f;
+       struct ionic_rx_filter *f = NULL;
        struct hlist_head *head;
        unsigned int key;
 
@@ -133,9 +135,11 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
        switch (le16_to_cpu(ac->match)) {
        case IONIC_RX_FILTER_MATCH_VLAN:
                key = le16_to_cpu(ac->vlan.vlan);
+               f = ionic_rx_filter_by_vlan(lif, le16_to_cpu(ac->vlan.vlan));
                break;
        case IONIC_RX_FILTER_MATCH_MAC:
                key = *(u32 *)ac->mac.addr;
+               f = ionic_rx_filter_by_addr(lif, ac->mac.addr);
                break;
        case IONIC_RX_FILTER_MATCH_MAC_VLAN:
                key = le16_to_cpu(ac->mac_vlan.vlan);
@@ -147,12 +151,19 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
                return -EINVAL;
        }
 
-       f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL);
-       if (!f)
-               return -ENOMEM;
+       if (f) {
+               /* remove from current linking so we can refresh it */
+               hlist_del(&f->by_id);
+               hlist_del(&f->by_hash);
+       } else {
+               f = devm_kzalloc(dev, sizeof(*f), GFP_ATOMIC);
+               if (!f)
+                       return -ENOMEM;
+       }
 
        f->flow_id = flow_id;
        f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id);
+       f->state = state;
        f->rxq_index = rxq_index;
        memcpy(&f->cmd, ac, sizeof(f->cmd));
        netdev_dbg(lif->netdev, "rx_filter add filter_id %d\n", f->filter_id);
@@ -160,8 +171,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
        INIT_HLIST_NODE(&f->by_hash);
        INIT_HLIST_NODE(&f->by_id);
 
-       spin_lock_bh(&lif->rx_filters.lock);
-
        key = hash_32(key, IONIC_RX_FILTER_HASH_BITS);
        head = &lif->rx_filters.by_hash[key];
        hlist_add_head(&f->by_hash, head);
@@ -170,8 +179,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
        head = &lif->rx_filters.by_id[key];
        hlist_add_head(&f->by_id, head);
 
-       spin_unlock_bh(&lif->rx_filters.lock);
-
        return 0;
 }
 
@@ -231,3 +238,121 @@ struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif)
 
        return NULL;
 }
+
+int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode)
+{
+       struct ionic_rx_filter *f;
+       int err;
+
+       spin_lock_bh(&lif->rx_filters.lock);
+
+       f = ionic_rx_filter_by_addr(lif, addr);
+       if (mode == ADD_ADDR && !f) {
+               struct ionic_admin_ctx ctx = {
+                       .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+                       .cmd.rx_filter_add = {
+                               .opcode = IONIC_CMD_RX_FILTER_ADD,
+                               .lif_index = cpu_to_le16(lif->index),
+                               .match = cpu_to_le16(IONIC_RX_FILTER_MATCH_MAC),
+                       },
+               };
+
+               memcpy(ctx.cmd.rx_filter_add.mac.addr, addr, ETH_ALEN);
+               err = ionic_rx_filter_save(lif, 0, IONIC_RXQ_INDEX_ANY, 0, &ctx,
+                                          IONIC_FILTER_STATE_NEW);
+               if (err) {
+                       spin_unlock_bh(&lif->rx_filters.lock);
+                       return err;
+               }
+
+       } else if (mode == ADD_ADDR && f) {
+               if (f->state == IONIC_FILTER_STATE_OLD)
+                       f->state = IONIC_FILTER_STATE_SYNCED;
+
+       } else if (mode == DEL_ADDR && f) {
+               if (f->state == IONIC_FILTER_STATE_NEW)
+                       ionic_rx_filter_free(lif, f);
+               else if (f->state == IONIC_FILTER_STATE_SYNCED)
+                       f->state = IONIC_FILTER_STATE_OLD;
+       } else if (mode == DEL_ADDR && !f) {
+               spin_unlock_bh(&lif->rx_filters.lock);
+               return -ENOENT;
+       }
+
+       spin_unlock_bh(&lif->rx_filters.lock);
+
+       set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+
+       return 0;
+}
+
+struct sync_item {
+       struct list_head list;
+       struct ionic_rx_filter f;
+};
+
+void ionic_rx_filter_sync(struct ionic_lif *lif)
+{
+       struct device *dev = lif->ionic->dev;
+       struct list_head sync_add_list;
+       struct list_head sync_del_list;
+       struct sync_item *sync_item;
+       struct ionic_rx_filter *f;
+       struct hlist_head *head;
+       struct hlist_node *tmp;
+       struct sync_item *spos;
+       unsigned int i;
+
+       INIT_LIST_HEAD(&sync_add_list);
+       INIT_LIST_HEAD(&sync_del_list);
+
+       clear_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+
+       /* Copy the filters to be added and deleted
+        * into a separate local list that needs no locking.
+        */
+       spin_lock_bh(&lif->rx_filters.lock);
+       for (i = 0; i < IONIC_RX_FILTER_HLISTS; i++) {
+               head = &lif->rx_filters.by_id[i];
+               hlist_for_each_entry_safe(f, tmp, head, by_id) {
+                       if (f->state == IONIC_FILTER_STATE_NEW ||
+                           f->state == IONIC_FILTER_STATE_OLD) {
+                               sync_item = devm_kzalloc(dev, sizeof(*sync_item),
+                                                        GFP_KERNEL);
+                               if (!sync_item)
+                                       goto loop_out;
+
+                               sync_item->f = *f;
+
+                               if (f->state == IONIC_FILTER_STATE_NEW)
+                                       list_add(&sync_item->list, &sync_add_list);
+                               else
+                                       list_add(&sync_item->list, &sync_del_list);
+                       }
+               }
+       }
+loop_out:
+       spin_unlock_bh(&lif->rx_filters.lock);
+
+       /* If the add or delete fails, it won't get marked as sync'd
+        * and will be tried again in the next sync action.
+        * Do the deletes first in case we're in an overflow state and
+        * they can clear room for some new filters
+        */
+       list_for_each_entry_safe(sync_item, spos, &sync_del_list, list) {
+               (void)ionic_lif_addr_del(lif, sync_item->f.cmd.mac.addr);
+
+               list_del(&sync_item->list);
+               devm_kfree(dev, sync_item);
+       }
+
+       list_for_each_entry_safe(sync_item, spos, &sync_add_list, list) {
+               (void)ionic_lif_addr_add(lif, sync_item->f.cmd.mac.addr);
+
+               if (sync_item->f.state != IONIC_FILTER_STATE_SYNCED)
+                       set_bit(IONIC_LIF_F_FILTER_SYNC_NEEDED, lif->state);
+
+               list_del(&sync_item->list);
+               devm_kfree(dev, sync_item);
+       }
+}
index 1ead48b..a66e35f 100644 (file)
@@ -5,10 +5,18 @@
 #define _IONIC_RX_FILTER_H_
 
 #define IONIC_RXQ_INDEX_ANY            (0xFFFF)
+
+enum ionic_filter_state {
+       IONIC_FILTER_STATE_SYNCED,
+       IONIC_FILTER_STATE_NEW,
+       IONIC_FILTER_STATE_OLD,
+};
+
 struct ionic_rx_filter {
        u32 flow_id;
        u32 filter_id;
        u16 rxq_index;
+       enum ionic_filter_state state;
        struct ionic_rx_filter_add_cmd cmd;
        struct hlist_node by_hash;
        struct hlist_node by_id;
@@ -28,9 +36,13 @@ void ionic_rx_filter_replay(struct ionic_lif *lif);
 int ionic_rx_filters_init(struct ionic_lif *lif);
 void ionic_rx_filters_deinit(struct ionic_lif *lif);
 int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
-                        u32 hash, struct ionic_admin_ctx *ctx);
+                        u32 hash, struct ionic_admin_ctx *ctx,
+                        enum ionic_filter_state state);
 struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid);
 struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr);
 struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif);
+void ionic_rx_filter_sync(struct ionic_lif *lif);
+int ionic_lif_list_addr(struct ionic_lif *lif, const u8 *addr, bool mode);
+int ionic_rx_filters_need_sync(struct ionic_lif *lif);
 
 #endif /* _IONIC_RX_FILTER_H_ */
index 0887019..37c3958 100644 (file)
@@ -32,19 +32,13 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
        return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
-{
-       buf_info->page = NULL;
-       buf_info->page_offset = 0;
-       buf_info->dma_addr = 0;
-}
-
 static int ionic_rx_page_alloc(struct ionic_queue *q,
                               struct ionic_buf_info *buf_info)
 {
        struct net_device *netdev = q->lif->netdev;
        struct ionic_rx_stats *stats;
        struct device *dev;
+       struct page *page;
 
        dev = q->dev;
        stats = q_to_rx_stats(q);
@@ -55,26 +49,27 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
                return -EINVAL;
        }
 
-       buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
-       if (unlikely(!buf_info->page)) {
+       page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+       if (unlikely(!page)) {
                net_err_ratelimited("%s: %s page alloc failed\n",
                                    netdev->name, q->name);
                stats->alloc_err++;
                return -ENOMEM;
        }
-       buf_info->page_offset = 0;
 
-       buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
+       buf_info->dma_addr = dma_map_page(dev, page, 0,
                                          IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
-               __free_pages(buf_info->page, 0);
-               ionic_rx_buf_reset(buf_info);
+               __free_pages(page, 0);
                net_err_ratelimited("%s: %s dma map failed\n",
                                    netdev->name, q->name);
                stats->dma_map_err++;
                return -EIO;
        }
 
+       buf_info->page = page;
+       buf_info->page_offset = 0;
+
        return 0;
 }
 
@@ -95,7 +90,7 @@ static void ionic_rx_page_free(struct ionic_queue *q,
 
        dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
        __free_pages(buf_info->page, 0);
-       ionic_rx_buf_reset(buf_info);
+       buf_info->page = NULL;
 }
 
 static bool ionic_rx_buf_recycle(struct ionic_queue *q,
@@ -139,7 +134,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
        buf_info = &desc_info->bufs[0];
        len = le16_to_cpu(comp->len);
 
-       prefetch(buf_info->page);
+       prefetchw(buf_info->page);
 
        skb = napi_get_frags(&q_to_qcq(q)->napi);
        if (unlikely(!skb)) {
@@ -170,7 +165,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
                if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
                        dma_unmap_page(dev, buf_info->dma_addr,
                                       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
-                       ionic_rx_buf_reset(buf_info);
+                       buf_info->page = NULL;
                }
 
                buf_info++;
index 98f4309..1203353 100644 (file)
@@ -99,7 +99,7 @@ config QED_SRIOV
 config QEDE
        tristate "QLogic QED 25/40/100Gb Ethernet NIC"
        depends on QED
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This enables the support for Marvell FastLinQ adapters family,
          ethernet driver.
index e5c5125..f13fa73 100644 (file)
@@ -1863,7 +1863,6 @@ static inline u32 netxen_tx_avail(struct nx_host_tx_ring *tx_ring)
 int netxen_get_flash_mac_addr(struct netxen_adapter *adapter, u64 *mac);
 int netxen_p3_get_mac_addr(struct netxen_adapter *adapter, u64 *mac);
 void netxen_change_ringparam(struct netxen_adapter *adapter);
-int netxen_rom_fast_read(struct netxen_adapter *adapter, int addr, int *valp);
 
 extern const struct ethtool_ops netxen_nic_ethtool_ops;
 
index dd22cb0..a075643 100644 (file)
@@ -731,7 +731,9 @@ netxen_nic_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
  * firmware coalescing to default.
  */
 static int netxen_set_intr_coalesce(struct net_device *netdev,
-                       struct ethtool_coalesce *ethcoal)
+                                   struct ethtool_coalesce *ethcoal,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct netxen_adapter *adapter = netdev_priv(netdev);
 
@@ -775,7 +777,9 @@ static int netxen_set_intr_coalesce(struct net_device *netdev,
 }
 
 static int netxen_get_intr_coalesce(struct net_device *netdev,
-                       struct ethtool_coalesce *ethcoal)
+                                   struct ethtool_coalesce *ethcoal,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct netxen_adapter *adapter = netdev_priv(netdev);
 
index b590c70..d58e021 100644 (file)
 
 extern const struct qed_common_ops qed_common_ops_pass;
 
-#define QED_MAJOR_VERSION              8
-#define QED_MINOR_VERSION              37
-#define QED_REVISION_VERSION           0
-#define QED_ENGINEERING_VERSION                20
-
-#define QED_VERSION                                             \
-       ((QED_MAJOR_VERSION << 24) | (QED_MINOR_VERSION << 16) | \
-        (QED_REVISION_VERSION << 8) | QED_ENGINEERING_VERSION)
-
 #define STORM_FW_VERSION                                      \
        ((FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | \
         (FW_REVISION_VERSION << 8) | FW_ENGINEERING_VERSION)
@@ -517,12 +508,6 @@ enum qed_hsi_def_type {
        QED_NUM_HSI_DEFS
 };
 
-#define DRV_MODULE_VERSION                   \
-       __stringify(QED_MAJOR_VERSION) "."    \
-       __stringify(QED_MINOR_VERSION) "."    \
-       __stringify(QED_REVISION_VERSION) "." \
-       __stringify(QED_ENGINEERING_VERSION)
-
 struct qed_simd_fp_handler {
        void    *token;
        void    (*func)(void *);
index e81dd34..dc93dde 100644 (file)
@@ -741,7 +741,6 @@ static int
 qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_dcbx_mib_meta_data data;
-       int rc = 0;
 
        memset(&data, 0, sizeof(data));
        data.addr = p_hwfn->mcp_info->port_addr + offsetof(struct public_port,
@@ -750,7 +749,7 @@ qed_dcbx_read_local_lldp_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        data.size = sizeof(struct lldp_config_params_s);
        qed_memcpy_from(p_hwfn, p_ptt, data.lldp_local, data.addr, data.size);
 
-       return rc;
+       return 0;
 }
 
 static int
@@ -810,7 +809,6 @@ static int
 qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        struct qed_dcbx_mib_meta_data data;
-       int rc = 0;
 
        memset(&data, 0, sizeof(data));
        data.addr = p_hwfn->mcp_info->port_addr +
@@ -819,7 +817,7 @@ qed_dcbx_read_local_mib(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        data.size = sizeof(struct dcbx_local_params);
        qed_memcpy_from(p_hwfn, p_ptt, data.local_admin, data.addr, data.size);
 
-       return rc;
+       return 0;
 }
 
 static int qed_dcbx_read_mib(struct qed_hwfn *p_hwfn,
index cf7f4da..7807068 100644 (file)
@@ -93,7 +93,7 @@ static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = {
                .dump = qed_fw_fatal_reporter_dump,
 };
 
-#define QED_REPORTER_FW_GRACEFUL_PERIOD 1200000
+#define QED_REPORTER_FW_GRACEFUL_PERIOD 0
 
 void qed_fw_reporters_create(struct devlink *devlink)
 {
@@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev)
        struct devlink *dl;
        int rc;
 
-       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink),
+                          &cdev->pdev->dev);
        if (!dl)
                return ERR_PTR(-ENOMEM);
 
        qdevlink = devlink_priv(dl);
        qdevlink->cdev = cdev;
 
-       rc = devlink_register(dl, &cdev->pdev->dev);
+       rc = devlink_register(dl);
        if (rc)
                goto err_free;
 
index 578935f..f78e605 100644 (file)
@@ -351,6 +351,9 @@ static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
        qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
                          "FW assertion!\n");
 
+       /* Clear assert indications */
+       qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, MISC_REG_AEU_GENERAL_ATTN_32, 0);
+
        return -EINVAL;
 }
 
@@ -464,12 +467,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
        u32 int_sts, first_drop_reason, details, address, all_drops_reason;
        struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
 
+       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+       if (int_sts == 0xdeadbeaf) {
+               DP_NOTICE(p_hwfn->cdev,
+                         "DORQ is being reset, skipping int_sts handler\n");
+
+               return 0;
+       }
+
        /* int_sts may be zero since all PFs were interrupted for doorbell
         * overflow but another one already handled it. Can abort here. If
         * This PF also requires overflow recovery we will be interrupted again.
         * The masked almost full indication may also be set. Ignoring.
         */
-       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
        if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
                return 0;
 
@@ -528,6 +538,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
 
 static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
 {
+       if (p_hwfn->cdev->recov_in_prog)
+               return 0;
+
        p_hwfn->db_recovery_info.dorq_attn = true;
        qed_dorq_attn_overflow(p_hwfn);
 
@@ -943,6 +956,13 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
        DP_INFO(p_hwfn, "`%s' - Disabled future attentions\n",
                p_bit_name);
 
+       /* Re-enable FW aassertion (Gen 32) interrupts */
+       val = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
+                    MISC_REG_AEU_ENABLE4_IGU_OUT_0);
+       val |= MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32;
+       qed_wr(p_hwfn, p_hwfn->p_dpc_ptt,
+              MISC_REG_AEU_ENABLE4_IGU_OUT_0, val);
+
 out:
        return rc;
 }
index a998611..fc8b3e6 100644 (file)
@@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
        static const u32 ip_zero[4] = { 0, 0, 0, 0 };
        bool found = false;
 
-       qed_iwarp_print_cm_info(p_hwfn, cm_info);
-
        list_for_each_entry(listener,
                            &p_hwfn->p_rdma_info->iwarp.listen_list,
                            list_entry) {
index 6bb9ec9..15ef59a 100644 (file)
 #define QED_NVM_CFG_MAX_ATTRS          50
 
 static char version[] =
-       "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
+       "QLogic FastLinQ 4xxxx Core Module qed\n";
 
 MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Core Module");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_MODULE_VERSION);
 
 #define FW_FILE_VERSION                                \
        __stringify(FW_MAJOR_VERSION) "."       \
@@ -1221,6 +1220,10 @@ static void qed_slowpath_task(struct work_struct *work)
 
        if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
                               &hwfn->slowpath_task_flags)) {
+               /* skip qed_db_rec_handler during recovery/unload */
+               if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active)
+                       goto out;
+
                qed_db_rec_handler(hwfn, ptt);
                if (hwfn->periodic_db_rec_count--)
                        qed_slowpath_delayed_work(hwfn,
@@ -1228,6 +1231,7 @@ static void qed_slowpath_task(struct work_struct *work)
                                                  QED_PERIODIC_DB_REC_INTERVAL);
        }
 
+out:
        qed_ptt_release(hwfn, ptt);
 }
 
index 4387292..6e5a6cc 100644 (file)
@@ -944,7 +944,6 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 
        memset(&in_params, 0, sizeof(in_params));
        in_params.hsi_ver = QED_LOAD_REQ_HSI_VER_DEFAULT;
-       in_params.drv_ver_0 = QED_VERSION;
        in_params.drv_ver_1 = qed_get_config_bitmap();
        in_params.fw_ver = STORM_FW_VERSION;
        rc = eocre_get_mfw_drv_role(p_hwfn, p_params->drv_role, &mfw_drv_role);
index c1dd71d..3b84d00 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/types.h>
index 9db22be..da1b7fd 100644 (file)
        0x180824UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
        0x008400UL
+#define MISC_REG_AEU_GENERAL_ATTN_32 \
+       0x008480UL
 #define MISC_REG_AEU_GENERAL_ATTN_35 \
        0x00848cUL
 #define  CAU_REG_SB_ADDR_MEMORY \
        0x180804UL
 #define  MISC_REG_AEU_ENABLE1_IGU_OUT_0 \
        0x00849cUL
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0 \
+       0x0084a8UL
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32      \
+       (0x1UL << 0)
+#define MISC_REG_AEU_ENABLE4_IGU_OUT_0_GENERAL_ATTN32_SHIFT \
+       0
 #define MISC_REG_AEU_AFTER_INVERT_1_IGU        \
        0x0087b4UL
 #define  MISC_REG_AEU_MASK_ATTN_IGU \
index 5630008..f90dcfe 100644 (file)
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 
-#define QEDE_MAJOR_VERSION             8
-#define QEDE_MINOR_VERSION             37
-#define QEDE_REVISION_VERSION          0
-#define QEDE_ENGINEERING_VERSION       20
-#define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \
-               __stringify(QEDE_MINOR_VERSION) "."             \
-               __stringify(QEDE_REVISION_VERSION) "."          \
-               __stringify(QEDE_ENGINEERING_VERSION)
-
 #define DRV_MODULE_SYM         qede
 
 struct qede_stats_common {
@@ -589,7 +580,9 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
                            struct flow_cls_offload *f);
 
 void qede_forced_speed_maps_init(void);
-int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal);
+int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack);
 int qede_set_per_coalesce(struct net_device *dev, u32 queue,
                          struct ethtool_coalesce *coal);
 
index 1560ad3..8284c4c 100644 (file)
@@ -625,13 +625,13 @@ static void qede_get_drvinfo(struct net_device *ndev,
                 (edev->dev_info.common.mfw_rev >> 8) & 0xFF,
                 edev->dev_info.common.mfw_rev & 0xFF);
 
-       if ((strlen(storm) + strlen(DRV_MODULE_VERSION) + strlen("[storm]  ")) <
+       if ((strlen(storm) + strlen("[storm]")) <
            sizeof(info->version))
                snprintf(info->version, sizeof(info->version),
-                        "%s [storm %s]", DRV_MODULE_VERSION, storm);
+                        "[storm %s]", storm);
        else
                snprintf(info->version, sizeof(info->version),
-                        "%s %s", DRV_MODULE_VERSION, storm);
+                        "%s", storm);
 
        if (edev->dev_info.common.mbi_version) {
                snprintf(mbi, ETHTOOL_FWVERS_LEN, "%d.%d.%d",
@@ -760,7 +760,9 @@ static int qede_flash_device(struct net_device *dev,
 }
 
 static int qede_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        void *rx_handle = NULL, *tx_handle = NULL;
        struct qede_dev *edev = netdev_priv(dev);
@@ -819,7 +821,9 @@ out:
        return rc;
 }
 
-int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack)
 {
        struct qede_dev *edev = netdev_priv(dev);
        struct qede_fastpath *fp;
index 1c7f9ed..9837bdb 100644 (file)
 #include "qede.h"
 #include "qede_ptp.h"
 
-static char version[] =
-       "QLogic FastLinQ 4xxxx Ethernet Driver qede " DRV_MODULE_VERSION "\n";
-
 MODULE_DESCRIPTION("QLogic FastLinQ 4xxxx Ethernet Driver");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_MODULE_VERSION);
 
 static uint debug;
 module_param(debug, uint, 0);
@@ -258,7 +254,7 @@ int __init qede_init(void)
 {
        int ret;
 
-       pr_info("qede_init: %s\n", version);
+       pr_info("qede init: QLogic FastLinQ 4xxxx Ethernet Driver qede\n");
 
        qede_forced_speed_maps_init();
 
@@ -644,7 +640,7 @@ static const struct net_device_ops qede_netdev_ops = {
        .ndo_set_mac_address    = qede_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = qede_change_mtu,
-       .ndo_do_ioctl           = qede_ioctl,
+       .ndo_eth_ioctl          = qede_ioctl,
        .ndo_tx_timeout         = qede_tx_timeout,
 #ifdef CONFIG_QED_SRIOV
        .ndo_set_vf_mac         = qede_set_vf_mac,
@@ -1157,10 +1153,6 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
        /* Start the Slowpath-process */
        memset(&sp_params, 0, sizeof(sp_params));
        sp_params.int_mode = QED_INT_MODE_MSIX;
-       sp_params.drv_major = QEDE_MAJOR_VERSION;
-       sp_params.drv_minor = QEDE_MINOR_VERSION;
-       sp_params.drv_rev = QEDE_REVISION_VERSION;
-       sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
        strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
        rc = qed_ops->common->slowpath_start(cdev, &sp_params);
        if (rc) {
@@ -1907,6 +1899,12 @@ static int qede_req_msix_irqs(struct qede_dev *edev)
                                 &edev->fp_array[i]);
                if (rc) {
                        DP_ERR(edev, "Request fp %d irq failed\n", i);
+#ifdef CONFIG_RFS_ACCEL
+                       if (edev->ndev->rx_cpu_rmap)
+                               free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
+
+                       edev->ndev->rx_cpu_rmap = NULL;
+#endif
                        qede_sync_free_irqs(edev);
                        return rc;
                }
@@ -2299,6 +2297,15 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
                rc = qede_stop_queues(edev);
                if (rc) {
+#ifdef CONFIG_RFS_ACCEL
+                       if (edev->dev_info.common.b_arfs_capable) {
+                               qede_poll_for_freeing_arfs_filters(edev);
+                               if (edev->ndev->rx_cpu_rmap)
+                                       free_irq_cpu_rmap(edev->ndev->rx_cpu_rmap);
+
+                               edev->ndev->rx_cpu_rmap = NULL;
+                       }
+#endif
                        qede_sync_free_irqs(edev);
                        goto out;
                }
@@ -2628,8 +2635,10 @@ static void qede_generic_hw_err_handler(struct qede_dev *edev)
                  "Generic sleepable HW error handling started - err_flags 0x%lx\n",
                  edev->err_flags);
 
-       if (edev->devlink)
+       if (edev->devlink) {
+               DP_NOTICE(edev, "Reporting fatal error to devlink\n");
                edev->ops->common->report_fatal_error(edev->devlink, edev->last_err_type);
+       }
 
        clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
 
@@ -2651,6 +2660,8 @@ static void qede_set_hw_err_flags(struct qede_dev *edev,
        case QED_HW_ERR_FW_ASSERT:
                set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
                set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+               /* make this error as recoverable and start recovery*/
+               set_bit(QEDE_ERR_IS_RECOVERABLE, &err_flags);
                break;
 
        default:
index d8f0863..fc364b4 100644 (file)
@@ -1021,7 +1021,7 @@ clear_diag_irq:
 
 static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[])
 {
-       unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
+       static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
 
        memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE);
 
@@ -1527,7 +1527,9 @@ qlcnic_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
  * firmware coalescing to default.
  */
 static int qlcnic_set_intr_coalesce(struct net_device *netdev,
-                       struct ethtool_coalesce *ethcoal)
+                                   struct ethtool_coalesce *ethcoal,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct qlcnic_adapter *adapter = netdev_priv(netdev);
        int err;
@@ -1551,7 +1553,9 @@ static int qlcnic_set_intr_coalesce(struct net_device *netdev,
 }
 
 static int qlcnic_get_intr_coalesce(struct net_device *netdev,
-                       struct ethtool_coalesce *ethcoal)
+                                   struct ethtool_coalesce *ethcoal,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
index e678402..3d61a76 100644 (file)
@@ -94,10 +94,8 @@ void qlcnic_release_rx_buffers(struct qlcnic_adapter *adapter)
                        if (rx_buf->skb == NULL)
                                continue;
 
-                       pci_unmap_single(adapter->pdev,
-                                       rx_buf->dma,
-                                       rds_ring->dma_size,
-                                       PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&adapter->pdev->dev, rx_buf->dma,
+                                        rds_ring->dma_size, DMA_FROM_DEVICE);
 
                        dev_kfree_skb_any(rx_buf->skb);
                }
@@ -139,16 +137,16 @@ void qlcnic_release_tx_buffers(struct qlcnic_adapter *adapter,
        for (i = 0; i < tx_ring->num_desc; i++) {
                buffrag = cmd_buf->frag_array;
                if (buffrag->dma) {
-                       pci_unmap_single(adapter->pdev, buffrag->dma,
-                                        buffrag->length, PCI_DMA_TODEVICE);
+                       dma_unmap_single(&adapter->pdev->dev, buffrag->dma,
+                                        buffrag->length, DMA_TO_DEVICE);
                        buffrag->dma = 0ULL;
                }
                for (j = 1; j < cmd_buf->frag_count; j++) {
                        buffrag++;
                        if (buffrag->dma) {
-                               pci_unmap_page(adapter->pdev, buffrag->dma,
-                                              buffrag->length,
-                                              PCI_DMA_TODEVICE);
+                               dma_unmap_page(&adapter->pdev->dev,
+                                              buffrag->dma, buffrag->length,
+                                              DMA_TO_DEVICE);
                                buffrag->dma = 0ULL;
                        }
                }
index af4c516..29cdcb2 100644 (file)
@@ -587,9 +587,9 @@ static int qlcnic_map_tx_skb(struct pci_dev *pdev, struct sk_buff *skb,
        nr_frags = skb_shinfo(skb)->nr_frags;
        nf = &pbuf->frag_array[0];
 
-       map = pci_map_single(pdev, skb->data, skb_headlen(skb),
-                            PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(pdev, map))
+       map = dma_map_single(&pdev->dev, skb->data, skb_headlen(skb),
+                            DMA_TO_DEVICE);
+       if (dma_mapping_error(&pdev->dev, map))
                goto out_err;
 
        nf->dma = map;
@@ -612,11 +612,11 @@ static int qlcnic_map_tx_skb(struct pci_dev *pdev, struct sk_buff *skb,
 unwind:
        while (--i >= 0) {
                nf = &pbuf->frag_array[i+1];
-               pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
+               dma_unmap_page(&pdev->dev, nf->dma, nf->length, DMA_TO_DEVICE);
        }
 
        nf = &pbuf->frag_array[0];
-       pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
+       dma_unmap_single(&pdev->dev, nf->dma, skb_headlen(skb), DMA_TO_DEVICE);
 
 out_err:
        return -ENOMEM;
@@ -630,11 +630,11 @@ static void qlcnic_unmap_buffers(struct pci_dev *pdev, struct sk_buff *skb,
 
        for (i = 0; i < nr_frags; i++) {
                nf = &pbuf->frag_array[i+1];
-               pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
+               dma_unmap_page(&pdev->dev, nf->dma, nf->length, DMA_TO_DEVICE);
        }
 
        nf = &pbuf->frag_array[0];
-       pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
+       dma_unmap_single(&pdev->dev, nf->dma, skb_headlen(skb), DMA_TO_DEVICE);
        pbuf->skb = NULL;
 }
 
@@ -825,10 +825,10 @@ static int qlcnic_alloc_rx_skb(struct qlcnic_adapter *adapter,
        }
 
        skb_reserve(skb, NET_IP_ALIGN);
-       dma = pci_map_single(pdev, skb->data,
-                            rds_ring->dma_size, PCI_DMA_FROMDEVICE);
+       dma = dma_map_single(&pdev->dev, skb->data, rds_ring->dma_size,
+                            DMA_FROM_DEVICE);
 
-       if (pci_dma_mapping_error(pdev, dma)) {
+       if (dma_mapping_error(&pdev->dev, dma)) {
                adapter->stats.rx_dma_map_error++;
                dev_kfree_skb_any(skb);
                return -ENOMEM;
@@ -903,13 +903,13 @@ static int qlcnic_process_cmd_ring(struct qlcnic_adapter *adapter,
                buffer = &tx_ring->cmd_buf_arr[sw_consumer];
                if (buffer->skb) {
                        frag = &buffer->frag_array[0];
-                       pci_unmap_single(pdev, frag->dma, frag->length,
-                                        PCI_DMA_TODEVICE);
+                       dma_unmap_single(&pdev->dev, frag->dma, frag->length,
+                                        DMA_TO_DEVICE);
                        frag->dma = 0ULL;
                        for (i = 1; i < buffer->frag_count; i++) {
                                frag++;
-                               pci_unmap_page(pdev, frag->dma, frag->length,
-                                              PCI_DMA_TODEVICE);
+                               dma_unmap_page(&pdev->dev, frag->dma,
+                                              frag->length, DMA_TO_DEVICE);
                                frag->dma = 0ULL;
                        }
                        tx_ring->tx_stats.xmit_finished++;
@@ -1147,8 +1147,8 @@ static struct sk_buff *qlcnic_process_rxbuf(struct qlcnic_adapter *adapter,
                return NULL;
        }
 
-       pci_unmap_single(adapter->pdev, buffer->dma, ring->dma_size,
-                        PCI_DMA_FROMDEVICE);
+       dma_unmap_single(&adapter->pdev->dev, buffer->dma, ring->dma_size,
+                        DMA_FROM_DEVICE);
 
        skb = buffer->skb;
        if (likely((adapter->netdev->features & NETIF_F_RXCSUM) &&
index a4fa507..75960a2 100644 (file)
@@ -2343,11 +2343,9 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 
 static int qlcnic_set_dma_mask(struct pci_dev *pdev, int *pci_using_dac)
 {
-       if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
-                       !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
+       if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
                *pci_using_dac = 1;
-       else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) &&
-                       !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
+       else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
                *pci_using_dac = 0;
        else {
                dev_err(&pdev->dev, "Unable to set DMA mask, aborting\n");
index ad655f0..9015a38 100644 (file)
@@ -377,7 +377,7 @@ static const struct net_device_ops emac_netdev_ops = {
        .ndo_start_xmit         = emac_start_xmit,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_change_mtu         = emac_change_mtu,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_tx_timeout         = emac_tx_timeout,
        .ndo_get_stats64        = emac_get_stats64,
        .ndo_set_features       = emac_set_features,
index b64c254..8427fe1 100644 (file)
@@ -434,7 +434,7 @@ qcaspi_receive(struct qcaspi *qca)
                                skb_put(qca->rx_skb, retcode);
                                qca->rx_skb->protocol = eth_type_trans(
                                        qca->rx_skb, qca->rx_skb->dev);
-                               qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               skb_checksum_none_assert(qca->rx_skb);
                                netif_rx_ni(qca->rx_skb);
                                qca->rx_skb = netdev_alloc_skb_ip_align(net_dev,
                                        net_dev->mtu + VLAN_ETH_HLEN);
index bcdeca7..ce3f7ce 100644 (file)
@@ -107,7 +107,7 @@ qca_tty_receive(struct serdev_device *serdev, const unsigned char *data,
                        skb_put(qca->rx_skb, retcode);
                        qca->rx_skb->protocol = eth_type_trans(
                                                qca->rx_skb, qca->rx_skb->dev);
-                       qca->rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       skb_checksum_none_assert(qca->rx_skb);
                        netif_rx_ni(qca->rx_skb);
                        qca->rx_skb = netdev_alloc_skb_ip_align(netdev,
                                                                netdev->mtu +
index 47e9998..4b2eca5 100644 (file)
@@ -954,7 +954,7 @@ static const struct net_device_ops r6040_netdev_ops = {
        .ndo_set_rx_mode        = r6040_multicast_list,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
        .ndo_tx_timeout         = r6040_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = r6040_poll_controller,
index 9677e25..2b84b45 100644 (file)
@@ -514,7 +514,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
                }
 
                new_mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen,
-                                        PCI_DMA_FROMDEVICE);
+                                        DMA_FROM_DEVICE);
                if (dma_mapping_error(&cp->pdev->dev, new_mapping)) {
                        dev->stats.rx_dropped++;
                        kfree_skb(new_skb);
@@ -522,7 +522,7 @@ static int cp_rx_poll(struct napi_struct *napi, int budget)
                }
 
                dma_unmap_single(&cp->pdev->dev, mapping,
-                                buflen, PCI_DMA_FROMDEVICE);
+                                buflen, DMA_FROM_DEVICE);
 
                /* Handle checksum offloading for incoming packets. */
                if (cp_rx_csum_ok(status))
@@ -666,7 +666,7 @@ static void cp_tx (struct cp_private *cp)
 
                dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
                                 cp->tx_opts[tx_tail] & 0xffff,
-                                PCI_DMA_TODEVICE);
+                                DMA_TO_DEVICE);
 
                if (status & LastFrag) {
                        if (status & (TxError | TxFIFOUnder)) {
@@ -724,7 +724,7 @@ static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb,
                txd = &cp->tx_ring[index];
                this_frag = &skb_shinfo(skb)->frags[frag];
                dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
-                                skb_frag_size(this_frag), PCI_DMA_TODEVICE);
+                                skb_frag_size(this_frag), DMA_TO_DEVICE);
        }
 }
 
@@ -781,7 +781,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
                dma_addr_t mapping;
 
                len = skb->len;
-               mapping = dma_map_single(&cp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
+               mapping = dma_map_single(&cp->pdev->dev, skb->data, len, DMA_TO_DEVICE);
                if (dma_mapping_error(&cp->pdev->dev, mapping))
                        goto out_dma_error;
 
@@ -810,7 +810,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
                first_eor = eor;
                first_len = skb_headlen(skb);
                first_mapping = dma_map_single(&cp->pdev->dev, skb->data,
-                                              first_len, PCI_DMA_TODEVICE);
+                                              first_len, DMA_TO_DEVICE);
                if (dma_mapping_error(&cp->pdev->dev, first_mapping))
                        goto out_dma_error;
 
@@ -826,7 +826,7 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
                        len = skb_frag_size(this_frag);
                        mapping = dma_map_single(&cp->pdev->dev,
                                                 skb_frag_address(this_frag),
-                                                len, PCI_DMA_TODEVICE);
+                                                len, DMA_TO_DEVICE);
                        if (dma_mapping_error(&cp->pdev->dev, mapping)) {
                                unwind_tx_frag_mapping(cp, skb, first_entry, entry);
                                goto out_dma_error;
@@ -1069,7 +1069,7 @@ static int cp_refill_rx(struct cp_private *cp)
                        goto err_out;
 
                mapping = dma_map_single(&cp->pdev->dev, skb->data,
-                                        cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+                                        cp->rx_buf_sz, DMA_FROM_DEVICE);
                if (dma_mapping_error(&cp->pdev->dev, mapping)) {
                        kfree_skb(skb);
                        goto err_out;
@@ -1139,7 +1139,7 @@ static void cp_clean_rings (struct cp_private *cp)
                if (cp->rx_skb[i]) {
                        desc = cp->rx_ring + i;
                        dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
-                                        cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+                                        cp->rx_buf_sz, DMA_FROM_DEVICE);
                        dev_kfree_skb_any(cp->rx_skb[i]);
                }
        }
@@ -1151,7 +1151,7 @@ static void cp_clean_rings (struct cp_private *cp)
                        desc = cp->tx_ring + i;
                        dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
                                         le32_to_cpu(desc->opts1) & 0xffff,
-                                        PCI_DMA_TODEVICE);
+                                        DMA_TO_DEVICE);
                        if (le32_to_cpu(desc->opts1) & LastFrag)
                                dev_kfree_skb_any(skb);
                        cp->dev->stats.tx_dropped++;
@@ -1869,7 +1869,7 @@ static const struct net_device_ops cp_netdev_ops = {
        .ndo_set_mac_address    = cp_set_mac_address,
        .ndo_set_rx_mode        = cp_set_rx_mode,
        .ndo_get_stats          = cp_get_stats,
-       .ndo_do_ioctl           = cp_ioctl,
+       .ndo_eth_ioctl          = cp_ioctl,
        .ndo_start_xmit         = cp_start_xmit,
        .ndo_tx_timeout         = cp_tx_timeout,
        .ndo_set_features       = cp_set_features,
@@ -1945,24 +1945,17 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 
        /* Configure DMA attributes. */
        if ((sizeof(dma_addr_t) > 4) &&
-           !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) &&
-           !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+           !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
                pci_using_dac = 1;
        } else {
                pci_using_dac = 0;
 
-               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                if (rc) {
                        dev_err(&pdev->dev,
                                "No usable DMA configuration, aborting\n");
                        goto err_out_res;
                }
-               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (rc) {
-                       dev_err(&pdev->dev,
-                               "No usable consistent DMA configuration, aborting\n");
-                       goto err_out_res;
-               }
        }
 
        cp->cpcmd = (pci_using_dac ? PCIDAC : 0) |
index f0608f0..2e6923c 100644 (file)
@@ -932,7 +932,7 @@ static const struct net_device_ops rtl8139_netdev_ops = {
        .ndo_set_mac_address    = rtl8139_set_mac_address,
        .ndo_start_xmit         = rtl8139_start_xmit,
        .ndo_set_rx_mode        = rtl8139_set_rx_mode,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_tx_timeout         = rtl8139_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = rtl8139_poll_controller,
index 4d8e337..46a6ff9 100644 (file)
@@ -1749,7 +1749,10 @@ rtl_coalesce_info(struct rtl8169_private *tp)
        return ERR_PTR(-ELNRNG);
 }
 
-static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int rtl_get_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
        const struct rtl_coalesce_info *ci;
@@ -1807,7 +1810,10 @@ static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 usec,
        return -ERANGE;
 }
 
-static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+static int rtl_set_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct rtl8169_private *tp = netdev_priv(dev);
        u32 tx_fr = ec->tx_max_coalesced_frames;
@@ -2598,7 +2604,7 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
                RTL_R32(tp, CSIDR) : ~0;
 }
 
-static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
+static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val)
 {
        struct pci_dev *pdev = tp->pci_dev;
        u32 csi;
@@ -2606,6 +2612,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
        /* According to Realtek the value at config space address 0x070f
         * controls the L0s/L1 entrance latency. We try standard ECAM access
         * first and if it fails fall back to CSI.
+        * bit 0..2: L0: 0 = 1us, 1 = 2us .. 6 = 7us, 7 = 7us (no typo)
+        * bit 3..5: L1: 0 = 1us, 1 = 2us .. 6 = 64us, 7 = 64us
         */
        if (pdev->cfg_size > 0x070f &&
            pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL)
@@ -2619,7 +2627,8 @@ static void rtl_csi_access_enable(struct rtl8169_private *tp, u8 val)
 
 static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp)
 {
-       rtl_csi_access_enable(tp, 0x27);
+       /* L0 7us, L1 16us */
+       rtl_set_aspm_entry_latency(tp, 0x27);
 }
 
 struct ephy_info {
@@ -2660,6 +2669,34 @@ static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp)
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Rdy_to_L23);
 }
 
+static void rtl_enable_exit_l1(struct rtl8169_private *tp)
+{
+       /* Bits control which events trigger ASPM L1 exit:
+        * Bit 12: rxdv
+        * Bit 11: ltr_msg
+        * Bit 10: txdma_poll
+        * Bit  9: xadm
+        * Bit  8: pktavi
+        * Bit  7: txpla
+        */
+       switch (tp->mac_version) {
+       case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_36:
+               rtl_eri_set_bits(tp, 0xd4, 0x1f00);
+               break;
+       case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38:
+               rtl_eri_set_bits(tp, 0xd4, 0x0c00);
+               break;
+       case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53:
+               rtl_eri_set_bits(tp, 0xd4, 0x1f80);
+               break;
+       case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63:
+               r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80);
+               break;
+       default:
+               break;
+       }
+}
+
 static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
 {
        /* Don't enable ASPM in the chip if OS can't control ASPM */
@@ -2848,7 +2885,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_1111, 0x0000);
        rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
-       rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
        rtl_eri_set_bits(tp, 0x1d0, BIT(1));
        rtl_reset_packet_filter(tp);
        rtl_eri_set_bits(tp, 0x1b0, BIT(4));
@@ -2905,8 +2941,6 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
        rtl_hw_start_8168f(tp);
 
        rtl_ephy_init(tp, e_info_8168f_1);
-
-       rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -2923,8 +2957,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
        rtl_pcie_state_l2l3_disable(tp);
 
        rtl_ephy_init(tp, e_info_8168f_1);
-
-       rtl_eri_set_bits(tp, 0x0d4, 0x0c00);
 }
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
@@ -2941,7 +2973,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
        rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
        rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
-       rtl_eri_set_bits(tp, 0x0d4, 0x1f80);
 
        rtl8168_config_eee_mac(tp);
 
@@ -3172,7 +3203,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
        rtl_reset_packet_filter(tp);
 
-       rtl_eri_set_bits(tp, 0xd4, 0x1f00);
        rtl_eri_set_bits(tp, 0xdc, 0x001c);
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
@@ -3226,8 +3256,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
        rtl_reset_packet_filter(tp);
 
-       rtl_eri_set_bits(tp, 0xd4, 0x1f80);
-
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
        RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN);
@@ -3329,7 +3357,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp)
 
        rtl_reset_packet_filter(tp);
 
-       rtl_eri_set_bits(tp, 0xd4, 0x1f90);
+       rtl_eri_set_bits(tp, 0xd4, 0x0010);
 
        rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
@@ -3502,8 +3530,8 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
        RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
        RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
 
-       /* The default value is 0x13. Change it to 0x2f */
-       rtl_csi_access_enable(tp, 0x2f);
+       /* L0 7us, L1 32us - needed to avoid issues with link-up detection */
+       rtl_set_aspm_entry_latency(tp, 0x2f);
 
        rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
 
@@ -3560,7 +3588,6 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
        r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001);
        r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
        r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0068);
-       r8168_mac_ocp_modify(tp, 0xc0ac, 0x0080, 0x1f00);
        r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);
 
        r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
@@ -3783,6 +3810,7 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
        else
                rtl_hw_start_8168(tp);
 
+       rtl_enable_exit_l1(tp);
        rtl_set_rx_max_size(tp);
        rtl_set_rx_tx_desc_registers(tp);
        rtl_lock_config_regs(tp);
@@ -4983,7 +5011,7 @@ static const struct net_device_ops rtl_netdev_ops = {
        .ndo_fix_features       = rtl8169_fix_features,
        .ndo_set_features       = rtl8169_set_features,
        .ndo_set_mac_address    = rtl_set_mac_address,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_rx_mode        = rtl_set_rx_mode,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = rtl8169_netpoll,
@@ -5278,11 +5306,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (rc)
                return rc;
 
-       /* Disable ASPM completely as that cause random device stop working
+       /* Disable ASPM L1 as that cause random device stop working
         * problems as well as full system hangs for some PCIe devices users.
         */
-       rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
-                                         PCIE_LINK_STATE_L1);
+       rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
        tp->aspm_manageable = !rc;
 
        /* enable device (incl. PCI PM wakeup and hotplug setup) */
index 5a2a4af..8008b2f 100644 (file)
@@ -32,11 +32,11 @@ config SH_ETH
 config RAVB
        tristate "Renesas Ethernet AVB support"
        depends on ARCH_RENESAS || COMPILE_TEST
+       depends on PTP_1588_CLOCK_OPTIONAL
        select CRC32
        select MII
        select MDIO_BITBANG
        select PHYLIB
-       imply PTP_1588_CLOCK
        help
          Renesas Ethernet AVB device driver.
          This driver supports the following SoCs:
index 80e62ca..47c5377 100644 (file)
@@ -956,10 +956,6 @@ enum RAVB_QUEUE {
 
 #define RX_BUF_SZ      (2048 - ETH_FCS_LEN + sizeof(__sum16))
 
-/* TX descriptors per packet */
-#define NUM_TX_DESC_GEN2       2
-#define NUM_TX_DESC_GEN3       1
-
 struct ravb_tstamp_skb {
        struct list_head list;
        struct sk_buff *skb;
@@ -983,9 +979,29 @@ struct ravb_ptp {
        struct ravb_ptp_perout perout[N_PER_OUT];
 };
 
-enum ravb_chip_id {
-       RCAR_GEN2,
-       RCAR_GEN3,
+struct ravb_hw_info {
+       void (*rx_ring_free)(struct net_device *ndev, int q);
+       void (*rx_ring_format)(struct net_device *ndev, int q);
+       void *(*alloc_rx_desc)(struct net_device *ndev, int q);
+       bool (*receive)(struct net_device *ndev, int *quota, int q);
+       void (*set_rate)(struct net_device *ndev);
+       int (*set_rx_csum_feature)(struct net_device *ndev, netdev_features_t features);
+       void (*dmac_init)(struct net_device *ndev);
+       void (*emac_init)(struct net_device *ndev);
+       const char (*gstrings_stats)[ETH_GSTRING_LEN];
+       size_t gstrings_size;
+       netdev_features_t net_hw_features;
+       netdev_features_t net_features;
+       int stats_len;
+       size_t max_rx_len;
+       unsigned aligned_tx: 1;
+
+       /* hardware features */
+       unsigned internal_delay:1;      /* AVB-DMAC has internal delays */
+       unsigned tx_counters:1;         /* E-MAC has TX counters */
+       unsigned multi_irqs:1;          /* AVB-DMAC and E-MAC has multiple irqs */
+       unsigned no_ptp_cfg_active:1;   /* AVB-DMAC does not support gPTP active in config mode */
+       unsigned ptp_cfg_active:1;      /* AVB-DMAC has gPTP support active in config mode */
 };
 
 struct ravb_private {
@@ -1029,7 +1045,6 @@ struct ravb_private {
        int msg_enable;
        int speed;
        int emac_irq;
-       enum ravb_chip_id chip_id;
        int rx_irqs[NUM_RX_QUEUE];
        int tx_irqs[NUM_TX_QUEUE];
 
@@ -1039,7 +1054,10 @@ struct ravb_private {
        unsigned rxcidm:1;              /* RX Clock Internal Delay Mode */
        unsigned txcidm:1;              /* TX Clock Internal Delay Mode */
        unsigned rgmii_override:1;      /* Deprecated rgmii-*id behavior */
-       int num_tx_desc;                /* TX descriptors per packet */
+       unsigned int num_tx_desc;       /* TX descriptors per packet */
+
+       const struct ravb_hw_info *info;
+       struct reset_control *rstc;
 };
 
 static inline u32 ravb_read(struct net_device *ndev, enum ravb_reg reg)
index 8053970..0f85f2d 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/sys_soc.h>
+#include <linux/reset.h>
 
 #include <asm/div64.h>
 
@@ -177,10 +178,10 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        struct net_device_stats *stats = &priv->stats[q];
-       int num_tx_desc = priv->num_tx_desc;
+       unsigned int num_tx_desc = priv->num_tx_desc;
        struct ravb_tx_desc *desc;
+       unsigned int entry;
        int free_num = 0;
-       int entry;
        u32 size;
 
        for (; priv->cur_tx[q] - priv->dirty_tx[q] > 0; priv->dirty_tx[q]++) {
@@ -216,31 +217,42 @@ static int ravb_tx_free(struct net_device *ndev, int q, bool free_txed_only)
        return free_num;
 }
 
-/* Free skb's and DMA buffers for Ethernet AVB */
-static void ravb_ring_free(struct net_device *ndev, int q)
+static void ravb_rx_ring_free(struct net_device *ndev, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       int num_tx_desc = priv->num_tx_desc;
-       int ring_size;
-       int i;
+       unsigned int ring_size;
+       unsigned int i;
+
+       if (!priv->rx_ring[q])
+               return;
 
-       if (priv->rx_ring[q]) {
-               for (i = 0; i < priv->num_rx_ring[q]; i++) {
-                       struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
+       for (i = 0; i < priv->num_rx_ring[q]; i++) {
+               struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i];
 
-                       if (!dma_mapping_error(ndev->dev.parent,
-                                              le32_to_cpu(desc->dptr)))
-                               dma_unmap_single(ndev->dev.parent,
-                                                le32_to_cpu(desc->dptr),
-                                                RX_BUF_SZ,
-                                                DMA_FROM_DEVICE);
-               }
-               ring_size = sizeof(struct ravb_ex_rx_desc) *
-                           (priv->num_rx_ring[q] + 1);
-               dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q],
-                                 priv->rx_desc_dma[q]);
-               priv->rx_ring[q] = NULL;
+               if (!dma_mapping_error(ndev->dev.parent,
+                                      le32_to_cpu(desc->dptr)))
+                       dma_unmap_single(ndev->dev.parent,
+                                        le32_to_cpu(desc->dptr),
+                                        RX_BUF_SZ,
+                                        DMA_FROM_DEVICE);
        }
+       ring_size = sizeof(struct ravb_ex_rx_desc) *
+                   (priv->num_rx_ring[q] + 1);
+       dma_free_coherent(ndev->dev.parent, ring_size, priv->rx_ring[q],
+                         priv->rx_desc_dma[q]);
+       priv->rx_ring[q] = NULL;
+}
+
+/* Free skb's and DMA buffers for Ethernet AVB */
+static void ravb_ring_free(struct net_device *ndev, int q)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+       unsigned int num_tx_desc = priv->num_tx_desc;
+       unsigned int ring_size;
+       unsigned int i;
+
+       info->rx_ring_free(ndev, q);
 
        if (priv->tx_ring[q]) {
                ravb_tx_free(ndev, q, false);
@@ -271,24 +283,13 @@ static void ravb_ring_free(struct net_device *ndev, int q)
        priv->tx_skb[q] = NULL;
 }
 
-/* Format skb and descriptor buffer for Ethernet AVB */
-static void ravb_ring_format(struct net_device *ndev, int q)
+static void ravb_rx_ring_format(struct net_device *ndev, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       int num_tx_desc = priv->num_tx_desc;
        struct ravb_ex_rx_desc *rx_desc;
-       struct ravb_tx_desc *tx_desc;
-       struct ravb_desc *desc;
-       int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
-       int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
-                          num_tx_desc;
+       unsigned int rx_ring_size = sizeof(*rx_desc) * priv->num_rx_ring[q];
        dma_addr_t dma_addr;
-       int i;
-
-       priv->cur_rx[q] = 0;
-       priv->cur_tx[q] = 0;
-       priv->dirty_rx[q] = 0;
-       priv->dirty_tx[q] = 0;
+       unsigned int i;
 
        memset(priv->rx_ring[q], 0, rx_ring_size);
        /* Build RX ring buffer */
@@ -310,6 +311,26 @@ static void ravb_ring_format(struct net_device *ndev, int q)
        rx_desc = &priv->rx_ring[q][i];
        rx_desc->dptr = cpu_to_le32((u32)priv->rx_desc_dma[q]);
        rx_desc->die_dt = DT_LINKFIX; /* type */
+}
+
+/* Format skb and descriptor buffer for Ethernet AVB */
+static void ravb_ring_format(struct net_device *ndev, int q)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+       unsigned int num_tx_desc = priv->num_tx_desc;
+       struct ravb_tx_desc *tx_desc;
+       struct ravb_desc *desc;
+       unsigned int tx_ring_size = sizeof(*tx_desc) * priv->num_tx_ring[q] *
+                                   num_tx_desc;
+       unsigned int i;
+
+       priv->cur_rx[q] = 0;
+       priv->cur_tx[q] = 0;
+       priv->dirty_rx[q] = 0;
+       priv->dirty_tx[q] = 0;
+
+       info->rx_ring_format(ndev, q);
 
        memset(priv->tx_ring[q], 0, tx_ring_size);
        /* Build TX ring buffer */
@@ -335,14 +356,28 @@ static void ravb_ring_format(struct net_device *ndev, int q)
        desc->dptr = cpu_to_le32((u32)priv->tx_desc_dma[q]);
 }
 
+static void *ravb_alloc_rx_desc(struct net_device *ndev, int q)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       unsigned int ring_size;
+
+       ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
+
+       priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
+                                             &priv->rx_desc_dma[q],
+                                             GFP_KERNEL);
+       return priv->rx_ring[q];
+}
+
 /* Init skb and descriptor buffer for Ethernet AVB */
 static int ravb_ring_init(struct net_device *ndev, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       int num_tx_desc = priv->num_tx_desc;
+       const struct ravb_hw_info *info = priv->info;
+       unsigned int num_tx_desc = priv->num_tx_desc;
+       unsigned int ring_size;
        struct sk_buff *skb;
-       int ring_size;
-       int i;
+       unsigned int i;
 
        /* Allocate RX and TX skb rings */
        priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
@@ -353,7 +388,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
                goto error;
 
        for (i = 0; i < priv->num_rx_ring[q]; i++) {
-               skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
+               skb = netdev_alloc_skb(ndev, info->max_rx_len);
                if (!skb)
                        goto error;
                ravb_set_buffer_align(skb);
@@ -369,11 +404,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
        }
 
        /* Allocate all RX descriptors. */
-       ring_size = sizeof(struct ravb_ex_rx_desc) * (priv->num_rx_ring[q] + 1);
-       priv->rx_ring[q] = dma_alloc_coherent(ndev->dev.parent, ring_size,
-                                             &priv->rx_desc_dma[q],
-                                             GFP_KERNEL);
-       if (!priv->rx_ring[q])
+       if (!info->alloc_rx_desc(ndev, q))
                goto error;
 
        priv->dirty_rx[q] = 0;
@@ -395,8 +426,7 @@ error:
        return -ENOMEM;
 }
 
-/* E-MAC init function */
-static void ravb_emac_init(struct net_device *ndev)
+static void ravb_rcar_emac_init(struct net_device *ndev)
 {
        /* Receive frame limit set register */
        ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
@@ -422,29 +452,19 @@ static void ravb_emac_init(struct net_device *ndev)
        ravb_write(ndev, ECSIPR_ICDIP | ECSIPR_MPDIP | ECSIPR_LCHNGIP, ECSIPR);
 }
 
-/* Device init function for Ethernet AVB */
-static int ravb_dmac_init(struct net_device *ndev)
+/* E-MAC init function */
+static void ravb_emac_init(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       int error;
+       const struct ravb_hw_info *info = priv->info;
 
-       /* Set CONFIG mode */
-       error = ravb_config(ndev);
-       if (error)
-               return error;
-
-       error = ravb_ring_init(ndev, RAVB_BE);
-       if (error)
-               return error;
-       error = ravb_ring_init(ndev, RAVB_NC);
-       if (error) {
-               ravb_ring_free(ndev, RAVB_BE);
-               return error;
-       }
+       info->emac_init(ndev);
+}
 
-       /* Descriptor format */
-       ravb_ring_format(ndev, RAVB_BE);
-       ravb_ring_format(ndev, RAVB_NC);
+static void ravb_rcar_dmac_init(struct net_device *ndev)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
 
        /* Set AVB RX */
        ravb_write(ndev,
@@ -457,7 +477,7 @@ static int ravb_dmac_init(struct net_device *ndev)
        ravb_write(ndev, TCCR_TFEN, TCCR);
 
        /* Interrupt init: */
-       if (priv->chip_id == RCAR_GEN3) {
+       if (info->multi_irqs) {
                /* Clear DIL.DPLx */
                ravb_write(ndev, 0, DIL);
                /* Set queue specific interrupt */
@@ -471,6 +491,34 @@ static int ravb_dmac_init(struct net_device *ndev)
        ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
        /* Frame transmitted, timestamp FIFO updated */
        ravb_write(ndev, TIC_FTE0 | TIC_FTE1 | TIC_TFUE, TIC);
+}
+
+/* Device init function for Ethernet AVB */
+static int ravb_dmac_init(struct net_device *ndev)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+       int error;
+
+       /* Set CONFIG mode */
+       error = ravb_config(ndev);
+       if (error)
+               return error;
+
+       error = ravb_ring_init(ndev, RAVB_BE);
+       if (error)
+               return error;
+       error = ravb_ring_init(ndev, RAVB_NC);
+       if (error) {
+               ravb_ring_free(ndev, RAVB_BE);
+               return error;
+       }
+
+       /* Descriptor format */
+       ravb_ring_format(ndev, RAVB_BE);
+       ravb_ring_format(ndev, RAVB_NC);
+
+       info->dmac_init(ndev);
 
        /* Setting the control will start the AVB-DMAC process. */
        ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION);
@@ -531,10 +579,10 @@ static void ravb_rx_csum(struct sk_buff *skb)
        skb_trim(skb, skb->len - sizeof(__sum16));
 }
 
-/* Packet receive function for Ethernet AVB */
-static bool ravb_rx(struct net_device *ndev, int *quota, int q)
+static bool ravb_rcar_rx(struct net_device *ndev, int *quota, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
        int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
                        priv->cur_rx[q];
@@ -619,9 +667,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
                desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
 
                if (!priv->rx_skb[q][entry]) {
-                       skb = netdev_alloc_skb(ndev,
-                                              RX_BUF_SZ +
-                                              RAVB_ALIGN - 1);
+                       skb = netdev_alloc_skb(ndev, info->max_rx_len);
                        if (!skb)
                                break;  /* Better luck next round. */
                        ravb_set_buffer_align(skb);
@@ -647,6 +693,15 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
        return boguscnt <= 0;
 }
 
+/* Packet receive function for Ethernet AVB */
+static bool ravb_rx(struct net_device *ndev, int *quota, int q)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+
+       return info->receive(ndev, quota, q);
+}
+
 static void ravb_rcv_snd_disable(struct net_device *ndev)
 {
        /* Disable TX and RX */
@@ -758,6 +813,7 @@ static void ravb_error_interrupt(struct net_device *ndev)
 static bool ravb_queue_interrupt(struct net_device *ndev, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        u32 ris0 = ravb_read(ndev, RIS0);
        u32 ric0 = ravb_read(ndev, RIC0);
        u32 tis  = ravb_read(ndev, TIS);
@@ -766,7 +822,7 @@ static bool ravb_queue_interrupt(struct net_device *ndev, int q)
        if (((ris0 & ric0) & BIT(q)) || ((tis  & tic)  & BIT(q))) {
                if (napi_schedule_prep(&priv->napi[q])) {
                        /* Mask RX and TX interrupts */
-                       if (priv->chip_id == RCAR_GEN2) {
+                       if (!info->multi_irqs) {
                                ravb_write(ndev, ric0 & ~BIT(q), RIC0);
                                ravb_write(ndev, tic & ~BIT(q), TIC);
                        } else {
@@ -909,6 +965,7 @@ static int ravb_poll(struct napi_struct *napi, int budget)
 {
        struct net_device *ndev = napi->dev;
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        unsigned long flags;
        int q = napi - priv->napi;
        int mask = BIT(q);
@@ -932,7 +989,7 @@ static int ravb_poll(struct napi_struct *napi, int budget)
 
        /* Re-enable RX/TX interrupts */
        spin_lock_irqsave(&priv->lock, flags);
-       if (priv->chip_id == RCAR_GEN2) {
+       if (!info->multi_irqs) {
                ravb_modify(ndev, RIC0, mask, mask);
                ravb_modify(ndev, TIC,  mask, mask);
        } else {
@@ -956,6 +1013,7 @@ out:
 static void ravb_adjust_link(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        struct phy_device *phydev = ndev->phydev;
        bool new_state = false;
        unsigned long flags;
@@ -970,7 +1028,7 @@ static void ravb_adjust_link(struct net_device *ndev)
                if (phydev->speed != priv->speed) {
                        new_state = true;
                        priv->speed = phydev->speed;
-                       ravb_set_rate(ndev);
+                       info->set_rate(ndev);
                }
                if (!priv->link) {
                        ravb_modify(ndev, ECMR, ECMR_TXF, 0);
@@ -1133,13 +1191,14 @@ static const char ravb_gstrings_stats[][ETH_GSTRING_LEN] = {
        "rx_queue_1_over_errors",
 };
 
-#define RAVB_STATS_LEN ARRAY_SIZE(ravb_gstrings_stats)
-
 static int ravb_get_sset_count(struct net_device *netdev, int sset)
 {
+       struct ravb_private *priv = netdev_priv(netdev);
+       const struct ravb_hw_info *info = priv->info;
+
        switch (sset) {
        case ETH_SS_STATS:
-               return RAVB_STATS_LEN;
+               return info->stats_len;
        default:
                return -EOPNOTSUPP;
        }
@@ -1176,9 +1235,12 @@ static void ravb_get_ethtool_stats(struct net_device *ndev,
 
 static void ravb_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, ravb_gstrings_stats, sizeof(ravb_gstrings_stats));
+               memcpy(data, info->gstrings_stats, info->gstrings_size);
                break;
        }
 }
@@ -1198,6 +1260,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
                              struct ethtool_ringparam *ring)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        int error;
 
        if (ring->tx_pending > BE_TX_RING_MAX ||
@@ -1211,7 +1274,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
        if (netif_running(ndev)) {
                netif_device_detach(ndev);
                /* Stop PTP Clock driver */
-               if (priv->chip_id == RCAR_GEN2)
+               if (info->no_ptp_cfg_active)
                        ravb_ptp_stop(ndev);
                /* Wait for DMA stopping */
                error = ravb_stop_dma(ndev);
@@ -1243,7 +1306,7 @@ static int ravb_set_ringparam(struct net_device *ndev,
                ravb_emac_init(ndev);
 
                /* Initialise PTP Clock driver */
-               if (priv->chip_id == RCAR_GEN2)
+               if (info->no_ptp_cfg_active)
                        ravb_ptp_init(ndev, priv->pdev);
 
                netif_device_attach(ndev);
@@ -1334,6 +1397,7 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
 static int ravb_open(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        struct platform_device *pdev = priv->pdev;
        struct device *dev = &pdev->dev;
        int error;
@@ -1341,7 +1405,7 @@ static int ravb_open(struct net_device *ndev)
        napi_enable(&priv->napi[RAVB_BE]);
        napi_enable(&priv->napi[RAVB_NC]);
 
-       if (priv->chip_id == RCAR_GEN2) {
+       if (!info->multi_irqs) {
                error = request_irq(ndev->irq, ravb_interrupt, IRQF_SHARED,
                                    ndev->name, ndev);
                if (error) {
@@ -1382,7 +1446,7 @@ static int ravb_open(struct net_device *ndev)
        ravb_emac_init(ndev);
 
        /* Initialise PTP Clock driver */
-       if (priv->chip_id == RCAR_GEN2)
+       if (info->no_ptp_cfg_active)
                ravb_ptp_init(ndev, priv->pdev);
 
        netif_tx_start_all_queues(ndev);
@@ -1396,10 +1460,10 @@ static int ravb_open(struct net_device *ndev)
 
 out_ptp_stop:
        /* Stop PTP Clock driver */
-       if (priv->chip_id == RCAR_GEN2)
+       if (info->no_ptp_cfg_active)
                ravb_ptp_stop(ndev);
 out_free_irq_nc_tx:
-       if (priv->chip_id == RCAR_GEN2)
+       if (!info->multi_irqs)
                goto out_free_irq;
        free_irq(priv->tx_irqs[RAVB_NC], ndev);
 out_free_irq_nc_rx:
@@ -1437,13 +1501,14 @@ static void ravb_tx_timeout_work(struct work_struct *work)
 {
        struct ravb_private *priv = container_of(work, struct ravb_private,
                                                 work);
+       const struct ravb_hw_info *info = priv->info;
        struct net_device *ndev = priv->ndev;
        int error;
 
        netif_tx_stop_all_queues(ndev);
 
        /* Stop PTP Clock driver */
-       if (priv->chip_id == RCAR_GEN2)
+       if (info->no_ptp_cfg_active)
                ravb_ptp_stop(ndev);
 
        /* Wait for DMA stopping */
@@ -1478,7 +1543,7 @@ static void ravb_tx_timeout_work(struct work_struct *work)
 
 out:
        /* Initialise PTP Clock driver */
-       if (priv->chip_id == RCAR_GEN2)
+       if (info->no_ptp_cfg_active)
                ravb_ptp_init(ndev, priv->pdev);
 
        netif_tx_start_all_queues(ndev);
@@ -1488,7 +1553,7 @@ out:
 static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
-       int num_tx_desc = priv->num_tx_desc;
+       unsigned int num_tx_desc = priv->num_tx_desc;
        u16 q = skb_get_queue_mapping(skb);
        struct ravb_tstamp_skb *ts_skb;
        struct ravb_tx_desc *desc;
@@ -1628,13 +1693,14 @@ static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
 static struct net_device_stats *ravb_get_stats(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        struct net_device_stats *nstats, *stats0, *stats1;
 
        nstats = &ndev->stats;
        stats0 = &priv->stats[RAVB_BE];
        stats1 = &priv->stats[RAVB_NC];
 
-       if (priv->chip_id == RCAR_GEN3) {
+       if (info->tx_counters) {
                nstats->tx_dropped += ravb_read(ndev, TROCR);
                ravb_write(ndev, 0, TROCR);     /* (write clear) */
        }
@@ -1675,6 +1741,7 @@ static int ravb_close(struct net_device *ndev)
 {
        struct device_node *np = ndev->dev.parent->of_node;
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        struct ravb_tstamp_skb *ts_skb, *ts_skb2;
 
        netif_tx_stop_all_queues(ndev);
@@ -1685,7 +1752,7 @@ static int ravb_close(struct net_device *ndev)
        ravb_write(ndev, 0, TIC);
 
        /* Stop PTP Clock driver */
-       if (priv->chip_id == RCAR_GEN2)
+       if (info->no_ptp_cfg_active)
                ravb_ptp_stop(ndev);
 
        /* Set the config mode to stop the AVB-DMAC's processes */
@@ -1708,7 +1775,7 @@ static int ravb_close(struct net_device *ndev)
                        of_phy_deregister_fixed_link(np);
        }
 
-       if (priv->chip_id != RCAR_GEN2) {
+       if (info->multi_irqs) {
                free_irq(priv->tx_irqs[RAVB_NC], ndev);
                free_irq(priv->rx_irqs[RAVB_NC], ndev);
                free_irq(priv->tx_irqs[RAVB_BE], ndev);
@@ -1851,8 +1918,8 @@ static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
        spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static int ravb_set_features(struct net_device *ndev,
-                            netdev_features_t features)
+static int ravb_set_features_rx_csum(struct net_device *ndev,
+                                    netdev_features_t features)
 {
        netdev_features_t changed = ndev->features ^ features;
 
@@ -1864,6 +1931,15 @@ static int ravb_set_features(struct net_device *ndev,
        return 0;
 }
 
+static int ravb_set_features(struct net_device *ndev,
+                            netdev_features_t features)
+{
+       struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
+
+       return info->set_rx_csum_feature(ndev, features);
+}
+
 static const struct net_device_ops ravb_netdev_ops = {
        .ndo_open               = ravb_open,
        .ndo_stop               = ravb_close,
@@ -1872,7 +1948,7 @@ static const struct net_device_ops ravb_netdev_ops = {
        .ndo_get_stats          = ravb_get_stats,
        .ndo_set_rx_mode        = ravb_set_rx_mode,
        .ndo_tx_timeout         = ravb_tx_timeout,
-       .ndo_do_ioctl           = ravb_do_ioctl,
+       .ndo_eth_ioctl          = ravb_do_ioctl,
        .ndo_change_mtu         = ravb_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
@@ -1924,12 +2000,52 @@ static int ravb_mdio_release(struct ravb_private *priv)
        return 0;
 }
 
+static const struct ravb_hw_info ravb_gen3_hw_info = {
+       .rx_ring_free = ravb_rx_ring_free,
+       .rx_ring_format = ravb_rx_ring_format,
+       .alloc_rx_desc = ravb_alloc_rx_desc,
+       .receive = ravb_rcar_rx,
+       .set_rate = ravb_set_rate,
+       .set_rx_csum_feature = ravb_set_features_rx_csum,
+       .dmac_init = ravb_rcar_dmac_init,
+       .emac_init = ravb_rcar_emac_init,
+       .gstrings_stats = ravb_gstrings_stats,
+       .gstrings_size = sizeof(ravb_gstrings_stats),
+       .net_hw_features = NETIF_F_RXCSUM,
+       .net_features = NETIF_F_RXCSUM,
+       .stats_len = ARRAY_SIZE(ravb_gstrings_stats),
+       .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
+       .internal_delay = 1,
+       .tx_counters = 1,
+       .multi_irqs = 1,
+       .ptp_cfg_active = 1,
+};
+
+static const struct ravb_hw_info ravb_gen2_hw_info = {
+       .rx_ring_free = ravb_rx_ring_free,
+       .rx_ring_format = ravb_rx_ring_format,
+       .alloc_rx_desc = ravb_alloc_rx_desc,
+       .receive = ravb_rcar_rx,
+       .set_rate = ravb_set_rate,
+       .set_rx_csum_feature = ravb_set_features_rx_csum,
+       .dmac_init = ravb_rcar_dmac_init,
+       .emac_init = ravb_rcar_emac_init,
+       .gstrings_stats = ravb_gstrings_stats,
+       .gstrings_size = sizeof(ravb_gstrings_stats),
+       .net_hw_features = NETIF_F_RXCSUM,
+       .net_features = NETIF_F_RXCSUM,
+       .stats_len = ARRAY_SIZE(ravb_gstrings_stats),
+       .max_rx_len = RX_BUF_SZ + RAVB_ALIGN - 1,
+       .aligned_tx = 1,
+       .no_ptp_cfg_active = 1,
+};
+
 static const struct of_device_id ravb_match_table[] = {
-       { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 },
-       { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 },
-       { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 },
-       { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 },
-       { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 },
+       { .compatible = "renesas,etheravb-r8a7790", .data = &ravb_gen2_hw_info },
+       { .compatible = "renesas,etheravb-r8a7794", .data = &ravb_gen2_hw_info },
+       { .compatible = "renesas,etheravb-rcar-gen2", .data = &ravb_gen2_hw_info },
+       { .compatible = "renesas,etheravb-r8a7795", .data = &ravb_gen3_hw_info },
+       { .compatible = "renesas,etheravb-rcar-gen3", .data = &ravb_gen3_hw_info },
        { }
 };
 MODULE_DEVICE_TABLE(of, ravb_match_table);
@@ -1962,8 +2078,9 @@ static int ravb_set_gti(struct net_device *ndev)
 static void ravb_set_config_mode(struct net_device *ndev)
 {
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
 
-       if (priv->chip_id == RCAR_GEN2) {
+       if (info->no_ptp_cfg_active) {
                ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
                /* Set CSEL value */
                ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
@@ -1973,13 +2090,6 @@ static void ravb_set_config_mode(struct net_device *ndev)
        }
 }
 
-static const struct soc_device_attribute ravb_delay_mode_quirk_match[] = {
-       { .soc_id = "r8a774c0" },
-       { .soc_id = "r8a77990" },
-       { .soc_id = "r8a77995" },
-       { /* sentinel */ }
-};
-
 /* Set tx and rx clock internal delay modes */
 static void ravb_parse_delay_mode(struct device_node *np, struct net_device *ndev)
 {
@@ -2010,12 +2120,8 @@ static void ravb_parse_delay_mode(struct device_node *np, struct net_device *nde
 
        if (priv->phy_interface == PHY_INTERFACE_MODE_RGMII_ID ||
            priv->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) {
-               if (!WARN(soc_device_match(ravb_delay_mode_quirk_match),
-                         "phy-mode %s requires TX clock internal delay mode which is not supported by this hardware revision. Please update device tree",
-                         phy_modes(priv->phy_interface))) {
-                       priv->txcidm = 1;
-                       priv->rgmii_override = 1;
-               }
+               priv->txcidm = 1;
+               priv->rgmii_override = 1;
        }
 }
 
@@ -2034,8 +2140,9 @@ static void ravb_set_delay_mode(struct net_device *ndev)
 static int ravb_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
+       const struct ravb_hw_info *info;
+       struct reset_control *rstc;
        struct ravb_private *priv;
-       enum ravb_chip_id chip_id;
        struct net_device *ndev;
        int error, irq, q;
        struct resource *res;
@@ -2047,20 +2154,26 @@ static int ravb_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
+       rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
+       if (IS_ERR(rstc))
+               return dev_err_probe(&pdev->dev, PTR_ERR(rstc),
+                                    "failed to get cpg reset\n");
+
        ndev = alloc_etherdev_mqs(sizeof(struct ravb_private),
                                  NUM_TX_QUEUE, NUM_RX_QUEUE);
        if (!ndev)
                return -ENOMEM;
 
-       ndev->features = NETIF_F_RXCSUM;
-       ndev->hw_features = NETIF_F_RXCSUM;
+       info = of_device_get_match_data(&pdev->dev);
 
+       ndev->features = info->net_features;
+       ndev->hw_features = info->net_hw_features;
+
+       reset_control_deassert(rstc);
        pm_runtime_enable(&pdev->dev);
        pm_runtime_get_sync(&pdev->dev);
 
-       chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
-
-       if (chip_id == RCAR_GEN3)
+       if (info->multi_irqs)
                irq = platform_get_irq_byname(pdev, "ch22");
        else
                irq = platform_get_irq(pdev, 0);
@@ -2073,6 +2186,8 @@ static int ravb_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(ndev, &pdev->dev);
 
        priv = netdev_priv(ndev);
+       priv->info = info;
+       priv->rstc = rstc;
        priv->ndev = ndev;
        priv->pdev = pdev;
        priv->num_tx_ring[RAVB_BE] = BE_TX_RING_SIZE;
@@ -2099,7 +2214,7 @@ static int ravb_probe(struct platform_device *pdev)
        priv->avb_link_active_low =
                of_property_read_bool(np, "renesas,ether-link-active-low");
 
-       if (chip_id == RCAR_GEN3) {
+       if (info->multi_irqs) {
                irq = platform_get_irq_byname(pdev, "ch24");
                if (irq < 0) {
                        error = irq;
@@ -2124,8 +2239,6 @@ static int ravb_probe(struct platform_device *pdev)
                }
        }
 
-       priv->chip_id = chip_id;
-
        priv->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(priv->clk)) {
                error = PTR_ERR(priv->clk);
@@ -2142,8 +2255,12 @@ static int ravb_probe(struct platform_device *pdev)
        ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
        ndev->min_mtu = ETH_MIN_MTU;
 
-       priv->num_tx_desc = chip_id == RCAR_GEN2 ?
-               NUM_TX_DESC_GEN2 : NUM_TX_DESC_GEN3;
+       /* FIXME: R-Car Gen2 has 4byte alignment restriction for tx buffer
+        * Use two descriptor to handle such situation. First descriptor to
+        * handle aligned data buffer and second descriptor to handle the
+        * overflow data because of alignment.
+        */
+       priv->num_tx_desc = info->aligned_tx ? 2 : 1;
 
        /* Set function */
        ndev->netdev_ops = &ravb_netdev_ops;
@@ -2160,7 +2277,7 @@ static int ravb_probe(struct platform_device *pdev)
        /* Request GTI loading */
        ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
 
-       if (priv->chip_id != RCAR_GEN2) {
+       if (info->internal_delay) {
                ravb_parse_delay_mode(np, ndev);
                ravb_set_delay_mode(ndev);
        }
@@ -2184,7 +2301,7 @@ static int ravb_probe(struct platform_device *pdev)
        INIT_LIST_HEAD(&priv->ts_skb_list);
 
        /* Initialise PTP Clock driver */
-       if (chip_id != RCAR_GEN2)
+       if (info->ptp_cfg_active)
                ravb_ptp_init(ndev, pdev);
 
        /* Debug message level */
@@ -2232,7 +2349,7 @@ out_dma_free:
                          priv->desc_bat_dma);
 
        /* Stop PTP Clock driver */
-       if (chip_id != RCAR_GEN2)
+       if (info->ptp_cfg_active)
                ravb_ptp_stop(ndev);
 out_disable_refclk:
        clk_disable_unprepare(priv->refclk);
@@ -2241,6 +2358,7 @@ out_release:
 
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
+       reset_control_assert(rstc);
        return error;
 }
 
@@ -2248,9 +2366,10 @@ static int ravb_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
 
        /* Stop PTP Clock driver */
-       if (priv->chip_id != RCAR_GEN2)
+       if (info->ptp_cfg_active)
                ravb_ptp_stop(ndev);
 
        clk_disable_unprepare(priv->refclk);
@@ -2265,6 +2384,7 @@ static int ravb_remove(struct platform_device *pdev)
        netif_napi_del(&priv->napi[RAVB_BE]);
        ravb_mdio_release(priv);
        pm_runtime_disable(&pdev->dev);
+       reset_control_assert(priv->rstc);
        free_netdev(ndev);
        platform_set_drvdata(pdev, NULL);
 
@@ -2333,6 +2453,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
 {
        struct net_device *ndev = dev_get_drvdata(dev);
        struct ravb_private *priv = netdev_priv(ndev);
+       const struct ravb_hw_info *info = priv->info;
        int ret = 0;
 
        /* If WoL is enabled set reset mode to rearm the WoL logic */
@@ -2355,7 +2476,7 @@ static int __maybe_unused ravb_resume(struct device *dev)
        /* Request GTI loading */
        ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
 
-       if (priv->chip_id != RCAR_GEN2)
+       if (info->internal_delay)
                ravb_set_delay_mode(ndev);
 
        /* Restore descriptor base address table */
index 6984bd5..c099656 100644 (file)
@@ -179,6 +179,7 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
 {
        struct ravb_private *priv = container_of(ptp, struct ravb_private,
                                                 ptp.info);
+       const struct ravb_hw_info *info = priv->info;
        struct net_device *ndev = priv->ndev;
        unsigned long flags;
 
@@ -197,7 +198,7 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
        priv->ptp.extts[req->index] = on;
 
        spin_lock_irqsave(&priv->lock, flags);
-       if (priv->chip_id == RCAR_GEN2)
+       if (!info->multi_irqs)
                ravb_modify(ndev, GIC, GIC_PTCE, on ? GIC_PTCE : 0);
        else if (on)
                ravb_write(ndev, GIE_PTCS, GIE);
@@ -213,6 +214,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
 {
        struct ravb_private *priv = container_of(ptp, struct ravb_private,
                                                 ptp.info);
+       const struct ravb_hw_info *info = priv->info;
        struct net_device *ndev = priv->ndev;
        struct ravb_ptp_perout *perout;
        unsigned long flags;
@@ -252,7 +254,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
                error = ravb_ptp_update_compare(priv, (u32)start_ns);
                if (!error) {
                        /* Unmask interrupt */
-                       if (priv->chip_id == RCAR_GEN2)
+                       if (!info->multi_irqs)
                                ravb_modify(ndev, GIC, GIC_PTME, GIC_PTME);
                        else
                                ravb_write(ndev, GIE_PTMS0, GIE);
@@ -264,7 +266,7 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
                perout->period = 0;
 
                /* Mask interrupt */
-               if (priv->chip_id == RCAR_GEN2)
+               if (!info->multi_irqs)
                        ravb_modify(ndev, GIC, GIC_PTME, 0);
                else
                        ravb_write(ndev, GID_PTMD0, GID);
index 8404786..6c8ba91 100644 (file)
@@ -3141,7 +3141,7 @@ static const struct net_device_ops sh_eth_netdev_ops = {
        .ndo_get_stats          = sh_eth_get_stats,
        .ndo_set_rx_mode        = sh_eth_set_rx_mode,
        .ndo_tx_timeout         = sh_eth_tx_timeout,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_change_mtu         = sh_eth_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
@@ -3157,7 +3157,7 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = {
        .ndo_vlan_rx_add_vid    = sh_eth_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = sh_eth_vlan_rx_kill_vid,
        .ndo_tx_timeout         = sh_eth_tx_timeout,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_change_mtu         = sh_eth_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
index 315a6e5..e75814a 100644 (file)
@@ -119,7 +119,8 @@ struct rocker_world_ops {
        int (*port_obj_fdb_del)(struct rocker_port *rocker_port,
                                u16 vid, const unsigned char *addr);
        int (*port_master_linked)(struct rocker_port *rocker_port,
-                                 struct net_device *master);
+                                 struct net_device *master,
+                                 struct netlink_ext_ack *extack);
        int (*port_master_unlinked)(struct rocker_port *rocker_port,
                                    struct net_device *master);
        int (*port_neigh_update)(struct rocker_port *rocker_port,
index 1f06b92..3364b6a 100644 (file)
@@ -1670,13 +1670,14 @@ rocker_world_port_fdb_del(struct rocker_port *rocker_port,
 }
 
 static int rocker_world_port_master_linked(struct rocker_port *rocker_port,
-                                          struct net_device *master)
+                                          struct net_device *master,
+                                          struct netlink_ext_ack *extack)
 {
        struct rocker_world_ops *wops = rocker_port->rocker->wops;
 
        if (!wops->port_master_linked)
                return -EOPNOTSUPP;
-       return wops->port_master_linked(rocker_port, master);
+       return wops->port_master_linked(rocker_port, master, extack);
 }
 
 static int rocker_world_port_master_unlinked(struct rocker_port *rocker_port,
@@ -3107,6 +3108,7 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
 static int rocker_netdevice_event(struct notifier_block *unused,
                                  unsigned long event, void *ptr)
 {
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info;
        struct rocker_port *rocker_port;
@@ -3123,7 +3125,8 @@ static int rocker_netdevice_event(struct notifier_block *unused,
                rocker_port = netdev_priv(dev);
                if (info->linking) {
                        err = rocker_world_port_master_linked(rocker_port,
-                                                             info->upper_dev);
+                                                             info->upper_dev,
+                                                             extack);
                        if (err)
                                netdev_warn(dev, "failed to reflect master linked (err %d)\n",
                                            err);
index e33a9d2..3e1ca7a 100644 (file)
@@ -2571,8 +2571,10 @@ static int ofdpa_port_obj_fdb_del(struct rocker_port *rocker_port,
 }
 
 static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
-                                 struct net_device *bridge)
+                                 struct net_device *bridge,
+                                 struct netlink_ext_ack *extack)
 {
+       struct net_device *dev = ofdpa_port->dev;
        int err;
 
        /* Port is joining bridge, so the internal VLAN for the
@@ -2592,13 +2594,21 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
 
        ofdpa_port->bridge_dev = bridge;
 
-       return ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
+       err = ofdpa_port_vlan_add(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
+       if (err)
+               return err;
+
+       return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL,
+                                            false, extack);
 }
 
 static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
 {
+       struct net_device *dev = ofdpa_port->dev;
        int err;
 
+       switchdev_bridge_port_unoffload(dev, NULL, NULL, NULL);
+
        err = ofdpa_port_vlan_del(ofdpa_port, OFDPA_UNTAGGED_VID, 0);
        if (err)
                return err;
@@ -2637,13 +2647,14 @@ static int ofdpa_port_ovs_changed(struct ofdpa_port *ofdpa_port,
 }
 
 static int ofdpa_port_master_linked(struct rocker_port *rocker_port,
-                                   struct net_device *master)
+                                   struct net_device *master,
+                                   struct netlink_ext_ack *extack)
 {
        struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
        int err = 0;
 
        if (netif_is_bridge_master(master))
-               err = ofdpa_port_bridge_join(ofdpa_port, master);
+               err = ofdpa_port_bridge_join(ofdpa_port, master, extack);
        else if (netif_is_ovs_master(master))
                err = ofdpa_port_ovs_changed(ofdpa_port, master);
        return err;
index 0582e11..2a6c265 100644 (file)
@@ -20,9 +20,9 @@ if NET_VENDOR_SAMSUNG
 config SXGBE_ETH
        tristate "Samsung 10G/2.5G/1G SXGBE Ethernet driver"
        depends on HAS_IOMEM && HAS_DMA
+       depends on PTP_1588_CLOCK_OPTIONAL
        select PHYLIB
        select CRC32
-       imply PTP_1588_CLOCK
        help
          This is the driver for the SXGBE 10G Ethernet IP block found on
          Samsung platforms.
index 7f8b10c..98edb01 100644 (file)
@@ -274,7 +274,9 @@ static u32 sxgbe_usec2riwt(u32 usec, struct sxgbe_priv_data *priv)
 }
 
 static int sxgbe_get_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct sxgbe_priv_data *priv = netdev_priv(dev);
 
@@ -285,7 +287,9 @@ static int sxgbe_get_coalesce(struct net_device *dev,
 }
 
 static int sxgbe_set_coalesce(struct net_device *dev,
-                             struct ethtool_coalesce *ec)
+                             struct ethtool_coalesce *ec,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct sxgbe_priv_data *priv = netdev_priv(dev);
        unsigned int rx_riwt;
index 090bcd2..6781aa6 100644 (file)
@@ -1964,7 +1964,7 @@ static const struct net_device_ops sxgbe_netdev_ops = {
        .ndo_set_features       = sxgbe_set_features,
        .ndo_set_rx_mode        = sxgbe_set_rx_mode,
        .ndo_tx_timeout         = sxgbe_tx_timeout,
-       .ndo_do_ioctl           = sxgbe_ioctl,
+       .ndo_eth_ioctl          = sxgbe_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = sxgbe_poll_controller,
 #endif
index 5e37c83..97ce640 100644 (file)
@@ -19,9 +19,9 @@ if NET_VENDOR_SOLARFLARE
 config SFC
        tristate "Solarflare SFC9000/SFC9100/EF100-family support"
        depends on PCI
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MDIO
        select CRC32
-       imply PTP_1588_CLOCK
        help
          This driver supports 10/40-gigabit Ethernet cards based on
          the Solarflare SFC9000-family and SFC9100-family controllers.
index 37fcf2e..a295e26 100644 (file)
@@ -591,7 +591,7 @@ static const struct net_device_ops efx_netdev_ops = {
        .ndo_tx_timeout         = efx_watchdog,
        .ndo_start_xmit         = efx_hard_start_xmit,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = efx_ioctl,
+       .ndo_eth_ioctl          = efx_ioctl,
        .ndo_change_mtu         = efx_change_mtu,
        .ndo_set_mac_address    = efx_set_mac_address,
        .ndo_set_rx_mode        = efx_set_rx_mode,
index 058d9fe..e002ce2 100644 (file)
@@ -97,7 +97,9 @@ static void efx_ethtool_get_regs(struct net_device *net_dev,
  */
 
 static int efx_ethtool_get_coalesce(struct net_device *net_dev,
-                                   struct ethtool_coalesce *coalesce)
+                                   struct ethtool_coalesce *coalesce,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
        unsigned int tx_usecs, rx_usecs;
@@ -115,7 +117,9 @@ static int efx_ethtool_get_coalesce(struct net_device *net_dev,
 }
 
 static int efx_ethtool_set_coalesce(struct net_device *net_dev,
-                                   struct ethtool_coalesce *coalesce)
+                                   struct ethtool_coalesce *coalesce,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
        struct efx_channel *channel;
index 9ec752a..c177ea0 100644 (file)
@@ -2219,7 +2219,7 @@ static const struct net_device_ops ef4_netdev_ops = {
        .ndo_tx_timeout         = ef4_watchdog,
        .ndo_start_xmit         = ef4_hard_start_xmit,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = ef4_ioctl,
+       .ndo_eth_ioctl          = ef4_ioctl,
        .ndo_change_mtu         = ef4_change_mtu,
        .ndo_set_mac_address    = ef4_set_mac_address,
        .ndo_set_rx_mode        = ef4_set_rx_mode,
index a6bae6a..137e8a7 100644 (file)
@@ -577,7 +577,9 @@ static int ef4_ethtool_nway_reset(struct net_device *net_dev)
  */
 
 static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
-                                   struct ethtool_coalesce *coalesce)
+                                   struct ethtool_coalesce *coalesce,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct ef4_nic *efx = netdev_priv(net_dev);
        unsigned int tx_usecs, rx_usecs;
@@ -595,7 +597,9 @@ static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
 }
 
 static int ef4_ethtool_set_coalesce(struct net_device *net_dev,
-                                   struct ethtool_coalesce *coalesce)
+                                   struct ethtool_coalesce *coalesce,
+                                   struct kernel_ethtool_coalesce *kernel_coal,
+                                   struct netlink_ext_ack *extack)
 {
        struct ef4_nic *efx = netdev_priv(net_dev);
        struct ef4_channel *channel;
index 2b29fd4..062f784 100644 (file)
@@ -820,7 +820,7 @@ static const struct net_device_ops ioc3_netdev_ops = {
        .ndo_tx_timeout         = ioc3_timeout,
        .ndo_get_stats          = ioc3_get_stats,
        .ndo_set_rx_mode        = ioc3_set_multicast_list,
-       .ndo_do_ioctl           = ioc3_ioctl,
+       .ndo_eth_ioctl          = ioc3_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = ioc3_set_mac_address,
 };
index 0c396ec..efce834 100644 (file)
@@ -812,7 +812,7 @@ static const struct net_device_ops meth_netdev_ops = {
        .ndo_open               = meth_open,
        .ndo_stop               = meth_release,
        .ndo_start_xmit         = meth_tx,
-       .ndo_do_ioctl           = meth_ioctl,
+       .ndo_eth_ioctl          = meth_ioctl,
        .ndo_tx_timeout         = meth_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
index 676b193..3d1a18a 100644 (file)
@@ -1841,7 +1841,7 @@ static int sis190_mac_addr(struct net_device  *dev, void *p)
 static const struct net_device_ops sis190_netdev_ops = {
        .ndo_open               = sis190_open,
        .ndo_stop               = sis190_close,
-       .ndo_do_ioctl           = sis190_ioctl,
+       .ndo_eth_ioctl          = sis190_ioctl,
        .ndo_start_xmit         = sis190_start_xmit,
        .ndo_tx_timeout         = sis190_tx_timeout,
        .ndo_set_rx_mode        = sis190_set_rx_mode,
index cff87de..60a0c0e 100644 (file)
@@ -404,7 +404,7 @@ static const struct net_device_ops sis900_netdev_ops = {
        .ndo_set_rx_mode        = set_rx_mode,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_do_ioctl           = mii_ioctl,
+       .ndo_eth_ioctl          = mii_ioctl,
        .ndo_tx_timeout         = sis900_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
         .ndo_poll_controller   = sis900_poll,
index c52a38d..72e42a8 100644 (file)
@@ -23,6 +23,7 @@ config SMC9194
        tristate "SMC 9194 support"
        depends on ISA
        select CRC32
+       select NETDEV_LEGACY_INIT
        help
          This is support for the SMC9xxx based Ethernet cards. Choose this
          option if you have a DELL laptop with the docking station, or
index 51cd7dc..44daf79 100644 (file)
@@ -312,7 +312,7 @@ static const struct net_device_ops epic_netdev_ops = {
        .ndo_tx_timeout         = epic_tx_timeout,
        .ndo_get_stats          = epic_get_stats,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = netdev_ioctl,
+       .ndo_eth_ioctl          = netdev_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
index bf7c8c8..0ce403f 100644 (file)
@@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
 MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
 MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)");
 
-int __init init_module(void)
+static int __init smc_init_module(void)
 {
        if (io == 0)
                printk(KERN_WARNING
@@ -1518,13 +1518,15 @@ int __init init_module(void)
        devSMC9194 = smc_init(-1);
        return PTR_ERR_OR_ZERO(devSMC9194);
 }
+module_init(smc_init_module);
 
-void __exit cleanup_module(void)
+static void __exit smc_cleanup_module(void)
 {
        unregister_netdev(devSMC9194);
        free_irq(devSMC9194->irq, devSMC9194);
        release_region(devSMC9194->base_addr, SMC_IO_EXTENT);
        free_netdev(devSMC9194);
 }
+module_exit(smc_cleanup_module);
 
 #endif /* MODULE */
index f2a50eb..42fc37c 100644 (file)
@@ -294,7 +294,7 @@ static const struct net_device_ops smc_netdev_ops = {
        .ndo_tx_timeout         = smc_tx_timeout,
        .ndo_set_config         = s9k_config,
        .ndo_set_rx_mode        = set_rx_mode,
-       .ndo_do_ioctl           = smc_ioctl,
+       .ndo_eth_ioctl          = smc_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 };
index 556a979..199a973 100644 (file)
@@ -2148,7 +2148,7 @@ static const struct net_device_ops smsc911x_netdev_ops = {
        .ndo_start_xmit         = smsc911x_hard_start_xmit,
        .ndo_get_stats          = smsc911x_get_stats,
        .ndo_set_rx_mode        = smsc911x_set_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = smsc911x_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index c1dab00..fdbd2a4 100644 (file)
@@ -1482,7 +1482,7 @@ static const struct net_device_ops smsc9420_netdev_ops = {
        .ndo_start_xmit         = smsc9420_hard_start_xmit,
        .ndo_get_stats          = smsc9420_get_stats,
        .ndo_set_rx_mode        = smsc9420_set_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 20d148c..1f46af1 100644 (file)
@@ -532,7 +532,9 @@ static void netsec_et_get_drvinfo(struct net_device *net_device,
 }
 
 static int netsec_et_get_coalesce(struct net_device *net_device,
-                                 struct ethtool_coalesce *et_coalesce)
+                                 struct ethtool_coalesce *et_coalesce,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
 {
        struct netsec_priv *priv = netdev_priv(net_device);
 
@@ -542,7 +544,9 @@ static int netsec_et_get_coalesce(struct net_device *net_device,
 }
 
 static int netsec_et_set_coalesce(struct net_device *net_device,
-                                 struct ethtool_coalesce *et_coalesce)
+                                 struct ethtool_coalesce *et_coalesce,
+                                 struct kernel_ethtool_coalesce *kernel_coal,
+                                 struct netlink_ext_ack *extack)
 {
        struct netsec_priv *priv = netdev_priv(net_device);
 
@@ -1544,7 +1548,7 @@ static int netsec_start_gmac(struct netsec_priv *priv)
        netsec_write(priv, NETSEC_REG_NRM_RX_INTEN_CLR, ~0);
        netsec_write(priv, NETSEC_REG_NRM_TX_INTEN_CLR, ~0);
 
-       netsec_et_set_coalesce(priv->ndev, &priv->et_coalesce);
+       netsec_et_set_coalesce(priv->ndev, &priv->et_coalesce, NULL, NULL);
 
        if (netsec_mac_write(priv, GMAC_REG_OMR, value))
                return -ETIMEDOUT;
@@ -1831,7 +1835,7 @@ static const struct net_device_ops netsec_netdev_ops = {
        .ndo_set_features       = netsec_netdev_set_features,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl,
+       .ndo_eth_ioctl          = phy_do_ioctl,
        .ndo_xdp_xmit           = netsec_xdp_xmit,
        .ndo_bpf                = netsec_xdp,
 };
index 5eb6bb4..ae31ed9 100644 (file)
@@ -1543,7 +1543,7 @@ static const struct net_device_ops ave_netdev_ops = {
        .ndo_open               = ave_open,
        .ndo_stop               = ave_stop,
        .ndo_start_xmit         = ave_start_xmit,
-       .ndo_do_ioctl           = ave_ioctl,
+       .ndo_eth_ioctl          = ave_ioctl,
        .ndo_set_rx_mode        = ave_set_rx_mode,
        .ndo_get_stats64        = ave_get_stats64,
        .ndo_set_mac_address    = ave_set_mac_address,
index ac3c248..929cfc2 100644 (file)
@@ -2,12 +2,12 @@
 config STMMAC_ETH
        tristate "STMicroelectronics Multi-Gigabit Ethernet driver"
        depends on HAS_IOMEM && HAS_DMA
+       depends on PTP_1588_CLOCK_OPTIONAL
        select MII
        select PCS_XPCS
        select PAGE_POOL
        select PHYLINK
        select CRC32
-       imply PTP_1588_CLOCK
        select RESET_CONTROLLER
        help
          This is the driver for the Ethernet IPs built around a
index 5fecc83..b6d945e 100644 (file)
 #undef FRAME_FILTER_DEBUG
 /* #define FRAME_FILTER_DEBUG */
 
+struct stmmac_txq_stats {
+       unsigned long tx_pkt_n;
+       unsigned long tx_normal_irq_n;
+};
+
+struct stmmac_rxq_stats {
+       unsigned long rx_pkt_n;
+       unsigned long rx_normal_irq_n;
+};
+
 /* Extra statistic and debug information exposed by ethtool */
 struct stmmac_extra_stats {
        /* Transmit errors */
@@ -189,6 +199,9 @@ struct stmmac_extra_stats {
        unsigned long mtl_est_hlbf;
        unsigned long mtl_est_btre;
        unsigned long mtl_est_btrlm;
+       /* per queue statistics */
+       struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
+       struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
 };
 
 /* Safety Feature statistics exposed by ethtool */
index 28dd0ed..f7dc845 100644 (file)
@@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL;
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
 
@@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                        NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id);
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
 
@@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                                NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id);
                break;
        default:
-               /* We don't get here; the switch above will have errored out */
-               unreachable();
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
 
@@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 
        return 0;
 
+err_unsupported_phy:
+       dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
+               phy_modes(gmac->phy_mode));
+       err = -EINVAL;
+
 err_remove_config_dt:
        stmmac_remove_config_dt(pdev, plat_dat);
 
index e632702..9292a1f 100644 (file)
@@ -170,13 +170,16 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
                x->normal_irq_n++;
        if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
                x->rx_normal_irq_n++;
+               x->rxq_stats[chan].rx_normal_irq_n++;
                ret |= handle_rx;
        }
-       if (likely(intr_status & (DMA_CHAN_STATUS_TI |
-               DMA_CHAN_STATUS_TBU))) {
+       if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
                x->tx_normal_irq_n++;
+               x->txq_stats[chan].tx_normal_irq_n++;
                ret |= handle_tx;
        }
+       if (unlikely(intr_status & DMA_CHAN_STATUS_TBU))
+               ret |= handle_tx;
        if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
                x->rx_early_irq++;
 
index d0ce608..d894558 100644 (file)
@@ -261,6 +261,18 @@ static const struct stmmac_stats stmmac_mmc[] = {
 };
 #define STMMAC_MMC_STATS_LEN ARRAY_SIZE(stmmac_mmc)
 
+static const char stmmac_qstats_tx_string[][ETH_GSTRING_LEN] = {
+       "tx_pkt_n",
+       "tx_irq_n",
+#define STMMAC_TXQ_STATS ARRAY_SIZE(stmmac_qstats_tx_string)
+};
+
+static const char stmmac_qstats_rx_string[][ETH_GSTRING_LEN] = {
+       "rx_pkt_n",
+       "rx_irq_n",
+#define STMMAC_RXQ_STATS ARRAY_SIZE(stmmac_qstats_rx_string)
+};
+
 static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
                                      struct ethtool_drvinfo *info)
 {
@@ -510,6 +522,31 @@ stmmac_set_pauseparam(struct net_device *netdev,
        }
 }
 
+static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       int q, stat;
+       char *p;
+
+       for (q = 0; q < tx_cnt; q++) {
+               p = (char *)priv + offsetof(struct stmmac_priv,
+                                           xstats.txq_stats[q].tx_pkt_n);
+               for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
+                       *data++ = (*(u64 *)p);
+                       p += sizeof(u64 *);
+               }
+       }
+       for (q = 0; q < rx_cnt; q++) {
+               p = (char *)priv + offsetof(struct stmmac_priv,
+                                           xstats.rxq_stats[q].rx_pkt_n);
+               for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
+                       *data++ = (*(u64 *)p);
+                       p += sizeof(u64 *);
+               }
+       }
+}
+
 static void stmmac_get_ethtool_stats(struct net_device *dev,
                                 struct ethtool_stats *dummy, u64 *data)
 {
@@ -560,16 +597,21 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
                data[j++] = (stmmac_gstrings_stats[i].sizeof_stat ==
                             sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p);
        }
+       stmmac_get_per_qstats(priv, &data[j]);
 }
 
 static int stmmac_get_sset_count(struct net_device *netdev, int sset)
 {
        struct stmmac_priv *priv = netdev_priv(netdev);
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
        int i, len, safety_len = 0;
 
        switch (sset) {
        case ETH_SS_STATS:
-               len = STMMAC_STATS_LEN;
+               len = STMMAC_STATS_LEN +
+                     STMMAC_TXQ_STATS * tx_cnt +
+                     STMMAC_RXQ_STATS * rx_cnt;
 
                if (priv->dma_cap.rmon)
                        len += STMMAC_MMC_STATS_LEN;
@@ -592,6 +634,28 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
        }
 }
 
+static void stmmac_get_qstats_string(struct stmmac_priv *priv, u8 *data)
+{
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
+       u32 rx_cnt = priv->plat->rx_queues_to_use;
+       int q, stat;
+
+       for (q = 0; q < tx_cnt; q++) {
+               for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
+                       snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q,
+                                stmmac_qstats_tx_string[stat]);
+                       data += ETH_GSTRING_LEN;
+               }
+       }
+       for (q = 0; q < rx_cnt; q++) {
+               for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
+                       snprintf(data, ETH_GSTRING_LEN, "q%d_%s", q,
+                                stmmac_qstats_rx_string[stat]);
+                       data += ETH_GSTRING_LEN;
+               }
+       }
+}
+
 static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        int i;
@@ -622,6 +686,7 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
                                ETH_GSTRING_LEN);
                        p += ETH_GSTRING_LEN;
                }
+               stmmac_get_qstats_string(priv, p);
                break;
        case ETH_SS_TEST:
                stmmac_selftest_get_strings(priv, p);
@@ -809,7 +874,9 @@ static int __stmmac_get_coalesce(struct net_device *dev,
 }
 
 static int stmmac_get_coalesce(struct net_device *dev,
-                              struct ethtool_coalesce *ec)
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        return __stmmac_get_coalesce(dev, ec, -1);
 }
@@ -893,7 +960,9 @@ static int __stmmac_set_coalesce(struct net_device *dev,
 }
 
 static int stmmac_set_coalesce(struct net_device *dev,
-                              struct ethtool_coalesce *ec)
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        return __stmmac_set_coalesce(dev, ec, -1);
 }
index fa90bcd..ed0cd39 100644 (file)
@@ -2500,6 +2500,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
                        } else {
                                priv->dev->stats.tx_packets++;
                                priv->xstats.tx_pkt_n++;
+                               priv->xstats.txq_stats[queue].tx_pkt_n++;
                        }
                        if (skb)
                                stmmac_get_tx_hwtstamp(priv, p, skb);
@@ -5000,6 +5001,9 @@ read_again:
 
        stmmac_finalize_xdp_rx(priv, xdp_status);
 
+       priv->xstats.rx_pkt_n += count;
+       priv->xstats.rxq_stats[queue].rx_pkt_n += count;
+
        if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
                if (failure || stmmac_rx_dirty(priv, queue) > 0)
                        xsk_set_rx_need_wakeup(rx_q->xsk_pool);
@@ -5287,6 +5291,7 @@ drain_data:
        stmmac_rx_refill(priv, queue);
 
        priv->xstats.rx_pkt_n += count;
+       priv->xstats.rxq_stats[queue].rx_pkt_n += count;
 
        return count;
 }
@@ -6451,7 +6456,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
        .ndo_set_features = stmmac_set_features,
        .ndo_set_rx_mode = stmmac_set_rx_mode,
        .ndo_tx_timeout = stmmac_tx_timeout,
-       .ndo_do_ioctl = stmmac_ioctl,
+       .ndo_eth_ioctl = stmmac_ioctl,
        .ndo_setup_tc = stmmac_setup_tc,
        .ndo_select_queue = stmmac_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 981685c..287ae4c 100644 (file)
@@ -4876,7 +4876,7 @@ static const struct net_device_ops cas_netdev_ops = {
        .ndo_start_xmit         = cas_start_xmit,
        .ndo_get_stats          = cas_get_stats,
        .ndo_set_rx_mode        = cas_set_multicast,
-       .ndo_do_ioctl           = cas_ioctl,
+       .ndo_eth_ioctl          = cas_ioctl,
        .ndo_tx_timeout         = cas_tx_timeout,
        .ndo_change_mtu         = cas_change_mtu,
        .ndo_set_mac_address    = eth_mac_addr,
index 860644d..a68a01d 100644 (file)
@@ -9208,7 +9208,7 @@ static int niu_get_of_props(struct niu *np)
        else
                dp = pci_device_to_OF_node(np->pdev);
 
-       phy_type = of_get_property(dp, "phy-type", &prop_len);
+       phy_type = of_get_property(dp, "phy-type", NULL);
        if (!phy_type) {
                netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
                return -EINVAL;
@@ -9242,12 +9242,12 @@ static int niu_get_of_props(struct niu *np)
                return -EINVAL;
        }
 
-       model = of_get_property(dp, "model", &prop_len);
+       model = of_get_property(dp, "model", NULL);
 
        if (model)
                strcpy(np->vpd.model, model);
 
-       if (of_find_property(dp, "hot-swappable-phy", &prop_len)) {
+       if (of_find_property(dp, "hot-swappable-phy", NULL)) {
                np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
                        NIU_FLAGS_HOTPLUG_PHY);
        }
@@ -9668,7 +9668,7 @@ static const struct net_device_ops niu_netdev_ops = {
        .ndo_set_rx_mode        = niu_set_rx_mode,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = niu_set_mac_addr,
-       .ndo_do_ioctl           = niu_ioctl,
+       .ndo_eth_ioctl          = niu_ioctl,
        .ndo_tx_timeout         = niu_tx_timeout,
        .ndo_change_mtu         = niu_change_mtu,
 };
@@ -9722,7 +9722,6 @@ static int niu_pci_init_one(struct pci_dev *pdev,
        struct net_device *dev;
        struct niu *np;
        int err;
-       u64 dma_mask;
 
        niu_driver_version();
 
@@ -9777,18 +9776,11 @@ static int niu_pci_init_one(struct pci_dev *pdev,
                PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE |
                PCI_EXP_DEVCTL_RELAX_EN);
 
-       dma_mask = DMA_BIT_MASK(44);
-       err = pci_set_dma_mask(pdev, dma_mask);
-       if (!err) {
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
+       if (!err)
                dev->features |= NETIF_F_HIGHDMA;
-               err = pci_set_consistent_dma_mask(pdev, dma_mask);
-               if (err) {
-                       dev_err(&pdev->dev, "Unable to obtain 44 bit DMA for consistent allocations, aborting\n");
-                       goto err_out_release_parent;
-               }
-       }
        if (err) {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
                        goto err_out_release_parent;
index cfb9e21..d72018a 100644 (file)
@@ -2831,7 +2831,7 @@ static const struct net_device_ops gem_netdev_ops = {
        .ndo_start_xmit         = gem_start_xmit,
        .ndo_get_stats          = gem_get_stats,
        .ndo_set_rx_mode        = gem_set_multicast,
-       .ndo_do_ioctl           = gem_ioctl,
+       .ndo_eth_ioctl          = gem_ioctl,
        .ndo_tx_timeout         = gem_tx_timeout,
        .ndo_change_mtu         = gem_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
index a2c1a40..62f81b0 100644 (file)
@@ -251,14 +251,6 @@ static u32 pci_hme_read_desc32(hme32 *p)
        ((__hp)->write_txd((__txd), (__flags), (__addr)))
 #define hme_read_desc32(__hp, __p) \
        ((__hp)->read_desc32(__p))
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
-       ((__hp)->dma_map((__hp)->dma_dev, (__ptr), (__size), (__dir)))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
-       ((__hp)->dma_unmap((__hp)->dma_dev, (__addr), (__size), (__dir)))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
-       ((__hp)->dma_sync_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir)))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
-       ((__hp)->dma_sync_for_device((__hp)->dma_dev, (__addr), (__size), (__dir)))
 #else
 #ifdef CONFIG_SBUS
 /* SBUS only compilation */
@@ -277,14 +269,6 @@ do {       (__txd)->tx_addr = (__force hme32)(u32)(__addr); \
        (__txd)->tx_flags = (__force hme32)(u32)(__flags); \
 } while(0)
 #define hme_read_desc32(__hp, __p)     ((__force u32)(hme32)*(__p))
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
-       dma_map_single((__hp)->dma_dev, (__ptr), (__size), (__dir))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
-       dma_unmap_single((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
-       dma_dma_sync_single_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
-       dma_dma_sync_single_for_device((__hp)->dma_dev, (__addr), (__size), (__dir))
 #else
 /* PCI only compilation */
 #define hme_write32(__hp, __reg, __val) \
@@ -305,14 +289,6 @@ static inline u32 hme_read_desc32(struct happy_meal *hp, hme32 *p)
 {
        return le32_to_cpup((__le32 *)p);
 }
-#define hme_dma_map(__hp, __ptr, __size, __dir) \
-       pci_map_single((__hp)->dma_dev, (__ptr), (__size), (__dir))
-#define hme_dma_unmap(__hp, __addr, __size, __dir) \
-       pci_unmap_single((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_cpu(__hp, __addr, __size, __dir) \
-       pci_dma_sync_single_for_cpu((__hp)->dma_dev, (__addr), (__size), (__dir))
-#define hme_dma_sync_for_device(__hp, __addr, __size, __dir) \
-       pci_dma_sync_single_for_device((__hp)->dma_dev, (__addr), (__size), (__dir))
 #endif
 #endif
 
index bc198ea..49f8c6b 100644 (file)
@@ -146,8 +146,11 @@ static void xlgmac_ethtool_get_channels(struct net_device *netdev,
        channel->tx_count = pdata->tx_q_count;
 }
 
-static int xlgmac_ethtool_get_coalesce(struct net_device *netdev,
-                                      struct ethtool_coalesce *ec)
+static int
+xlgmac_ethtool_get_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct xlgmac_pdata *pdata = netdev_priv(netdev);
 
@@ -158,8 +161,11 @@ static int xlgmac_ethtool_get_coalesce(struct net_device *netdev,
        return 0;
 }
 
-static int xlgmac_ethtool_set_coalesce(struct net_device *netdev,
-                                      struct ethtool_coalesce *ec)
+static int
+xlgmac_ethtool_set_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct xlgmac_pdata *pdata = netdev_priv(netdev);
        struct xlgmac_hw_ops *hw_ops = &pdata->hw_ops;
index 26d178f..1db7104 100644 (file)
@@ -933,7 +933,7 @@ static const struct net_device_ops xlgmac_netdev_ops = {
        .ndo_change_mtu         = xlgmac_change_mtu,
        .ndo_set_mac_address    = xlgmac_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = xlgmac_ioctl,
+       .ndo_eth_ioctl          = xlgmac_ioctl,
        .ndo_vlan_rx_add_vid    = xlgmac_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = xlgmac_vlan_rx_kill_vid,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index d054c6e..6b409f9 100644 (file)
@@ -637,7 +637,8 @@ static int bdx_range_check(struct bdx_priv *priv, u32 offset)
                -EINVAL : 0;
 }
 
-static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
+static int bdx_siocdevprivate(struct net_device *ndev, struct ifreq *ifr,
+                             void __user *udata, int cmd)
 {
        struct bdx_priv *priv = netdev_priv(ndev);
        u32 data[3];
@@ -647,7 +648,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
 
        DBG("jiffies=%ld cmd=%d\n", jiffies, cmd);
        if (cmd != SIOCDEVPRIVATE) {
-               error = copy_from_user(data, ifr->ifr_data, sizeof(data));
+               error = copy_from_user(data, udata, sizeof(data));
                if (error) {
                        pr_err("can't copy from user\n");
                        RET(-EFAULT);
@@ -669,7 +670,7 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
                data[2] = READ_REG(priv, data[1]);
                DBG("read_reg(0x%x)=0x%x (dec %d)\n", data[1], data[2],
                    data[2]);
-               error = copy_to_user(ifr->ifr_data, data, sizeof(data));
+               error = copy_to_user(udata, data, sizeof(data));
                if (error)
                        RET(-EFAULT);
                break;
@@ -688,15 +689,6 @@ static int bdx_ioctl_priv(struct net_device *ndev, struct ifreq *ifr, int cmd)
        return 0;
 }
 
-static int bdx_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
-{
-       ENTER;
-       if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
-               RET(bdx_ioctl_priv(ndev, ifr, cmd));
-       else
-               RET(-EOPNOTSUPP);
-}
-
 /**
  * __bdx_vlan_rx_vid - private helper for adding/killing VLAN vid
  * @ndev: network device
@@ -1860,7 +1852,7 @@ static const struct net_device_ops bdx_netdev_ops = {
        .ndo_stop               = bdx_close,
        .ndo_start_xmit         = bdx_tx_transmit,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = bdx_ioctl,
+       .ndo_siocdevprivate     = bdx_siocdevprivate,
        .ndo_set_rx_mode        = bdx_setmulti,
        .ndo_change_mtu         = bdx_change_mtu,
        .ndo_set_mac_address    = bdx_set_mac,
@@ -2159,8 +2151,10 @@ bdx_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
  * @netdev
  * @ecoal
  */
-static int
-bdx_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
+static int bdx_get_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ecoal,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        u32 rdintcm;
        u32 tdintcm;
@@ -2188,8 +2182,10 @@ bdx_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
  * @netdev
  * @ecoal
  */
-static int
-bdx_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ecoal)
+static int bdx_set_coalesce(struct net_device *netdev,
+                           struct ethtool_coalesce *ecoal,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        u32 rdintcm;
        u32 tdintcm;
index 67a08cb..130346f 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/sys_soc.h>
 #include <linux/dma/ti-cppi5.h>
 #include <linux/dma/k3-udma-glue.h>
+#include <net/switchdev.h>
 
 #include "cpsw_ale.h"
 #include "cpsw_sl.h"
@@ -518,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common,
        }
 
        napi_enable(&common->napi_rx);
+       if (common->rx_irq_disabled) {
+               common->rx_irq_disabled = false;
+               enable_irq(common->rx_chns.irq);
+       }
 
        dev_dbg(common->dev, "cpsw_nuss started\n");
        return 0;
@@ -871,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
 
        dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);
 
-       if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
-               enable_irq(common->rx_chns.irq);
+       if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+               if (common->rx_irq_disabled) {
+                       common->rx_irq_disabled = false;
+                       enable_irq(common->rx_chns.irq);
+               }
+       }
 
        return num_rx;
 }
@@ -1077,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
        else
                num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
 
-       num_tx = min(num_tx, budget);
-       if (num_tx < budget) {
-               napi_complete(napi_tx);
+       if (num_tx >= budget)
+               return budget;
+
+       if (napi_complete_done(napi_tx, num_tx))
                enable_irq(tx_chn->irq);
-       }
 
-       return num_tx;
+       return 0;
 }
 
 static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
 {
        struct am65_cpsw_common *common = dev_id;
 
+       common->rx_irq_disabled = true;
        disable_irq_nosync(irq);
        napi_schedule(&common->napi_rx);
 
@@ -1479,7 +1489,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops = {
        .ndo_tx_timeout         = am65_cpsw_nuss_ndo_host_tx_timeout,
        .ndo_vlan_rx_add_vid    = am65_cpsw_nuss_ndo_slave_add_vid,
        .ndo_vlan_rx_kill_vid   = am65_cpsw_nuss_ndo_slave_kill_vid,
-       .ndo_do_ioctl           = am65_cpsw_nuss_ndo_slave_ioctl,
+       .ndo_eth_ioctl          = am65_cpsw_nuss_ndo_slave_ioctl,
        .ndo_setup_tc           = am65_cpsw_qos_ndo_setup_tc,
        .ndo_get_devlink_port   = am65_cpsw_ndo_get_devlink_port,
 };
@@ -2081,10 +2091,13 @@ bool am65_cpsw_port_dev_check(const struct net_device *ndev)
        return false;
 }
 
-static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_device *br_ndev)
+static int am65_cpsw_netdevice_port_link(struct net_device *ndev,
+                                        struct net_device *br_ndev,
+                                        struct netlink_ext_ack *extack)
 {
        struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
        struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev);
+       int err;
 
        if (!common->br_members) {
                common->hw_bridge_dev = br_ndev;
@@ -2096,6 +2109,11 @@ static int am65_cpsw_netdevice_port_link(struct net_device *ndev, struct net_dev
                        return -EOPNOTSUPP;
        }
 
+       err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+                                           false, extack);
+       if (err)
+               return err;
+
        common->br_members |= BIT(priv->port->port_id);
 
        am65_cpsw_port_offload_fwd_mark_update(common);
@@ -2108,6 +2126,8 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev)
        struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
        struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(ndev);
 
+       switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL);
+
        common->br_members &= ~BIT(priv->port->port_id);
 
        am65_cpsw_port_offload_fwd_mark_update(common);
@@ -2120,6 +2140,7 @@ static void am65_cpsw_netdevice_port_unlink(struct net_device *ndev)
 static int am65_cpsw_netdevice_event(struct notifier_block *unused,
                                     unsigned long event, void *ptr)
 {
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info;
        int ret = NOTIFY_DONE;
@@ -2133,7 +2154,9 @@ static int am65_cpsw_netdevice_event(struct notifier_block *unused,
 
                if (netif_is_bridge_master(info->upper_dev)) {
                        if (info->linking)
-                               ret = am65_cpsw_netdevice_port_link(ndev, info->upper_dev);
+                               ret = am65_cpsw_netdevice_port_link(ndev,
+                                                                   info->upper_dev,
+                                                                   extack);
                        else
                                am65_cpsw_netdevice_port_unlink(ndev);
                }
@@ -2388,21 +2411,6 @@ static const struct devlink_param am65_cpsw_devlink_params[] = {
                             am65_cpsw_dl_switch_mode_set, NULL),
 };
 
-static void am65_cpsw_unregister_devlink_ports(struct am65_cpsw_common *common)
-{
-       struct devlink_port *dl_port;
-       struct am65_cpsw_port *port;
-       int i;
-
-       for (i = 1; i <= common->port_num; i++) {
-               port = am65_common_get_port(common, i);
-               dl_port = &port->devlink_port;
-
-               if (dl_port->registered)
-                       devlink_port_unregister(dl_port);
-       }
-}
-
 static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
 {
        struct devlink_port_attrs attrs = {};
@@ -2414,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
        int i;
 
        common->devlink =
-               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv));
+               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!common->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(common->devlink);
        dl_priv->common = common;
 
-       ret = devlink_register(common->devlink, dev);
+       ret = devlink_register(common->devlink);
        if (ret) {
                dev_err(dev, "devlink reg fail ret:%d\n", ret);
                goto dl_free;
@@ -2464,7 +2472,12 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
        return ret;
 
 dl_port_unreg:
-       am65_cpsw_unregister_devlink_ports(common);
+       for (i = i - 1; i >= 1; i--) {
+               port = am65_common_get_port(common, i);
+               dl_port = &port->devlink_port;
+
+               devlink_port_unregister(dl_port);
+       }
 dl_unreg:
        devlink_unregister(common->devlink);
 dl_free:
@@ -2475,6 +2488,17 @@ dl_free:
 
 static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
 {
+       struct devlink_port *dl_port;
+       struct am65_cpsw_port *port;
+       int i;
+
+       for (i = 1; i <= common->port_num; i++) {
+               port = am65_common_get_port(common, i);
+               dl_port = &port->devlink_port;
+
+               devlink_port_unregister(dl_port);
+       }
+
        if (!AM65_CPSW_IS_CPSW2G(common) &&
            IS_ENABLED(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV)) {
                devlink_params_unpublish(common->devlink);
@@ -2482,7 +2506,6 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
                                          ARRAY_SIZE(am65_cpsw_devlink_params));
        }
 
-       am65_cpsw_unregister_devlink_ports(common);
        devlink_unregister(common->devlink);
        devlink_free(common->devlink);
 }
index 5d93e34..048ed10 100644 (file)
@@ -126,6 +126,8 @@ struct am65_cpsw_common {
        struct am65_cpsw_rx_chn rx_chns;
        struct napi_struct      napi_rx;
 
+       bool                    rx_irq_disabled;
+
        u32                     nuss_ver;
        u32                     cpsw_ver;
        unsigned long           bus_freq;
index c207151..02d4e51 100644 (file)
@@ -1044,7 +1044,7 @@ static const struct net_device_ops cpmac_netdev_ops = {
        .ndo_start_xmit         = cpmac_start_xmit,
        .ndo_tx_timeout         = cpmac_tx_timeout,
        .ndo_set_rx_mode        = cpmac_set_multicast_list,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 };
index cbbd0f6..66f7ddd 100644 (file)
@@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -845,7 +845,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
                struct ethtool_coalesce coal;
 
                coal.rx_coalesce_usecs = cpsw->coal_intvl;
-               cpsw_set_coalesce(ndev, &coal);
+               cpsw_set_coalesce(ndev, &coal, NULL, NULL);
        }
 
        cpdma_ctlr_start(cpsw->dma);
@@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        struct cpdma_chan *txch;
        int ret, q_idx;
 
-       if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+       if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) {
                cpsw_err(priv, tx_err, "packet pad failed\n");
                ndev->stats.tx_dropped++;
                return NET_XMIT_DROP;
@@ -1159,7 +1159,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
        .ndo_stop               = cpsw_ndo_stop,
        .ndo_start_xmit         = cpsw_ndo_start_xmit,
        .ndo_set_mac_address    = cpsw_ndo_set_mac_address,
-       .ndo_do_ioctl           = cpsw_ndo_ioctl,
+       .ndo_eth_ioctl          = cpsw_ndo_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_tx_timeout         = cpsw_ndo_tx_timeout,
        .ndo_set_rx_mode        = cpsw_ndo_set_rx_mode,
index 4619c3a..158c8d3 100644 (file)
@@ -152,7 +152,9 @@ void cpsw_set_msglevel(struct net_device *ndev, u32 value)
        priv->msg_enable = value;
 }
 
-int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack)
 {
        struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
@@ -160,7 +162,9 @@ int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
        return 0;
 }
 
-int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal)
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        u32 int_ctrl;
index d1d0200..7968f24 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sys_soc.h>
 
+#include <net/switchdev.h>
 #include <net/page_pool.h>
 #include <net/pkt_cls.h>
 #include <net/devlink.h>
@@ -374,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -501,7 +502,7 @@ static void cpsw_restore(struct cpsw_priv *priv)
 
 static void cpsw_init_stp_ale_entry(struct cpsw_common *cpsw)
 {
-       char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0};
+       static const char stpa[] = {0x01, 0x80, 0xc2, 0x0, 0x0, 0x0};
 
        cpsw_ale_add_mcast(cpsw->ale, stpa,
                           ALE_PORT_HOST, ALE_SUPER, 0,
@@ -893,7 +894,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
                struct ethtool_coalesce coal;
 
                coal.rx_coalesce_usecs = cpsw->coal_intvl;
-               cpsw_set_coalesce(ndev, &coal);
+               cpsw_set_coalesce(ndev, &coal, NULL, NULL);
        }
 
        cpdma_ctlr_start(cpsw->dma);
@@ -1127,7 +1128,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
        .ndo_stop               = cpsw_ndo_stop,
        .ndo_start_xmit         = cpsw_ndo_start_xmit,
        .ndo_set_mac_address    = cpsw_ndo_set_mac_address,
-       .ndo_do_ioctl           = cpsw_ndo_ioctl,
+       .ndo_eth_ioctl          = cpsw_ndo_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_tx_timeout         = cpsw_ndo_tx_timeout,
        .ndo_set_rx_mode        = cpsw_ndo_set_rx_mode,
@@ -1500,10 +1501,12 @@ static void cpsw_port_offload_fwd_mark_update(struct cpsw_common *cpsw)
 }
 
 static int cpsw_netdevice_port_link(struct net_device *ndev,
-                                   struct net_device *br_ndev)
+                                   struct net_device *br_ndev,
+                                   struct netlink_ext_ack *extack)
 {
        struct cpsw_priv *priv = netdev_priv(ndev);
        struct cpsw_common *cpsw = priv->cpsw;
+       int err;
 
        if (!cpsw->br_members) {
                cpsw->hw_bridge_dev = br_ndev;
@@ -1515,6 +1518,11 @@ static int cpsw_netdevice_port_link(struct net_device *ndev,
                        return -EOPNOTSUPP;
        }
 
+       err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
+                                           false, extack);
+       if (err)
+               return err;
+
        cpsw->br_members |= BIT(priv->emac_port);
 
        cpsw_port_offload_fwd_mark_update(cpsw);
@@ -1527,6 +1535,8 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev)
        struct cpsw_priv *priv = netdev_priv(ndev);
        struct cpsw_common *cpsw = priv->cpsw;
 
+       switchdev_bridge_port_unoffload(ndev, NULL, NULL, NULL);
+
        cpsw->br_members &= ~BIT(priv->emac_port);
 
        cpsw_port_offload_fwd_mark_update(cpsw);
@@ -1539,6 +1549,7 @@ static void cpsw_netdevice_port_unlink(struct net_device *ndev)
 static int cpsw_netdevice_event(struct notifier_block *unused,
                                unsigned long event, void *ptr)
 {
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct netdev_notifier_changeupper_info *info;
        int ret = NOTIFY_DONE;
@@ -1553,7 +1564,8 @@ static int cpsw_netdevice_event(struct notifier_block *unused,
                if (netif_is_bridge_master(info->upper_dev)) {
                        if (info->linking)
                                ret = cpsw_netdevice_port_link(ndev,
-                                                              info->upper_dev);
+                                                              info->upper_dev,
+                                                              extack);
                        else
                                cpsw_netdevice_port_unlink(ndev);
                }
@@ -1791,14 +1803,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw)
        struct cpsw_devlink *dl_priv;
        int ret = 0;
 
-       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv));
+       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!cpsw->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(cpsw->devlink);
        dl_priv->cpsw = cpsw;
 
-       ret = devlink_register(cpsw->devlink, dev);
+       ret = devlink_register(cpsw->devlink);
        if (ret) {
                dev_err(dev, "DL reg fail ret:%d\n", ret);
                goto dl_free;
index 2951fb7..435668e 100644 (file)
@@ -464,8 +464,12 @@ void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv);
 /* ethtool */
 u32 cpsw_get_msglevel(struct net_device *ndev);
 void cpsw_set_msglevel(struct net_device *ndev, u32 value);
-int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
-int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal);
+int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack);
+int cpsw_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal,
+                     struct kernel_ethtool_coalesce *kernel_coal,
+                     struct netlink_ext_ack *extack);
 int cpsw_get_sset_count(struct net_device *ndev, int sset);
 void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data);
 void cpsw_get_ethtool_stats(struct net_device *ndev,
index c674e34..e8291d8 100644 (file)
@@ -383,12 +383,16 @@ static void emac_get_drvinfo(struct net_device *ndev,
  * emac_get_coalesce - Get interrupt coalesce settings for this device
  * @ndev : The DaVinci EMAC network adapter
  * @coal : ethtool coalesce settings structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Fetch the current interrupt coalesce settings
  *
  */
 static int emac_get_coalesce(struct net_device *ndev,
-                               struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct emac_priv *priv = netdev_priv(ndev);
 
@@ -401,12 +405,16 @@ static int emac_get_coalesce(struct net_device *ndev,
  * emac_set_coalesce - Set interrupt coalesce settings for this device
  * @ndev : The DaVinci EMAC network adapter
  * @coal : ethtool coalesce settings structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
  *
  * Set interrupt coalesce parameters
  *
  */
 static int emac_set_coalesce(struct net_device *ndev,
-                               struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct emac_priv *priv = netdev_priv(ndev);
        u32 int_ctrl, num_interrupts = 0;
@@ -943,7 +951,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev)
                goto fail_tx;
        }
 
-       ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
+       ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
        if (unlikely(ret_code < 0)) {
                if (netif_msg_tx_err(priv) && net_ratelimit())
                        dev_err(emac_dev, "DaVinci EMAC: packet pad failed");
@@ -1462,7 +1470,7 @@ static int emac_dev_open(struct net_device *ndev)
                struct ethtool_coalesce coal;
 
                coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
-               emac_set_coalesce(ndev, &coal);
+               emac_set_coalesce(ndev, &coal, NULL, NULL);
        }
 
        cpdma_ctlr_start(priv->dma);
@@ -1670,7 +1678,7 @@ static const struct net_device_ops emac_netdev_ops = {
        .ndo_start_xmit         = emac_dev_xmit,
        .ndo_set_rx_mode        = emac_dev_mcast_set,
        .ndo_set_mac_address    = emac_dev_setmac_addr,
-       .ndo_do_ioctl           = emac_devioctl,
+       .ndo_eth_ioctl          = emac_devioctl,
        .ndo_tx_timeout         = emac_dev_tx_timeout,
        .ndo_get_stats          = emac_dev_getnetstats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 97942b0..eda2961 100644 (file)
@@ -1944,7 +1944,7 @@ static const struct net_device_ops netcp_netdev_ops = {
        .ndo_stop               = netcp_ndo_stop,
        .ndo_start_xmit         = netcp_ndo_start_xmit,
        .ndo_set_rx_mode        = netcp_set_rx_mode,
-       .ndo_do_ioctl           = netcp_ndo_ioctl,
+       .ndo_eth_ioctl           = netcp_ndo_ioctl,
        .ndo_get_stats64        = netcp_get_stats,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index e0cb713..77c448a 100644 (file)
@@ -749,7 +749,7 @@ static const struct net_device_ops tlan_netdev_ops = {
        .ndo_tx_timeout         = tlan_tx_timeout,
        .ndo_get_stats          = tlan_get_stats,
        .ndo_set_rx_mode        = tlan_set_multicast_list,
-       .ndo_do_ioctl           = tlan_ioctl,
+       .ndo_eth_ioctl          = tlan_ioctl,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 226a766..66d4e02 100644 (file)
@@ -354,9 +354,10 @@ spider_net_free_rx_chain_contents(struct spider_net_card *card)
        descr = card->rx_chain.head;
        do {
                if (descr->skb) {
-                       pci_unmap_single(card->pdev, descr->hwdescr->buf_addr,
+                       dma_unmap_single(&card->pdev->dev,
+                                        descr->hwdescr->buf_addr,
                                         SPIDER_NET_MAX_FRAME,
-                                        PCI_DMA_BIDIRECTIONAL);
+                                        DMA_BIDIRECTIONAL);
                        dev_kfree_skb(descr->skb);
                        descr->skb = NULL;
                }
@@ -411,9 +412,9 @@ spider_net_prepare_rx_descr(struct spider_net_card *card,
        if (offset)
                skb_reserve(descr->skb, SPIDER_NET_RXBUF_ALIGN - offset);
        /* iommu-map the skb */
-       buf = pci_map_single(card->pdev, descr->skb->data,
-                       SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE);
-       if (pci_dma_mapping_error(card->pdev, buf)) {
+       buf = dma_map_single(&card->pdev->dev, descr->skb->data,
+                            SPIDER_NET_MAX_FRAME, DMA_FROM_DEVICE);
+       if (dma_mapping_error(&card->pdev->dev, buf)) {
                dev_kfree_skb_any(descr->skb);
                descr->skb = NULL;
                if (netif_msg_rx_err(card) && net_ratelimit())
@@ -653,8 +654,9 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
        dma_addr_t buf;
        unsigned long flags;
 
-       buf = pci_map_single(card->pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(card->pdev, buf)) {
+       buf = dma_map_single(&card->pdev->dev, skb->data, skb->len,
+                            DMA_TO_DEVICE);
+       if (dma_mapping_error(&card->pdev->dev, buf)) {
                if (netif_msg_tx_err(card) && net_ratelimit())
                        dev_err(&card->netdev->dev, "could not iommu-map packet (%p, %i). "
                                  "Dropping packet\n", skb->data, skb->len);
@@ -666,7 +668,8 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
        descr = card->tx_chain.head;
        if (descr->next == chain->tail->prev) {
                spin_unlock_irqrestore(&chain->lock, flags);
-               pci_unmap_single(card->pdev, buf, skb->len, PCI_DMA_TODEVICE);
+               dma_unmap_single(&card->pdev->dev, buf, skb->len,
+                                DMA_TO_DEVICE);
                return -ENOMEM;
        }
        hwdescr = descr->hwdescr;
@@ -822,8 +825,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal)
 
                /* unmap the skb */
                if (skb) {
-                       pci_unmap_single(card->pdev, buf_addr, skb->len,
-                                       PCI_DMA_TODEVICE);
+                       dma_unmap_single(&card->pdev->dev, buf_addr, skb->len,
+                                        DMA_TO_DEVICE);
                        dev_consume_skb_any(skb);
                }
        }
@@ -1165,8 +1168,8 @@ spider_net_decode_one_descr(struct spider_net_card *card)
        /* unmap descriptor */
        hw_buf_addr = hwdescr->buf_addr;
        hwdescr->buf_addr = 0xffffffff;
-       pci_unmap_single(card->pdev, hw_buf_addr,
-                       SPIDER_NET_MAX_FRAME, PCI_DMA_FROMDEVICE);
+       dma_unmap_single(&card->pdev->dev, hw_buf_addr, SPIDER_NET_MAX_FRAME,
+                        DMA_FROM_DEVICE);
 
        if ( (status == SPIDER_NET_DESCR_RESPONSE_ERROR) ||
             (status == SPIDER_NET_DESCR_PROTECTION_ERROR) ||
@@ -2214,7 +2217,7 @@ static const struct net_device_ops spider_net_ops = {
        .ndo_start_xmit         = spider_net_xmit,
        .ndo_set_rx_mode        = spider_net_set_multi,
        .ndo_set_mac_address    = spider_net_set_mac,
-       .ndo_do_ioctl           = spider_net_do_ioctl,
+       .ndo_eth_ioctl          = spider_net_do_ioctl,
        .ndo_tx_timeout         = spider_net_tx_timeout,
        .ndo_validate_addr      = eth_validate_addr,
        /* HW VLAN */
index fedb2bf..52245ac 100644 (file)
@@ -750,7 +750,7 @@ static const struct net_device_ops tc35815_netdev_ops = {
        .ndo_get_stats          = tc35815_get_stats,
        .ndo_set_rx_mode        = tc35815_set_multicast_list,
        .ndo_tx_timeout         = tc35815_tx_timeout,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index c62f474..cf0917b 100644 (file)
@@ -1538,7 +1538,7 @@ static const struct net_device_ops tsi108_netdev_ops = {
        .ndo_start_xmit         = tsi108_send_packet,
        .ndo_set_rx_mode        = tsi108_set_rx_mode,
        .ndo_get_stats          = tsi108_get_stats,
-       .ndo_do_ioctl           = tsi108_do_ioctl,
+       .ndo_eth_ioctl          = tsi108_do_ioctl,
        .ndo_set_mac_address    = tsi108_set_mac,
        .ndo_validate_addr      = eth_validate_addr,
 };
index 73ca597..3b73a9c 100644 (file)
@@ -884,7 +884,7 @@ static const struct net_device_ops rhine_netdev_ops = {
        .ndo_set_rx_mode         = rhine_set_rx_mode,
        .ndo_validate_addr       = eth_validate_addr,
        .ndo_set_mac_address     = eth_mac_addr,
-       .ndo_do_ioctl            = netdev_ioctl,
+       .ndo_eth_ioctl           = netdev_ioctl,
        .ndo_tx_timeout          = rhine_tx_timeout,
        .ndo_vlan_rx_add_vid     = rhine_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid    = rhine_vlan_rx_kill_vid,
@@ -1113,13 +1113,12 @@ err_out:
 
 static int rhine_init_one_platform(struct platform_device *pdev)
 {
-       const struct of_device_id *match;
        const u32 *quirks;
        int irq;
        void __iomem *ioaddr;
 
-       match = of_match_device(rhine_of_tbl, &pdev->dev);
-       if (!match)
+       quirks = of_device_get_match_data(&pdev->dev);
+       if (!quirks)
                return -EINVAL;
 
        ioaddr = devm_platform_ioremap_resource(pdev, 0);
@@ -1130,10 +1129,6 @@ static int rhine_init_one_platform(struct platform_device *pdev)
        if (!irq)
                return -EINVAL;
 
-       quirks = match->data;
-       if (!quirks)
-               return -EINVAL;
-
        return rhine_init_one_common(&pdev->dev, *quirks,
                                     (long)ioaddr, ioaddr, irq);
 }
index 88426b5..4b9c30f 100644 (file)
@@ -2637,7 +2637,7 @@ static const struct net_device_ops velocity_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_set_rx_mode        = velocity_set_multi,
        .ndo_change_mtu         = velocity_change_mtu,
-       .ndo_do_ioctl           = velocity_ioctl,
+       .ndo_eth_ioctl          = velocity_ioctl,
        .ndo_vlan_rx_add_vid    = velocity_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = velocity_vlan_rx_kill_vid,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2943,14 +2943,12 @@ static void velocity_pci_remove(struct pci_dev *pdev)
 
 static int velocity_platform_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id;
        const struct velocity_info_tbl *info;
        int irq;
 
-       of_id = of_match_device(velocity_of_ids, &pdev->dev);
-       if (!of_id)
+       info = of_device_get_match_data(&pdev->dev);
+       if (!info)
                return -EINVAL;
-       info = of_id->data;
 
        irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
        if (!irq)
@@ -3520,7 +3518,9 @@ static void set_pending_timer_val(int *val, u32 us)
 
 
 static int velocity_get_coalesce(struct net_device *dev,
-               struct ethtool_coalesce *ecmd)
+                                struct ethtool_coalesce *ecmd,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct velocity_info *vptr = netdev_priv(dev);
 
@@ -3534,7 +3534,9 @@ static int velocity_get_coalesce(struct net_device *dev,
 }
 
 static int velocity_set_coalesce(struct net_device *dev,
-               struct ethtool_coalesce *ecmd)
+                                struct ethtool_coalesce *ecmd,
+                                struct kernel_ethtool_coalesce *kernel_coal,
+                                struct netlink_ext_ack *extack)
 {
        struct velocity_info *vptr = netdev_priv(dev);
        int max_us = 0x3f * 64;
index 811815f..f974e70 100644 (file)
@@ -1047,6 +1047,8 @@ static int w5100_mmio_probe(struct platform_device *pdev)
                mac_addr = data->mac_addr;
 
        mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!mem)
+               return -EINVAL;
        if (resource_size(mem) < W5100_BUS_DIRECT_SIZE)
                ops = &w5100_mmio_indirect_ops;
        else
index 60a4f79..463094c 100644 (file)
@@ -1237,7 +1237,7 @@ static const struct net_device_ops temac_netdev_ops = {
        .ndo_set_rx_mode = temac_set_multicast_list,
        .ndo_set_mac_address = temac_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
-       .ndo_do_ioctl = phy_do_ioctl_running,
+       .ndo_eth_ioctl = phy_do_ioctl_running,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = temac_poll_controller,
 #endif
@@ -1310,8 +1310,11 @@ static int ll_temac_ethtools_set_ringparam(struct net_device *ndev,
        return 0;
 }
 
-static int ll_temac_ethtools_get_coalesce(struct net_device *ndev,
-                                         struct ethtool_coalesce *ec)
+static int
+ll_temac_ethtools_get_coalesce(struct net_device *ndev,
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        struct temac_local *lp = netdev_priv(ndev);
 
@@ -1322,8 +1325,11 @@ static int ll_temac_ethtools_get_coalesce(struct net_device *ndev,
        return 0;
 }
 
-static int ll_temac_ethtools_set_coalesce(struct net_device *ndev,
-                                         struct ethtool_coalesce *ec)
+static int
+ll_temac_ethtools_set_coalesce(struct net_device *ndev,
+                              struct ethtool_coalesce *ec,
+                              struct kernel_ethtool_coalesce *kernel_coal,
+                              struct netlink_ext_ack *extack)
 {
        struct temac_local *lp = netdev_priv(ndev);
 
index 13cd799..871b5ec 100644 (file)
@@ -1227,7 +1227,7 @@ static const struct net_device_ops axienet_netdev_ops = {
        .ndo_change_mtu = axienet_change_mtu,
        .ndo_set_mac_address = netdev_set_mac_address,
        .ndo_validate_addr = eth_validate_addr,
-       .ndo_do_ioctl = axienet_ioctl,
+       .ndo_eth_ioctl = axienet_ioctl,
        .ndo_set_rx_mode = axienet_set_multicast_list,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = axienet_poll_controller,
@@ -1400,6 +1400,8 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev,
  * axienet_ethtools_get_coalesce - Get DMA interrupt coalescing count.
  * @ndev:      Pointer to net_device structure
  * @ecoalesce: Pointer to ethtool_coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack:    extack for reporting error messages
  *
  * This implements ethtool command for getting the DMA interrupt coalescing
  * count on Tx and Rx paths. Issue "ethtool -c ethX" under linux prompt to
@@ -1407,8 +1409,11 @@ axienet_ethtools_set_pauseparam(struct net_device *ndev,
  *
  * Return: 0 always
  */
-static int axienet_ethtools_get_coalesce(struct net_device *ndev,
-                                        struct ethtool_coalesce *ecoalesce)
+static int
+axienet_ethtools_get_coalesce(struct net_device *ndev,
+                             struct ethtool_coalesce *ecoalesce,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        u32 regval = 0;
        struct axienet_local *lp = netdev_priv(ndev);
@@ -1425,6 +1430,8 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev,
  * axienet_ethtools_set_coalesce - Set DMA interrupt coalescing count.
  * @ndev:      Pointer to net_device structure
  * @ecoalesce: Pointer to ethtool_coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack:    extack for reporting error messages
  *
  * This implements ethtool command for setting the DMA interrupt coalescing
  * count on Tx and Rx paths. Issue "ethtool -C ethX rx-frames 5" under linux
@@ -1432,8 +1439,11 @@ static int axienet_ethtools_get_coalesce(struct net_device *ndev,
  *
  * Return: 0, on success, Non-zero error value on failure.
  */
-static int axienet_ethtools_set_coalesce(struct net_device *ndev,
-                                        struct ethtool_coalesce *ecoalesce)
+static int
+axienet_ethtools_set_coalesce(struct net_device *ndev,
+                             struct ethtool_coalesce *ecoalesce,
+                             struct kernel_ethtool_coalesce *kernel_coal,
+                             struct netlink_ext_ack *extack)
 {
        struct axienet_local *lp = netdev_priv(ndev);
 
index b06377f..b780aad 100644 (file)
@@ -1263,7 +1263,7 @@ static const struct net_device_ops xemaclite_netdev_ops = {
        .ndo_start_xmit         = xemaclite_send,
        .ndo_set_mac_address    = xemaclite_set_mac_address,
        .ndo_tx_timeout         = xemaclite_tx_timeout,
-       .ndo_do_ioctl           = xemaclite_ioctl,
+       .ndo_eth_ioctl          = xemaclite_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller = xemaclite_poll_controller,
 #endif
index 4f6db6f..ae611e4 100644 (file)
@@ -464,7 +464,7 @@ static const struct net_device_ops netdev_ops = {
        .ndo_start_xmit         = do_start_xmit,
        .ndo_tx_timeout         = xirc_tx_timeout,
        .ndo_set_config         = do_config,
-       .ndo_do_ioctl           = do_ioctl,
+       .ndo_eth_ioctl          = do_ioctl,
        .ndo_set_rx_mode        = set_multicast_list,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
index 468ffe3..0e878fa 100644 (file)
@@ -29,9 +29,9 @@ config IXP4XX_ETH
          on IXP4xx processor.
 
 config PTP_1588_CLOCK_IXP46X
-       tristate "Intel IXP46x as PTP clock"
+       bool "Intel IXP46x as PTP clock"
        depends on IXP4XX_ETH
-       depends on PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK=y || PTP_1588_CLOCK=IXP4XX_ETH
        default y
        help
          This driver adds support for using the IXP46X as a PTP
index 607f91b..e935f2a 100644 (file)
@@ -3,5 +3,9 @@
 # Makefile for the Intel XScale IXP device drivers.
 #
 
+# Keep this link order to avoid deferred probing
+ifdef CONFIG_PTP_1588_CLOCK_IXP46X
+obj-$(CONFIG_IXP4XX_ETH)               += ptp_ixp46x.o
+endif
+
 obj-$(CONFIG_IXP4XX_ETH)               += ixp4xx_eth.o
-obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)    += ptp_ixp46x.o
index d792130..ee9b93d 100644 (file)
@@ -62,7 +62,16 @@ struct ixp46x_ts_regs {
 #define TX_SNAPSHOT_LOCKED (1<<0)
 #define RX_SNAPSHOT_LOCKED (1<<1)
 
-/* The ptp_ixp46x module will set this variable */
-extern int ixp46x_phc_index;
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK_IXP46X)
+int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index);
+#else
+static inline int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index)
+{
+       *regs = NULL;
+       *phc_index = -1;
+
+       return -ENODEV;
+}
+#endif
 
 #endif
index 7ae754e..931494c 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/module.h>
 #include <linux/soc/ixp4xx/npe.h>
 #include <linux/soc/ixp4xx/qmgr.h>
-#include <mach/hardware.h>
 #include <linux/soc/ixp4xx/cpu.h>
 
 #include "ixp46x_ts.h"
@@ -169,13 +168,15 @@ struct eth_regs {
 
 struct port {
        struct eth_regs __iomem *regs;
+       struct ixp46x_ts_regs __iomem *timesync_regs;
+       int phc_index;
        struct npe *npe;
        struct net_device *netdev;
        struct napi_struct napi;
        struct eth_plat_info *plat;
        buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
        struct desc *desc_tab;  /* coherent */
-       u32 desc_tab_phys;
+       dma_addr_t desc_tab_phys;
        int id;                 /* logical port ID */
        int speed, duplex;
        u8 firmware[4];
@@ -295,7 +296,7 @@ static void ixp_rx_timestamp(struct port *port, struct sk_buff *skb)
 
        ch = PORT2CHANNEL(port);
 
-       regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+       regs = port->timesync_regs;
 
        val = __raw_readl(&regs->channel[ch].ch_event);
 
@@ -340,7 +341,7 @@ static void ixp_tx_timestamp(struct port *port, struct sk_buff *skb)
 
        ch = PORT2CHANNEL(port);
 
-       regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+       regs = port->timesync_regs;
 
        /*
         * This really stinks, but we have to poll for the Tx time stamp.
@@ -375,6 +376,7 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
        struct hwtstamp_config cfg;
        struct ixp46x_ts_regs *regs;
        struct port *port = netdev_priv(netdev);
+       int ret;
        int ch;
 
        if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -383,8 +385,12 @@ static int hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
        if (cfg.flags) /* reserved for future extensions */
                return -EINVAL;
 
+       ret = ixp46x_ptp_find(&port->timesync_regs, &port->phc_index);
+       if (ret)
+               return ret;
+
        ch = PORT2CHANNEL(port);
-       regs = (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+       regs = port->timesync_regs;
 
        if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON)
                return -ERANGE;
@@ -850,14 +856,14 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
        bytes = len;
        mem = skb->data;
 #else
-       offset = (int)skb->data & 3; /* keep 32-bit alignment */
+       offset = (uintptr_t)skb->data & 3; /* keep 32-bit alignment */
        bytes = ALIGN(offset + len, 4);
        if (!(mem = kmalloc(bytes, GFP_ATOMIC))) {
                dev_kfree_skb(skb);
                dev->stats.tx_dropped++;
                return NETDEV_TX_OK;
        }
-       memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
+       memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
 #endif
 
        phys = dma_map_single(&dev->dev, mem, bytes, DMA_TO_DEVICE);
@@ -988,25 +994,27 @@ static void ixp4xx_get_drvinfo(struct net_device *dev,
        strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
 }
 
-int ixp46x_phc_index = -1;
-EXPORT_SYMBOL_GPL(ixp46x_phc_index);
-
 static int ixp4xx_get_ts_info(struct net_device *dev,
                              struct ethtool_ts_info *info)
 {
-       if (!cpu_is_ixp46x()) {
+       struct port *port = netdev_priv(dev);
+
+       if (port->phc_index < 0)
+               ixp46x_ptp_find(&port->timesync_regs, &port->phc_index);
+
+       info->phc_index = port->phc_index;
+
+       if (info->phc_index < 0) {
                info->so_timestamping =
                        SOF_TIMESTAMPING_TX_SOFTWARE |
                        SOF_TIMESTAMPING_RX_SOFTWARE |
                        SOF_TIMESTAMPING_SOFTWARE;
-               info->phc_index = -1;
                return 0;
        }
        info->so_timestamping =
                SOF_TIMESTAMPING_TX_HARDWARE |
                SOF_TIMESTAMPING_RX_HARDWARE |
                SOF_TIMESTAMPING_RAW_HARDWARE;
-       info->phc_index = ixp46x_phc_index;
        info->tx_types =
                (1 << HWTSTAMP_TX_OFF) |
                (1 << HWTSTAMP_TX_ON);
@@ -1357,7 +1365,7 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
        .ndo_stop = eth_close,
        .ndo_start_xmit = eth_xmit,
        .ndo_set_rx_mode = eth_set_mcast_list,
-       .ndo_do_ioctl = eth_ioctl,
+       .ndo_eth_ioctl = eth_ioctl,
        .ndo_set_mac_address = eth_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
 };
@@ -1481,6 +1489,7 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
        port = netdev_priv(ndev);
        port->netdev = ndev;
        port->id = plat->npe;
+       port->phc_index = -1;
 
        /* Get the port resource and remap */
        port->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
index a6fb88f..ecece21 100644 (file)
@@ -5,14 +5,16 @@
  * Copyright (C) 2010 OMICRON electronics GmbH
  */
 #include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/platform_device.h>
 #include <linux/soc/ixp4xx/cpu.h>
 #include <linux/module.h>
 #include <mach/ixp4xx-regs.h>
 
 #define DRIVER         "ptp_ixp46x"
 #define N_EXT_TS       2
-#define MASTER_GPIO    8
-#define MASTER_IRQ     25
-#define SLAVE_GPIO     7
-#define SLAVE_IRQ      24
 
 struct ixp_clock {
        struct ixp46x_ts_regs *regs;
@@ -32,9 +30,11 @@ struct ixp_clock {
        struct ptp_clock_info caps;
        int exts0_enabled;
        int exts1_enabled;
+       int slave_irq;
+       int master_irq;
 };
 
-DEFINE_SPINLOCK(register_lock);
+static DEFINE_SPINLOCK(register_lock);
 
 /*
  * Register access functions
@@ -243,53 +243,38 @@ static const struct ptp_clock_info ptp_ixp_caps = {
 
 static struct ixp_clock ixp_clock;
 
-static int setup_interrupt(int gpio)
+int ixp46x_ptp_find(struct ixp46x_ts_regs *__iomem *regs, int *phc_index)
 {
-       int irq;
-       int err;
-
-       err = gpio_request(gpio, "ixp4-ptp");
-       if (err)
-               return err;
-
-       err = gpio_direction_input(gpio);
-       if (err)
-               return err;
-
-       irq = gpio_to_irq(gpio);
-       if (irq < 0)
-               return irq;
+       *regs = ixp_clock.regs;
+       *phc_index = ptp_clock_index(ixp_clock.ptp_clock);
 
-       err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING);
-       if (err) {
-               pr_err("cannot set trigger type for irq %d\n", irq);
-               return err;
-       }
-
-       err = request_irq(irq, isr, 0, DRIVER, &ixp_clock);
-       if (err) {
-               pr_err("request_irq failed for irq %d\n", irq);
-               return err;
-       }
+       if (!ixp_clock.ptp_clock)
+               return -EPROBE_DEFER;
 
-       return irq;
+       return 0;
 }
+EXPORT_SYMBOL_GPL(ixp46x_ptp_find);
 
-static void __exit ptp_ixp_exit(void)
+/* Called from the registered devm action */
+static void ptp_ixp_unregister_action(void *d)
 {
-       free_irq(MASTER_IRQ, &ixp_clock);
-       free_irq(SLAVE_IRQ, &ixp_clock);
-       ixp46x_phc_index = -1;
-       ptp_clock_unregister(ixp_clock.ptp_clock);
+       struct ptp_clock *ptp_clock = d;
+
+       ptp_clock_unregister(ptp_clock);
+       ixp_clock.ptp_clock = NULL;
 }
 
-static int __init ptp_ixp_init(void)
+static int ptp_ixp_probe(struct platform_device *pdev)
 {
-       if (!cpu_is_ixp46x())
-               return -ENODEV;
+       struct device *dev = &pdev->dev;
+       int ret;
 
-       ixp_clock.regs =
-               (struct ixp46x_ts_regs __iomem *) IXP4XX_TIMESYNC_BASE_VIRT;
+       ixp_clock.regs = devm_platform_ioremap_resource(pdev, 0);
+       ixp_clock.master_irq = platform_get_irq(pdev, 0);
+       ixp_clock.slave_irq = platform_get_irq(pdev, 1);
+       if (IS_ERR(ixp_clock.regs) ||
+           !ixp_clock.master_irq || !ixp_clock.slave_irq)
+               return -ENXIO;
 
        ixp_clock.caps = ptp_ixp_caps;
 
@@ -298,32 +283,51 @@ static int __init ptp_ixp_init(void)
        if (IS_ERR(ixp_clock.ptp_clock))
                return PTR_ERR(ixp_clock.ptp_clock);
 
-       ixp46x_phc_index = ptp_clock_index(ixp_clock.ptp_clock);
+       ret = devm_add_action_or_reset(dev, ptp_ixp_unregister_action,
+                                      ixp_clock.ptp_clock);
+       if (ret) {
+               dev_err(dev, "failed to install clock removal handler\n");
+               return ret;
+       }
 
        __raw_writel(DEFAULT_ADDEND, &ixp_clock.regs->addend);
        __raw_writel(1, &ixp_clock.regs->trgt_lo);
        __raw_writel(0, &ixp_clock.regs->trgt_hi);
        __raw_writel(TTIPEND, &ixp_clock.regs->event);
 
-       if (MASTER_IRQ != setup_interrupt(MASTER_GPIO)) {
-               pr_err("failed to setup gpio %d as irq\n", MASTER_GPIO);
-               goto no_master;
-       }
-       if (SLAVE_IRQ != setup_interrupt(SLAVE_GPIO)) {
-               pr_err("failed to setup gpio %d as irq\n", SLAVE_GPIO);
-               goto no_slave;
-       }
+       ret = devm_request_irq(dev, ixp_clock.master_irq, isr,
+                              0, DRIVER, &ixp_clock);
+       if (ret)
+               return dev_err_probe(dev, ret,
+                                    "request_irq failed for irq %d\n",
+                                    ixp_clock.master_irq);
+
+       ret = devm_request_irq(dev, ixp_clock.slave_irq, isr,
+                              0, DRIVER, &ixp_clock);
+       if (ret)
+               return dev_err_probe(dev, ret,
+                                    "request_irq failed for irq %d\n",
+                                    ixp_clock.slave_irq);
 
        return 0;
-no_slave:
-       free_irq(MASTER_IRQ, &ixp_clock);
-no_master:
-       ptp_clock_unregister(ixp_clock.ptp_clock);
-       return -ENODEV;
 }
 
-module_init(ptp_ixp_init);
-module_exit(ptp_ixp_exit);
+static const struct of_device_id ptp_ixp_match[] = {
+       {
+               .compatible = "intel,ixp46x-ptp-timer",
+       },
+       { },
+};
+
+static struct platform_driver ptp_ixp_driver = {
+       .driver = {
+               .name = "ptp-ixp46x",
+               .of_match_table = ptp_ixp_match,
+               .suppress_bind_attrs = true,
+       },
+       .probe = ptp_ixp_probe,
+};
+module_platform_driver(ptp_ixp_driver);
 
 MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
 MODULE_DESCRIPTION("PTP clock using the IXP46X timer");
index 69c29a2..c5cb421 100644 (file)
@@ -70,6 +70,7 @@ static const char * const boot_msg =
 /* Include files */
 
 #include <linux/capability.h>
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -103,7 +104,8 @@ static struct net_device_stats *skfp_ctl_get_stats(struct net_device *dev);
 static void skfp_ctl_set_multicast_list(struct net_device *dev);
 static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev);
 static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr);
-static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                              void __user *data, int cmd);
 static netdev_tx_t skfp_send_pkt(struct sk_buff *skb,
                                       struct net_device *dev);
 static void send_queued_packets(struct s_smc *smc);
@@ -164,7 +166,7 @@ static const struct net_device_ops skfp_netdev_ops = {
        .ndo_get_stats          = skfp_ctl_get_stats,
        .ndo_set_rx_mode        = skfp_ctl_set_multicast_list,
        .ndo_set_mac_address    = skfp_ctl_set_mac_address,
-       .ndo_do_ioctl           = skfp_ioctl,
+       .ndo_siocdevprivate     = skfp_siocdevprivate,
 };
 
 /*
@@ -932,9 +934,9 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr)
 
 
 /*
- * ==============
- * = skfp_ioctl =
- * ==============
+ * =======================
+ * = skfp_siocdevprivate =
+ * =======================
  *   
  * Overview:
  *
@@ -954,16 +956,19 @@ static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr)
  */
 
 
-static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int skfp_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
        struct s_smc *smc = netdev_priv(dev);
        skfddi_priv *lp = &smc->os;
        struct s_skfp_ioctl ioc;
        int status = 0;
 
-       if (copy_from_user(&ioc, rq->ifr_data, sizeof(struct s_skfp_ioctl)))
+       if (copy_from_user(&ioc, data, sizeof(struct s_skfp_ioctl)))
                return -EFAULT;
 
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+
        switch (ioc.cmd) {
        case SKFP_GET_STATS:    /* Get the driver statistics */
                ioc.len = sizeof(lp->MacStat);
@@ -1169,8 +1174,8 @@ static void send_queued_packets(struct s_smc *smc)
 
                txd = (struct s_smt_fp_txd *) HWM_GET_CURR_TXD(smc, queue);
 
-               dma_address = pci_map_single(&bp->pdev, skb->data,
-                                            skb->len, PCI_DMA_TODEVICE);
+               dma_address = dma_map_single(&(&bp->pdev)->dev, skb->data,
+                                            skb->len, DMA_TO_DEVICE);
                if (frame_status & LAN_TX) {
                        txd->txd_os.skb = skb;                  // save skb
                        txd->txd_os.dma_addr = dma_address;     // save dma mapping
@@ -1179,8 +1184,8 @@ static void send_queued_packets(struct s_smc *smc)
                       frame_status | FIRST_FRAG | LAST_FRAG | EN_IRQ_EOF);
 
                if (!(frame_status & LAN_TX)) {         // local only frame
-                       pci_unmap_single(&bp->pdev, dma_address,
-                                        skb->len, PCI_DMA_TODEVICE);
+                       dma_unmap_single(&(&bp->pdev)->dev, dma_address,
+                                        skb->len, DMA_TO_DEVICE);
                        dev_kfree_skb_irq(skb);
                }
                spin_unlock_irqrestore(&bp->DriverLock, Flags);
@@ -1462,8 +1467,9 @@ void dma_complete(struct s_smc *smc, volatile union s_fp_descr *descr, int flag)
                if (r->rxd_os.skb && r->rxd_os.dma_addr) {
                        int MaxFrameSize = bp->MaxFrameSize;
 
-                       pci_unmap_single(&bp->pdev, r->rxd_os.dma_addr,
-                                        MaxFrameSize, PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&(&bp->pdev)->dev,
+                                        r->rxd_os.dma_addr, MaxFrameSize,
+                                        DMA_FROM_DEVICE);
                        r->rxd_os.dma_addr = 0;
                }
        }
@@ -1498,8 +1504,8 @@ void mac_drv_tx_complete(struct s_smc *smc, volatile struct s_smt_fp_txd *txd)
        txd->txd_os.skb = NULL;
 
        // release the DMA mapping
-       pci_unmap_single(&smc->os.pdev, txd->txd_os.dma_addr,
-                        skb->len, PCI_DMA_TODEVICE);
+       dma_unmap_single(&(&smc->os.pdev)->dev, txd->txd_os.dma_addr,
+                        skb->len, DMA_TO_DEVICE);
        txd->txd_os.dma_addr = 0;
 
        smc->os.MacStat.gen.tx_packets++;       // Count transmitted packets.
@@ -1702,10 +1708,9 @@ void mac_drv_requeue_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
                                skb_reserve(skb, 3);
                                skb_put(skb, MaxFrameSize);
                                v_addr = skb->data;
-                               b_addr = pci_map_single(&smc->os.pdev,
-                                                       v_addr,
-                                                       MaxFrameSize,
-                                                       PCI_DMA_FROMDEVICE);
+                               b_addr = dma_map_single(&(&smc->os.pdev)->dev,
+                                                       v_addr, MaxFrameSize,
+                                                       DMA_FROM_DEVICE);
                                rxd->rxd_os.dma_addr = b_addr;
                        } else {
                                // no skb available, use local buffer
@@ -1718,10 +1723,8 @@ void mac_drv_requeue_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
                        // we use skb from old rxd
                        rxd->rxd_os.skb = skb;
                        v_addr = skb->data;
-                       b_addr = pci_map_single(&smc->os.pdev,
-                                               v_addr,
-                                               MaxFrameSize,
-                                               PCI_DMA_FROMDEVICE);
+                       b_addr = dma_map_single(&(&smc->os.pdev)->dev, v_addr,
+                                               MaxFrameSize, DMA_FROM_DEVICE);
                        rxd->rxd_os.dma_addr = b_addr;
                }
                hwm_rx_frag(smc, v_addr, b_addr, MaxFrameSize,
@@ -1773,10 +1776,8 @@ void mac_drv_fill_rxd(struct s_smc *smc)
                        skb_reserve(skb, 3);
                        skb_put(skb, MaxFrameSize);
                        v_addr = skb->data;
-                       b_addr = pci_map_single(&smc->os.pdev,
-                                               v_addr,
-                                               MaxFrameSize,
-                                               PCI_DMA_FROMDEVICE);
+                       b_addr = dma_map_single(&(&smc->os.pdev)->dev, v_addr,
+                                               MaxFrameSize, DMA_FROM_DEVICE);
                        rxd->rxd_os.dma_addr = b_addr;
                } else {
                        // no skb available, use local buffer
@@ -1833,8 +1834,9 @@ void mac_drv_clear_rxd(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
                        skfddi_priv *bp = &smc->os;
                        int MaxFrameSize = bp->MaxFrameSize;
 
-                       pci_unmap_single(&bp->pdev, rxd->rxd_os.dma_addr,
-                                        MaxFrameSize, PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&(&bp->pdev)->dev,
+                                        rxd->rxd_os.dma_addr, MaxFrameSize,
+                                        DMA_FROM_DEVICE);
 
                        dev_kfree_skb(skb);
                        rxd->rxd_os.skb = NULL;
index 4435a11..775dcf4 100644 (file)
@@ -1005,7 +1005,8 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                void __user *data, int cmd)
 {
        struct baycom_state *bc = netdev_priv(dev);
        struct hdlcdrv_ioctl hi;
@@ -1013,7 +1014,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        if (cmd != SIOCDEVPRIVATE)
                return -ENOIOCTLCMD;
 
-       if (copy_from_user(&hi, ifr->ifr_data, sizeof(hi)))
+       if (copy_from_user(&hi, data, sizeof(hi)))
                return -EFAULT;
        switch (hi.cmd) {
        default:
@@ -1104,7 +1105,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return HDLCDRV_PARMASK_IOBASE;
 
        }
-       if (copy_to_user(ifr->ifr_data, &hi, sizeof(hi)))
+       if (copy_to_user(data, &hi, sizeof(hi)))
                return -EFAULT;
        return 0;
 }
@@ -1114,7 +1115,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 static const struct net_device_ops baycom_netdev_ops = {
        .ndo_open            = epp_open,
        .ndo_stop            = epp_close,
-       .ndo_do_ioctl        = baycom_ioctl,
+       .ndo_siocdevprivate  = baycom_siocdevprivate,
        .ndo_start_xmit      = baycom_send_packet,
        .ndo_set_mac_address = baycom_set_mac_address,
 };
index 6a3dc7b..fd7da5b 100644 (file)
@@ -380,7 +380,7 @@ static int par96_close(struct net_device *dev)
  * ===================== hdlcdrv driver interface =========================
  */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd);
 
 /* --------------------------------------------------------------------- */
@@ -408,7 +408,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd)
 {
        struct baycom_state *bc;
@@ -428,7 +428,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        case HDLCDRVCTL_GETMODE:
                strcpy(hi->data.modename, bc->options ? "par96" : "picpar");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -440,7 +440,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        case HDLCDRVCTL_MODELIST:
                strcpy(hi->data.modename, "par96,picpar");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -449,7 +449,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        }
 
-       if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+       if (copy_from_user(&bi, data, sizeof(bi)))
                return -EFAULT;
        switch (bi.cmd) {
        default:
@@ -464,7 +464,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 #endif /* BAYCOM_DEBUG */
 
        }
-       if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+       if (copy_to_user(data, &bi, sizeof(bi)))
                return -EFAULT;
        return 0;
 
index 04bb409..646f605 100644 (file)
@@ -462,7 +462,7 @@ static int ser12_close(struct net_device *dev)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd);
 
 /* --------------------------------------------------------------------- */
@@ -497,7 +497,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd)
 {
        struct baycom_state *bc;
@@ -519,7 +519,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
                sprintf(hi->data.modename, "ser%u", bc->baud / 100);
                if (bc->opt_dcd <= 0)
                        strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : "+");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -531,7 +531,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        case HDLCDRVCTL_MODELIST:
                strcpy(hi->data.modename, "ser12,ser3,ser24");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -540,7 +540,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        }
 
-       if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+       if (copy_from_user(&bi, data, sizeof(bi)))
                return -EFAULT;
        switch (bi.cmd) {
        default:
@@ -555,7 +555,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 #endif /* BAYCOM_DEBUG */
 
        }
-       if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+       if (copy_to_user(data, &bi, sizeof(bi)))
                return -EFAULT;
        return 0;
 
index a1acb3a..5d1ab48 100644 (file)
@@ -521,7 +521,7 @@ static int ser12_close(struct net_device *dev)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd);
 
 /* --------------------------------------------------------------------- */
@@ -551,7 +551,7 @@ static int baycom_setmode(struct baycom_state *bc, const char *modestr)
 
 /* --------------------------------------------------------------------- */
 
-static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
+static int baycom_ioctl(struct net_device *dev, void __user *data,
                        struct hdlcdrv_ioctl *hi, int cmd)
 {
        struct baycom_state *bc;
@@ -573,7 +573,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
                strcpy(hi->data.modename, "ser12");
                if (bc->opt_dcd <= 0)
                        strcat(hi->data.modename, (!bc->opt_dcd) ? "*" : (bc->opt_dcd == -2) ? "@" : "+");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -585,7 +585,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        case HDLCDRVCTL_MODELIST:
                strcpy(hi->data.modename, "ser12");
-               if (copy_to_user(ifr->ifr_data, hi, sizeof(struct hdlcdrv_ioctl)))
+               if (copy_to_user(data, hi, sizeof(struct hdlcdrv_ioctl)))
                        return -EFAULT;
                return 0;
 
@@ -594,7 +594,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        }
 
-       if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+       if (copy_from_user(&bi, data, sizeof(bi)))
                return -EFAULT;
        switch (bi.cmd) {
        default:
@@ -609,7 +609,7 @@ static int baycom_ioctl(struct net_device *dev, struct ifreq *ifr,
 #endif /* BAYCOM_DEBUG */
 
        }
-       if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+       if (copy_to_user(data, &bi, sizeof(bi)))
                return -EFAULT;
        return 0;
 
index 0e623c2..d967b07 100644 (file)
@@ -314,9 +314,10 @@ static int bpq_set_mac_address(struct net_device *dev, void *addr)
  *                                     source ethernet address (broadcast
  *                                     or multicast: accept all)
  */
-static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, int cmd)
 {
-       struct bpq_ethaddr __user *ethaddr = ifr->ifr_data;
+       struct bpq_ethaddr __user *ethaddr = data;
        struct bpqdev *bpq = netdev_priv(dev);
        struct bpq_req req;
 
@@ -325,7 +326,7 @@ static int bpq_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
        switch (cmd) {
                case SIOCSBPQETHOPT:
-                       if (copy_from_user(&req, ifr->ifr_data, sizeof(struct bpq_req)))
+                       if (copy_from_user(&req, data, sizeof(struct bpq_req)))
                                return -EFAULT;
                        switch (req.cmd) {
                                case SIOCGBPQETHPARAM:
@@ -448,7 +449,7 @@ static const struct net_device_ops bpq_netdev_ops = {
        .ndo_stop            = bpq_close,
        .ndo_start_xmit      = bpq_xmit,
        .ndo_set_mac_address = bpq_set_mac_address,
-       .ndo_do_ioctl        = bpq_ioctl,
+       .ndo_siocdevprivate  = bpq_siocdevprivate,
 };
 
 static void bpq_setup(struct net_device *dev)
index c25c8c9..b50b7fa 100644 (file)
@@ -225,7 +225,8 @@ static int read_scc_data(struct scc_priv *priv);
 
 static int scc_open(struct net_device *dev);
 static int scc_close(struct net_device *dev);
-static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, int cmd);
 static int scc_send_packet(struct sk_buff *skb, struct net_device *dev);
 static int scc_set_mac_address(struct net_device *dev, void *sa);
 
@@ -432,7 +433,7 @@ static const struct net_device_ops scc_netdev_ops = {
        .ndo_open = scc_open,
        .ndo_stop = scc_close,
        .ndo_start_xmit = scc_send_packet,
-       .ndo_do_ioctl = scc_ioctl,
+       .ndo_siocdevprivate = scc_siocdevprivate,
        .ndo_set_mac_address = scc_set_mac_address,
 };
 
@@ -881,15 +882,13 @@ static int scc_close(struct net_device *dev)
 }
 
 
-static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int scc_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
 {
        struct scc_priv *priv = dev->ml_priv;
 
        switch (cmd) {
        case SIOCGSCCPARAM:
-               if (copy_to_user
-                   (ifr->ifr_data, &priv->param,
-                    sizeof(struct scc_param)))
+               if (copy_to_user(data, &priv->param, sizeof(struct scc_param)))
                        return -EFAULT;
                return 0;
        case SIOCSSCCPARAM:
@@ -897,13 +896,12 @@ static int scc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        return -EPERM;
                if (netif_running(dev))
                        return -EAGAIN;
-               if (copy_from_user
-                   (&priv->param, ifr->ifr_data,
-                    sizeof(struct scc_param)))
+               if (copy_from_user(&priv->param, data,
+                                  sizeof(struct scc_param)))
                        return -EFAULT;
                return 0;
        default:
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 }
 
index cbaf1cd..5805cfc 100644 (file)
@@ -483,23 +483,25 @@ static int hdlcdrv_close(struct net_device *dev)
 
 /* --------------------------------------------------------------------- */
 
-static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdrv_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                 void __user *data, int cmd)
 {
        struct hdlcdrv_state *s = netdev_priv(dev);
        struct hdlcdrv_ioctl bi;
 
-       if (cmd != SIOCDEVPRIVATE) {
-               if (s->ops && s->ops->ioctl)
-                       return s->ops->ioctl(dev, ifr, &bi, cmd);
+       if (cmd != SIOCDEVPRIVATE)
                return -ENOIOCTLCMD;
-       }
-       if (copy_from_user(&bi, ifr->ifr_data, sizeof(bi)))
+
+       if (in_compat_syscall()) /* to be implemented */
+               return -ENOIOCTLCMD;
+
+       if (copy_from_user(&bi, data, sizeof(bi)))
                return -EFAULT;
 
        switch (bi.cmd) {
        default:
                if (s->ops && s->ops->ioctl)
-                       return s->ops->ioctl(dev, ifr, &bi, cmd);
+                       return s->ops->ioctl(dev, data, &bi, cmd);
                return -ENOIOCTLCMD;
 
        case HDLCDRVCTL_GETCHANNELPAR:
@@ -605,7 +607,7 @@ static int hdlcdrv_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                break;
                
        }
-       if (copy_to_user(ifr->ifr_data, &bi, sizeof(bi)))
+       if (copy_to_user(data, &bi, sizeof(bi)))
                return -EFAULT;
        return 0;
 
@@ -617,7 +619,7 @@ static const struct net_device_ops hdlcdrv_netdev = {
        .ndo_open       = hdlcdrv_open,
        .ndo_stop       = hdlcdrv_close,
        .ndo_start_xmit = hdlcdrv_send_packet,
-       .ndo_do_ioctl   = hdlcdrv_ioctl,
+       .ndo_siocdevprivate  = hdlcdrv_siocdevprivate,
        .ndo_set_mac_address = hdlcdrv_set_mac_address,
 };
 
index 3f1edd0..e0bb131 100644 (file)
@@ -210,7 +210,8 @@ static int scc_net_close(struct net_device *dev);
 static void scc_net_rx(struct scc_channel *scc, struct sk_buff *skb);
 static netdev_tx_t scc_net_tx(struct sk_buff *skb,
                              struct net_device *dev);
-static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int scc_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                 void __user *data, int cmd);
 static int scc_net_set_mac_address(struct net_device *dev, void *addr);
 static struct net_device_stats * scc_net_get_stats(struct net_device *dev);
 
@@ -1550,7 +1551,7 @@ static const struct net_device_ops scc_netdev_ops = {
        .ndo_start_xmit      = scc_net_tx,
        .ndo_set_mac_address = scc_net_set_mac_address,
        .ndo_get_stats       = scc_net_get_stats,
-       .ndo_do_ioctl        = scc_net_ioctl,
+       .ndo_siocdevprivate  = scc_net_siocdevprivate,
 };
 
 /* ----> Initialize device <----- */
@@ -1703,7 +1704,8 @@ static netdev_tx_t scc_net_tx(struct sk_buff *skb, struct net_device *dev)
  * SIOCSCCCAL          - send calib. pattern   arg: (struct scc_calibrate *) arg
  */
 
-static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int scc_net_siocdevprivate(struct net_device *dev,
+                                 struct ifreq *ifr, void __user *arg, int cmd)
 {
        struct scc_kiss_cmd kiss_cmd;
        struct scc_mem_config memcfg;
@@ -1712,8 +1714,6 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        struct scc_channel *scc = (struct scc_channel *) dev->ml_priv;
        int chan;
        unsigned char device_name[IFNAMSIZ];
-       void __user *arg = ifr->ifr_data;
-       
        
        if (!Driver_Initialized)
        {
@@ -1722,6 +1722,9 @@ static int scc_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        int found = 1;
 
                        if (!capable(CAP_SYS_RAWIO)) return -EPERM;
+                       if (in_compat_syscall())
+                               return -EOPNOTSUPP;
+
                        if (!arg) return -EFAULT;
 
                        if (Nchips >= SCC_MAXCHIPS) 
index d491104..6ddacbd 100644 (file)
@@ -920,15 +920,15 @@ static int yam_close(struct net_device *dev)
 
 /* --------------------------------------------------------------------- */
 
-static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
 {
        struct yam_port *yp = netdev_priv(dev);
        struct yamdrv_ioctl_cfg yi;
        struct yamdrv_ioctl_mcs *ym;
        int ioctl_cmd;
 
-       if (copy_from_user(&ioctl_cmd, ifr->ifr_data, sizeof(int)))
-                return -EFAULT;
+       if (copy_from_user(&ioctl_cmd, data, sizeof(int)))
+               return -EFAULT;
 
        if (yp->magic != YAM_MAGIC)
                return -EINVAL;
@@ -947,8 +947,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCYAMSMCS:
                if (netif_running(dev))
                        return -EINVAL;         /* Cannot change this parameter when up */
-               ym = memdup_user(ifr->ifr_data,
-                                sizeof(struct yamdrv_ioctl_mcs));
+               ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs));
                if (IS_ERR(ym))
                        return PTR_ERR(ym);
                if (ym->cmd != SIOCYAMSMCS)
@@ -965,8 +964,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCYAMSCFG:
                if (!capable(CAP_SYS_RAWIO))
                        return -EPERM;
-               if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg)))
-                        return -EFAULT;
+               if (copy_from_user(&yi, data, sizeof(struct yamdrv_ioctl_cfg)))
+                       return -EFAULT;
 
                if (yi.cmd != SIOCYAMSCFG)
                        return -EINVAL;
@@ -1045,8 +1044,8 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                yi.cfg.txtail = yp->txtail;
                yi.cfg.persist = yp->pers;
                yi.cfg.slottime = yp->slot;
-               if (copy_to_user(ifr->ifr_data, &yi, sizeof(struct yamdrv_ioctl_cfg)))
-                        return -EFAULT;
+               if (copy_to_user(data, &yi, sizeof(struct yamdrv_ioctl_cfg)))
+                       return -EFAULT;
                break;
 
        default:
@@ -1074,7 +1073,7 @@ static const struct net_device_ops yam_netdev_ops = {
        .ndo_open            = yam_open,
        .ndo_stop            = yam_close,
        .ndo_start_xmit      = yam_send_packet,
-       .ndo_do_ioctl        = yam_ioctl,
+       .ndo_siocdevprivate  = yam_siocdevprivate,
        .ndo_set_mac_address = yam_set_mac_address,
 };
 
index 2201038..7661dbb 100644 (file)
@@ -63,7 +63,7 @@ static const char version[] =
 static const struct net_device_ops rr_netdev_ops = {
        .ndo_open               = rr_open,
        .ndo_stop               = rr_close,
-       .ndo_do_ioctl           = rr_ioctl,
+       .ndo_siocdevprivate     = rr_siocdevprivate,
        .ndo_start_xmit         = rr_start_xmit,
        .ndo_set_mac_address    = hippi_mac_addr,
 };
@@ -1568,7 +1568,8 @@ out:
 }
 
 
-static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                            void __user *data, int cmd)
 {
        struct rr_private *rrpriv;
        unsigned char *image, *oldimage;
@@ -1603,7 +1604,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        error = -EFAULT;
                        goto gf_out;
                }
-               error = copy_to_user(rq->ifr_data, image, EEPROM_BYTES);
+               error = copy_to_user(data, image, EEPROM_BYTES);
                if (error)
                        error = -EFAULT;
        gf_out:
@@ -1615,7 +1616,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        return -EPERM;
                }
 
-               image = memdup_user(rq->ifr_data, EEPROM_BYTES);
+               image = memdup_user(data, EEPROM_BYTES);
                if (IS_ERR(image))
                        return PTR_ERR(image);
 
@@ -1658,7 +1659,7 @@ static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                return error;
 
        case SIOCRRID:
-               return put_user(0x52523032, (int __user *)rq->ifr_data);
+               return put_user(0x52523032, (int __user *)data);
        default:
                return error;
        }
index 8753378..5537761 100644 (file)
@@ -835,7 +835,8 @@ static int rr_open(struct net_device *dev);
 static netdev_tx_t rr_start_xmit(struct sk_buff *skb,
                                 struct net_device *dev);
 static int rr_close(struct net_device *dev);
-static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int rr_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                            void __user *data, int cmd);
 static unsigned int rr_read_eeprom(struct rr_private *rrpriv,
                                   unsigned long offset,
                                   unsigned char *buf,
index 506f8d5..bdfb243 100644 (file)
@@ -1,9 +1,6 @@
-# Un-comment the next line if you want to validate configuration data
-#ccflags-y             +=      -DIPA_VALIDATE
-
 obj-$(CONFIG_QCOM_IPA) +=      ipa.o
 
-ipa-y                  :=      ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
+ipa-y                  :=      ipa_main.o ipa_power.o ipa_reg.o ipa_mem.o \
                                ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \
                                ipa_gsi.o ipa_smp2p.o ipa_uc.o \
                                ipa_endpoint.o ipa_cmd.o ipa_modem.o \
index 427c68b..a2fcdb1 100644 (file)
@@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id)
        gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
 }
 
-/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
-static void gsi_irq_setup(struct gsi *gsi)
-{
-       /* Disable all interrupt types */
-       gsi_irq_type_update(gsi, 0);
-
-       /* Clear all type-specific interrupt masks */
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
-
-       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
-       if (gsi->version > IPA_VERSION_3_1) {
-               u32 offset;
-
-               /* These registers are in the non-adjusted address range */
-               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-       }
-
-       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
-}
-
-/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
-static int gsi_ring_setup(struct gsi *gsi)
-{
-       struct device *dev = gsi->dev;
-       u32 count;
-       u32 val;
-
-       if (gsi->version < IPA_VERSION_3_5_1) {
-               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
-               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
-               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-
-               return 0;
-       }
-
-       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
-       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero channels supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_CHANNEL_COUNT_MAX) {
-               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
-                        GSI_CHANNEL_COUNT_MAX, count);
-               count = GSI_CHANNEL_COUNT_MAX;
-       }
-       gsi->channel_count = count;
-
-       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero event rings supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_EVT_RING_COUNT_MAX) {
-               dev_warn(dev,
-                        "limiting to %u event rings; hardware supports %u\n",
-                        GSI_EVT_RING_COUNT_MAX, count);
-               count = GSI_EVT_RING_COUNT_MAX;
-       }
-       gsi->evt_ring_count = count;
-
-       return 0;
-}
-
 /* Event ring commands are performed one at a time.  Their completion
  * is signaled by the event ring control GSI interrupt type, which is
  * only enabled when we issue an event ring command.  Only the event
@@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
        /* All done! */
 }
 
-static int __gsi_channel_start(struct gsi_channel *channel, bool start)
+static int __gsi_channel_start(struct gsi_channel *channel, bool resume)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
 
-       if (!start)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (resume && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
        napi_enable(&channel->napi);
        gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
 
-       ret = __gsi_channel_start(channel, true);
+       ret = __gsi_channel_start(channel, false);
        if (ret) {
                gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
                napi_disable(&channel->napi);
@@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel)
        return ret;
 }
 
-static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
@@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
        /* Wait for any underway transactions to complete before stopping. */
        gsi_channel_trans_quiesce(channel);
 
-       if (!stop)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (suspend && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, true);
+       ret = __gsi_channel_stop(channel, false);
        if (ret)
                return ret;
 
@@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
        mutex_unlock(&gsi->mutex);
 }
 
-/* Stop a STARTED channel for suspend (using stop if requested) */
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
+/* Stop a started channel for suspend */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, stop);
+       ret = __gsi_channel_stop(channel, true);
        if (ret)
                return ret;
 
@@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
        return 0;
 }
 
-/* Resume a suspended channel (starting will be requested if STOPPED) */
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start)
+/* Resume a suspended channel (starting if stopped) */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 
-       return __gsi_channel_start(channel, start);
+       return __gsi_channel_start(channel, true);
+}
+
+/* Prevent all GSI interrupts while suspended */
+void gsi_suspend(struct gsi *gsi)
+{
+       disable_irq(gsi->irq);
+}
+
+/* Allow all GSI interrupts again when resuming */
+void gsi_resume(struct gsi *gsi)
+{
+       enable_irq(gsi->irq);
 }
 
 /**
@@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */
 static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
-       unsigned int irq;
        int ret;
 
        ret = platform_get_irq_byname(pdev, "gsi");
        if (ret <= 0)
                return ret ? : -EINVAL;
 
-       irq = ret;
-
-       ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
-       if (ret) {
-               dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret);
-               return ret;
-       }
-       gsi->irq = irq;
+       gsi->irq = ret;
 
        return 0;
 }
 
-static void gsi_irq_exit(struct gsi *gsi)
-{
-       free_irq(gsi->irq, gsi);
-}
-
 /* Return the transaction associated with a transfer completion event */
 static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
                                         struct gsi_event *event)
@@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi)
        gsi_irq_disable(gsi);
 }
 
+/* Turn off all GSI interrupts initially */
+static int gsi_irq_setup(struct gsi *gsi)
+{
+       int ret;
+
+       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
+       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+
+       /* Disable all interrupt types */
+       gsi_irq_type_update(gsi, 0);
+
+       /* Clear all type-specific interrupt masks */
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
+
+       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+       if (gsi->version > IPA_VERSION_3_1) {
+               u32 offset;
+
+               /* These registers are in the non-adjusted address range */
+               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+       }
+
+       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
+
+       ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi);
+       if (ret)
+               dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret);
+
+       return ret;
+}
+
+static void gsi_irq_teardown(struct gsi *gsi)
+{
+       free_irq(gsi->irq, gsi);
+}
+
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+       struct device *dev = gsi->dev;
+       u32 count;
+       u32 val;
+
+       if (gsi->version < IPA_VERSION_3_5_1) {
+               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+               return 0;
+       }
+
+       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero channels supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_CHANNEL_COUNT_MAX) {
+               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+                        GSI_CHANNEL_COUNT_MAX, count);
+               count = GSI_CHANNEL_COUNT_MAX;
+       }
+       gsi->channel_count = count;
+
+       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero event rings supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_EVT_RING_COUNT_MAX) {
+               dev_warn(dev,
+                        "limiting to %u event rings; hardware supports %u\n",
+                        GSI_EVT_RING_COUNT_MAX, count);
+               count = GSI_EVT_RING_COUNT_MAX;
+       }
+       gsi->evt_ring_count = count;
+
+       return 0;
+}
+
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
 int gsi_setup(struct gsi *gsi)
 {
@@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi)
                return -EIO;
        }
 
-       gsi_irq_setup(gsi);             /* No matching teardown required */
+       ret = gsi_irq_setup(gsi);
+       if (ret)
+               return ret;
 
        ret = gsi_ring_setup(gsi);      /* No matching teardown required */
        if (ret)
-               return ret;
+               goto err_irq_teardown;
 
        /* Initialize the error log */
        iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
 
-       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
-       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+       ret = gsi_channel_setup(gsi);
+       if (ret)
+               goto err_irq_teardown;
 
-       return gsi_channel_setup(gsi);
+       return 0;
+
+err_irq_teardown:
+       gsi_irq_teardown(gsi);
+
+       return ret;
 }
 
 /* Inverse of gsi_setup() */
 void gsi_teardown(struct gsi *gsi)
 {
        gsi_channel_teardown(gsi);
+       gsi_irq_teardown(gsi);
 }
 
 /* Initialize a channel's event ring */
@@ -1964,7 +1990,6 @@ static void gsi_evt_ring_init(struct gsi *gsi)
 static bool gsi_channel_data_valid(struct gsi *gsi,
                                   const struct ipa_gsi_endpoint_data *data)
 {
-#ifdef IPA_VALIDATION
        u32 channel_id = data->channel_id;
        struct device *dev = gsi->dev;
 
@@ -2010,7 +2035,6 @@ static bool gsi_channel_data_valid(struct gsi *gsi,
                        channel_id, data->channel.event_count);
                return false;
        }
-#endif /* IPA_VALIDATION */
 
        return true;
 }
@@ -2206,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev,
 
        init_completion(&gsi->completion);
 
-       ret = gsi_irq_init(gsi, pdev);
+       ret = gsi_irq_init(gsi, pdev);  /* No matching exit required */
        if (ret)
                goto err_iounmap;
 
        ret = gsi_channel_init(gsi, count, data);
        if (ret)
-               goto err_irq_exit;
+               goto err_iounmap;
 
        mutex_init(&gsi->mutex);
 
        return 0;
 
-err_irq_exit:
-       gsi_irq_exit(gsi);
 err_iounmap:
        iounmap(gsi->virt_raw);
 
@@ -2231,7 +2253,6 @@ void gsi_exit(struct gsi *gsi)
 {
        mutex_destroy(&gsi->mutex);
        gsi_channel_exit(gsi);
-       gsi_irq_exit(gsi);
        iounmap(gsi->virt_raw);
 }
 
index 81cd7b0..88b80dc 100644 (file)
@@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id);
  */
 void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell);
 
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
+/**
+ * gsi_suspend() - Prepare the GSI subsystem for suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_suspend(struct gsi *gsi);
+
+/**
+ * gsi_resume() - Resume the GSI subsystem following suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_resume(struct gsi *gsi);
+
+/**
+ * gsi_channel_suspend() - Suspend a GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to suspend
+ *
+ * For IPA v4.0+, suspend is implemented by stopping the channel.
+ */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id);
+
+/**
+ * gsi_channel_resume() - Resume a suspended GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to resume
+ *
+ * For IPA v4.0+, the stopped channel is started again.
+ */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id);
 
 /**
  * gsi_init() - Initialize the GSI subsystem
index 8c795a6..1544564 100644 (file)
@@ -90,14 +90,12 @@ int gsi_trans_pool_init(struct gsi_trans_pool *pool, size_t size, u32 count,
 {
        void *virt;
 
-#ifdef IPA_VALIDATE
        if (!size)
                return -EINVAL;
        if (count < max_alloc)
                return -EINVAL;
        if (!max_alloc)
                return -EINVAL;
-#endif /* IPA_VALIDATE */
 
        /* By allocating a few extra entries in our pool (one less
         * than the maximum number that will be requested in a
@@ -140,14 +138,12 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
        dma_addr_t addr;
        void *virt;
 
-#ifdef IPA_VALIDATE
        if (!size)
                return -EINVAL;
        if (count < max_alloc)
                return -EINVAL;
        if (!max_alloc)
                return -EINVAL;
-#endif /* IPA_VALIDATE */
 
        /* Don't let allocations cross a power-of-two boundary */
        size = __roundup_pow_of_two(size);
@@ -188,8 +184,8 @@ static u32 gsi_trans_pool_alloc_common(struct gsi_trans_pool *pool, u32 count)
 {
        u32 offset;
 
-       /* assert(count > 0); */
-       /* assert(count <= pool->max_alloc); */
+       WARN_ON(!count);
+       WARN_ON(count > pool->max_alloc);
 
        /* Allocate from beginning if wrap would occur */
        if (count > pool->count - pool->free)
@@ -225,9 +221,10 @@ void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element)
 {
        void *end = pool->base + pool->count * pool->size;
 
-       /* assert(element >= pool->base); */
-       /* assert(element < end); */
-       /* assert(pool->max_alloc == 1); */
+       WARN_ON(element < pool->base);
+       WARN_ON(element >= end);
+       WARN_ON(pool->max_alloc != 1);
+
        element += pool->size;
 
        return element < end ? element : pool->base;
@@ -332,7 +329,8 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
        struct gsi_trans_info *trans_info;
        struct gsi_trans *trans;
 
-       /* assert(tre_count <= gsi_channel_trans_tre_max(gsi, channel_id)); */
+       if (WARN_ON(tre_count > gsi_channel_trans_tre_max(gsi, channel_id)))
+               return NULL;
 
        trans_info = &channel->trans_info;
 
@@ -408,7 +406,7 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,
        u32 which = trans->used++;
        struct scatterlist *sg;
 
-       /* assert(which < trans->tre_count); */
+       WARN_ON(which >= trans->tre_count);
 
        /* Commands are quite different from data transfer requests.
         * Their payloads come from a pool whose memory is allocated
@@ -441,8 +439,10 @@ int gsi_trans_page_add(struct gsi_trans *trans, struct page *page, u32 size,
        struct scatterlist *sg = &trans->sgl[0];
        int ret;
 
-       /* assert(trans->tre_count == 1); */
-       /* assert(!trans->used); */
+       if (WARN_ON(trans->tre_count != 1))
+               return -EINVAL;
+       if (WARN_ON(trans->used))
+               return -EINVAL;
 
        sg_set_page(sg, page, size, offset);
        ret = dma_map_sg(trans->gsi->dev, sg, 1, trans->direction);
@@ -461,8 +461,10 @@ int gsi_trans_skb_add(struct gsi_trans *trans, struct sk_buff *skb)
        u32 used;
        int ret;
 
-       /* assert(trans->tre_count == 1); */
-       /* assert(!trans->used); */
+       if (WARN_ON(trans->tre_count != 1))
+               return -EINVAL;
+       if (WARN_ON(trans->used))
+               return -EINVAL;
 
        /* skb->len will not be 0 (checked early) */
        ret = skb_to_sgvec(skb, sg, 0, skb->len);
@@ -550,7 +552,7 @@ static void __gsi_trans_commit(struct gsi_trans *trans, bool ring_db)
        u32 avail;
        u32 i;
 
-       /* assert(trans->used > 0); */
+       WARN_ON(!trans->used);
 
        /* Consume the entries.  If we cross the end of the ring while
         * filling them we'll switch to the beginning to finish.
index 7444068..9fc880e 100644 (file)
@@ -23,34 +23,24 @@ struct icc_path;
 struct net_device;
 struct platform_device;
 
-struct ipa_clock;
+struct ipa_power;
 struct ipa_smp2p;
 struct ipa_interrupt;
 
-/**
- * enum ipa_flag - IPA state flags
- * @IPA_FLAG_RESUMED:  Whether resume from suspend has been signaled
- * @IPA_FLAG_COUNT:    Number of defined IPA flags
- */
-enum ipa_flag {
-       IPA_FLAG_RESUMED,
-       IPA_FLAG_COUNT,         /* Last; not a flag */
-};
-
 /**
  * struct ipa - IPA information
  * @gsi:               Embedded GSI structure
- * @flags:             Boolean state flags
  * @version:           IPA hardware version
  * @pdev:              Platform device
  * @completion:                Used to signal pipeline clear transfer complete
  * @nb:                        Notifier block used for remoteproc SSR
  * @notifier:          Remoteproc SSR notifier
  * @smp2p:             SMP2P information
- * @clock:             IPA clocking information
+ * @power:             IPA power information
  * @table_addr:                DMA address of filter/route table content
  * @table_virt:                Virtual address of filter/route table content
  * @interrupt:         IPA Interrupt information
+ * @uc_powered:                true if power is active by proxy for microcontroller
  * @uc_loaded:         true after microcontroller has reported it's ready
  * @reg_addr:          DMA address used for IPA register access
  * @reg_virt:          Virtual address used for IPA register access
@@ -82,19 +72,19 @@ enum ipa_flag {
  */
 struct ipa {
        struct gsi gsi;
-       DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
        enum ipa_version version;
        struct platform_device *pdev;
        struct completion completion;
        struct notifier_block nb;
        void *notifier;
        struct ipa_smp2p *smp2p;
-       struct ipa_clock *clock;
+       struct ipa_power *power;
 
        dma_addr_t table_addr;
        __le64 *table_virt;
 
        struct ipa_interrupt *interrupt;
+       bool uc_powered;
        bool uc_loaded;
 
        dma_addr_t reg_addr;
@@ -144,11 +134,11 @@ struct ipa {
  *
  * Activities performed at the init stage can be done without requiring
  * any access to IPA hardware.  Activities performed at the config stage
- * require the IPA clock to be running, because they involve access
- * to IPA registers.  The setup stage is performed only after the GSI
- * hardware is ready (more on this below).  The setup stage allows
- * the AP to perform more complex initialization by issuing "immediate
- * commands" using a special interface to the IPA.
+ * require IPA power, because they involve access to IPA registers.
+ * The setup stage is performed only after the GSI hardware is ready
+ * (more on this below).  The setup stage allows the AP to perform
+ * more complex initialization by issuing "immediate commands" using
+ * a special interface to the IPA.
  *
  * This function, @ipa_setup(), starts the setup stage.
  *
diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c
deleted file mode 100644 (file)
index 69ef6ea..0000000
+++ /dev/null
@@ -1,331 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2021 Linaro Ltd.
- */
-
-#include <linux/refcount.h>
-#include <linux/mutex.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/interconnect.h>
-
-#include "ipa.h"
-#include "ipa_clock.h"
-#include "ipa_modem.h"
-#include "ipa_data.h"
-
-/**
- * DOC: IPA Clocking
- *
- * The "IPA Clock" manages both the IPA core clock and the interconnects
- * (buses) the IPA depends on as a single logical entity.  A reference count
- * is incremented by "get" operations and decremented by "put" operations.
- * Transitions of that count from 0 to 1 result in the clock and interconnects
- * being enabled, and transitions of the count from 1 to 0 cause them to be
- * disabled.  We currently operate the core clock at a fixed clock rate, and
- * all buses at a fixed average and peak bandwidth.  As more advanced IPA
- * features are enabled, we can make better use of clock and bus scaling.
- *
- * An IPA clock reference must be held for any access to IPA hardware.
- */
-
-/**
- * struct ipa_interconnect - IPA interconnect information
- * @path:              Interconnect path
- * @average_bandwidth: Average interconnect bandwidth (KB/second)
- * @peak_bandwidth:    Peak interconnect bandwidth (KB/second)
- */
-struct ipa_interconnect {
-       struct icc_path *path;
-       u32 average_bandwidth;
-       u32 peak_bandwidth;
-};
-
-/**
- * struct ipa_clock - IPA clocking information
- * @count:             Clocking reference count
- * @mutex:             Protects clock enable/disable
- * @core:              IPA core clock
- * @interconnect_count:        Number of elements in interconnect[]
- * @interconnect:      Interconnect array
- */
-struct ipa_clock {
-       refcount_t count;
-       struct mutex mutex; /* protects clock enable/disable */
-       struct clk *core;
-       u32 interconnect_count;
-       struct ipa_interconnect *interconnect;
-};
-
-static int ipa_interconnect_init_one(struct device *dev,
-                                    struct ipa_interconnect *interconnect,
-                                    const struct ipa_interconnect_data *data)
-{
-       struct icc_path *path;
-
-       path = of_icc_get(dev, data->name);
-       if (IS_ERR(path)) {
-               int ret = PTR_ERR(path);
-
-               dev_err_probe(dev, ret, "error getting %s interconnect\n",
-                             data->name);
-
-               return ret;
-       }
-
-       interconnect->path = path;
-       interconnect->average_bandwidth = data->average_bandwidth;
-       interconnect->peak_bandwidth = data->peak_bandwidth;
-
-       return 0;
-}
-
-static void ipa_interconnect_exit_one(struct ipa_interconnect *interconnect)
-{
-       icc_put(interconnect->path);
-       memset(interconnect, 0, sizeof(*interconnect));
-}
-
-/* Initialize interconnects required for IPA operation */
-static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev,
-                                const struct ipa_interconnect_data *data)
-{
-       struct ipa_interconnect *interconnect;
-       u32 count;
-       int ret;
-
-       count = clock->interconnect_count;
-       interconnect = kcalloc(count, sizeof(*interconnect), GFP_KERNEL);
-       if (!interconnect)
-               return -ENOMEM;
-       clock->interconnect = interconnect;
-
-       while (count--) {
-               ret = ipa_interconnect_init_one(dev, interconnect, data++);
-               if (ret)
-                       goto out_unwind;
-               interconnect++;
-       }
-
-       return 0;
-
-out_unwind:
-       while (interconnect-- > clock->interconnect)
-               ipa_interconnect_exit_one(interconnect);
-       kfree(clock->interconnect);
-       clock->interconnect = NULL;
-
-       return ret;
-}
-
-/* Inverse of ipa_interconnect_init() */
-static void ipa_interconnect_exit(struct ipa_clock *clock)
-{
-       struct ipa_interconnect *interconnect;
-
-       interconnect = clock->interconnect + clock->interconnect_count;
-       while (interconnect-- > clock->interconnect)
-               ipa_interconnect_exit_one(interconnect);
-       kfree(clock->interconnect);
-       clock->interconnect = NULL;
-}
-
-/* Currently we only use one bandwidth level, so just "enable" interconnects */
-static int ipa_interconnect_enable(struct ipa *ipa)
-{
-       struct ipa_interconnect *interconnect;
-       struct ipa_clock *clock = ipa->clock;
-       int ret;
-       u32 i;
-
-       interconnect = clock->interconnect;
-       for (i = 0; i < clock->interconnect_count; i++) {
-               ret = icc_set_bw(interconnect->path,
-                                interconnect->average_bandwidth,
-                                interconnect->peak_bandwidth);
-               if (ret)
-                       goto out_unwind;
-               interconnect++;
-       }
-
-       return 0;
-
-out_unwind:
-       while (interconnect-- > clock->interconnect)
-               (void)icc_set_bw(interconnect->path, 0, 0);
-
-       return ret;
-}
-
-/* To disable an interconnect, we just its bandwidth to 0 */
-static void ipa_interconnect_disable(struct ipa *ipa)
-{
-       struct ipa_interconnect *interconnect;
-       struct ipa_clock *clock = ipa->clock;
-       int result = 0;
-       u32 count;
-       int ret;
-
-       count = clock->interconnect_count;
-       interconnect = clock->interconnect + count;
-       while (count--) {
-               interconnect--;
-               ret = icc_set_bw(interconnect->path, 0, 0);
-               if (ret && !result)
-                       result = ret;
-       }
-
-       if (result)
-               dev_err(&ipa->pdev->dev,
-                       "error %d disabling IPA interconnects\n", ret);
-}
-
-/* Turn on IPA clocks, including interconnects */
-static int ipa_clock_enable(struct ipa *ipa)
-{
-       int ret;
-
-       ret = ipa_interconnect_enable(ipa);
-       if (ret)
-               return ret;
-
-       ret = clk_prepare_enable(ipa->clock->core);
-       if (ret)
-               ipa_interconnect_disable(ipa);
-
-       return ret;
-}
-
-/* Inverse of ipa_clock_enable() */
-static void ipa_clock_disable(struct ipa *ipa)
-{
-       clk_disable_unprepare(ipa->clock->core);
-       ipa_interconnect_disable(ipa);
-}
-
-/* Get an IPA clock reference, but only if the reference count is
- * already non-zero.  Returns true if the additional reference was
- * added successfully, or false otherwise.
- */
-bool ipa_clock_get_additional(struct ipa *ipa)
-{
-       return refcount_inc_not_zero(&ipa->clock->count);
-}
-
-/* Get an IPA clock reference.  If the reference count is non-zero, it is
- * incremented and return is immediate.  Otherwise it is checked again
- * under protection of the mutex, and if appropriate the IPA clock
- * is enabled.
- *
- * Incrementing the reference count is intentionally deferred until
- * after the clock is running and endpoints are resumed.
- */
-void ipa_clock_get(struct ipa *ipa)
-{
-       struct ipa_clock *clock = ipa->clock;
-       int ret;
-
-       /* If the clock is running, just bump the reference count */
-       if (ipa_clock_get_additional(ipa))
-               return;
-
-       /* Otherwise get the mutex and check again */
-       mutex_lock(&clock->mutex);
-
-       /* A reference might have been added before we got the mutex. */
-       if (ipa_clock_get_additional(ipa))
-               goto out_mutex_unlock;
-
-       ret = ipa_clock_enable(ipa);
-       if (ret) {
-               dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret);
-               goto out_mutex_unlock;
-       }
-
-       refcount_set(&clock->count, 1);
-
-out_mutex_unlock:
-       mutex_unlock(&clock->mutex);
-}
-
-/* Attempt to remove an IPA clock reference.  If this represents the
- * last reference, disable the IPA clock under protection of the mutex.
- */
-void ipa_clock_put(struct ipa *ipa)
-{
-       struct ipa_clock *clock = ipa->clock;
-
-       /* If this is not the last reference there's nothing more to do */
-       if (!refcount_dec_and_mutex_lock(&clock->count, &clock->mutex))
-               return;
-
-       ipa_clock_disable(ipa);
-
-       mutex_unlock(&clock->mutex);
-}
-
-/* Return the current IPA core clock rate */
-u32 ipa_clock_rate(struct ipa *ipa)
-{
-       return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0;
-}
-
-/* Initialize IPA clocking */
-struct ipa_clock *
-ipa_clock_init(struct device *dev, const struct ipa_clock_data *data)
-{
-       struct ipa_clock *clock;
-       struct clk *clk;
-       int ret;
-
-       clk = clk_get(dev, "core");
-       if (IS_ERR(clk)) {
-               dev_err_probe(dev, PTR_ERR(clk), "error getting core clock\n");
-
-               return ERR_CAST(clk);
-       }
-
-       ret = clk_set_rate(clk, data->core_clock_rate);
-       if (ret) {
-               dev_err(dev, "error %d setting core clock rate to %u\n",
-                       ret, data->core_clock_rate);
-               goto err_clk_put;
-       }
-
-       clock = kzalloc(sizeof(*clock), GFP_KERNEL);
-       if (!clock) {
-               ret = -ENOMEM;
-               goto err_clk_put;
-       }
-       clock->core = clk;
-       clock->interconnect_count = data->interconnect_count;
-
-       ret = ipa_interconnect_init(clock, dev, data->interconnect_data);
-       if (ret)
-               goto err_kfree;
-
-       mutex_init(&clock->mutex);
-       refcount_set(&clock->count, 0);
-
-       return clock;
-
-err_kfree:
-       kfree(clock);
-err_clk_put:
-       clk_put(clk);
-
-       return ERR_PTR(ret);
-}
-
-/* Inverse of ipa_clock_init() */
-void ipa_clock_exit(struct ipa_clock *clock)
-{
-       struct clk *clk = clock->core;
-
-       WARN_ON(refcount_read(&clock->count) != 0);
-       mutex_destroy(&clock->mutex);
-       ipa_interconnect_exit(clock);
-       kfree(clock);
-       clk_put(clk);
-}
diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h
deleted file mode 100644 (file)
index 1fe6347..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
- */
-#ifndef _IPA_CLOCK_H_
-#define _IPA_CLOCK_H_
-
-struct device;
-
-struct ipa;
-struct ipa_clock_data;
-
-/**
- * ipa_clock_rate() - Return the current IPA core clock rate
- * @ipa:       IPA structure
- *
- * Return: The current clock rate (in Hz), or 0.
- */
-u32 ipa_clock_rate(struct ipa *ipa);
-
-/**
- * ipa_clock_init() - Initialize IPA clocking
- * @dev:       IPA device
- * @data:      Clock configuration data
- *
- * Return:     A pointer to an ipa_clock structure, or a pointer-coded error
- */
-struct ipa_clock *ipa_clock_init(struct device *dev,
-                                const struct ipa_clock_data *data);
-
-/**
- * ipa_clock_exit() - Inverse of ipa_clock_init()
- * @clock:     IPA clock pointer
- */
-void ipa_clock_exit(struct ipa_clock *clock);
-
-/**
- * ipa_clock_get() - Get an IPA clock reference
- * @ipa:       IPA pointer
- *
- * This call blocks if this is the first reference.
- */
-void ipa_clock_get(struct ipa *ipa);
-
-/**
- * ipa_clock_get_additional() - Get an IPA clock reference if not first
- * @ipa:       IPA pointer
- *
- * This returns immediately, and only takes a reference if not the first
- */
-bool ipa_clock_get_additional(struct ipa *ipa);
-
-/**
- * ipa_clock_put() - Drop an IPA clock reference
- * @ipa:       IPA pointer
- *
- * This drops a clock reference.  If the last reference is being dropped,
- * the clock is stopped and RX endpoints are suspended.  This call will
- * not block unless the last reference is dropped.
- */
-void ipa_clock_put(struct ipa *ipa);
-
-#endif /* _IPA_CLOCK_H_ */
index af44ca4..cff5173 100644 (file)
@@ -159,35 +159,49 @@ static void ipa_cmd_validate_build(void)
        BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK));
 #undef TABLE_COUNT_MAX
 #undef TABLE_SIZE
-}
 
-#ifdef IPA_VALIDATE
+       /* Hashed and non-hashed fields are assumed to be the same size */
+       BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK) !=
+                    field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK));
+       BUILD_BUG_ON(field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK) !=
+                    field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK));
+
+       /* Valid endpoint numbers must fit in the IP packet init command */
+       BUILD_BUG_ON(field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK) <
+                    IPA_ENDPOINT_MAX - 1);
+}
 
 /* Validate a memory region holding a table */
-bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
-                        bool route, bool ipv6, bool hashed)
+bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem, bool route)
 {
+       u32 offset_max = field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
+       u32 size_max = field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK);
+       const char *table = route ? "route" : "filter";
        struct device *dev = &ipa->pdev->dev;
-       u32 offset_max;
 
-       offset_max = hashed ? field_max(IP_FLTRT_FLAGS_HASH_ADDR_FMASK)
-                           : field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
+       /* Size must fit in the immediate command field that holds it */
+       if (mem->size > size_max) {
+               dev_err(dev, "%s table region size too large\n", table);
+               dev_err(dev, "    (0x%04x > 0x%04x)\n",
+                       mem->size, size_max);
+
+               return false;
+       }
+
+       /* Offset must fit in the immediate command field that holds it */
        if (mem->offset > offset_max ||
            ipa->mem_offset > offset_max - mem->offset) {
-               dev_err(dev, "IPv%c %s%s table region offset too large\n",
-                       ipv6 ? '6' : '4', hashed ? "hashed " : "",
-                       route ? "route" : "filter");
+               dev_err(dev, "%s table region offset too large\n", table);
                dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
                        ipa->mem_offset, mem->offset, offset_max);
 
                return false;
        }
 
+       /* Entire memory range must fit within IPA-local memory */
        if (mem->offset > ipa->mem_size ||
            mem->size > ipa->mem_size - mem->offset) {
-               dev_err(dev, "IPv%c %s%s table region out of range\n",
-                       ipv6 ? '6' : '4', hashed ? "hashed " : "",
-                       route ? "route" : "filter");
+               dev_err(dev, "%s table region out of range\n", table);
                dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
                        mem->offset, mem->size, ipa->mem_size);
 
@@ -331,7 +345,6 @@ bool ipa_cmd_data_valid(struct ipa *ipa)
        return true;
 }
 
-#endif /* IPA_VALIDATE */
 
 int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_max)
 {
@@ -522,9 +535,6 @@ static void ipa_cmd_ip_packet_init_add(struct gsi_trans *trans, u8 endpoint_id)
        union ipa_cmd_payload *cmd_payload;
        dma_addr_t payload_addr;
 
-       /* assert(endpoint_id <
-                 field_max(IPA_PACKET_INIT_DEST_ENDPOINT_FMASK)); */
-
        cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr);
        payload = &cmd_payload->ip_packet_init;
 
@@ -548,8 +558,9 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size,
        u16 flags;
 
        /* size and offset must fit in 16 bit fields */
-       /* assert(size > 0 && size <= U16_MAX); */
-       /* assert(offset <= U16_MAX && ipa->mem_offset <= U16_MAX - offset); */
+       WARN_ON(!size);
+       WARN_ON(size > U16_MAX);
+       WARN_ON(offset > U16_MAX || ipa->mem_offset > U16_MAX - offset);
 
        offset += ipa->mem_offset;
 
@@ -588,8 +599,6 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans)
        union ipa_cmd_payload *cmd_payload;
        dma_addr_t payload_addr;
 
-       /* assert(tag <= field_max(IP_PACKET_TAG_STATUS_TAG_FMASK)); */
-
        cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr);
        payload = &cmd_payload->ip_packet_tag_status;
 
index b992622..69cd085 100644 (file)
@@ -57,20 +57,16 @@ struct ipa_cmd_info {
        enum dma_data_direction direction;
 };
 
-#ifdef IPA_VALIDATE
-
 /**
  * ipa_cmd_table_valid() - Validate a memory region holding a table
  * @ipa:       - IPA pointer
  * @mem:       - IPA memory region descriptor
  * @route:     - Whether the region holds a route or filter table
- * @ipv6:      - Whether the table is for IPv6 or IPv4
- * @hashed:    - Whether the table is hashed or non-hashed
  *
  * Return:     true if region is valid, false otherwise
  */
 bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
-                           bool route, bool ipv6, bool hashed);
+                           bool route);
 
 /**
  * ipa_cmd_data_valid() - Validate command-realted configuration is valid
@@ -80,22 +76,6 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
  */
 bool ipa_cmd_data_valid(struct ipa *ipa);
 
-#else /* !IPA_VALIDATE */
-
-static inline bool ipa_cmd_table_valid(struct ipa *ipa,
-                                      const struct ipa_mem *mem, bool route,
-                                      bool ipv6, bool hashed)
-{
-       return true;
-}
-
-static inline bool ipa_cmd_data_valid(struct ipa *ipa)
-{
-       return true;
-}
-
-#endif /* !IPA_VALIDATE */
-
 /**
  * ipa_cmd_pool_init() - initialize command channel pools
  * @channel:   AP->IPA command TX GSI channel pointer
index 4c28189..06ddb85 100644 (file)
@@ -513,7 +513,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v3.1 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 16 * 1000 * 1000,     /* Hz */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -529,5 +529,5 @@ const struct ipa_data ipa_data_v3_1 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index af536ef..760c22b 100644 (file)
@@ -394,7 +394,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v3.5.1 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 75 * 1000 * 1000,     /* Hz */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -414,5 +414,5 @@ const struct ipa_data ipa_data_v3_5_1 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index 9353efb..fea9145 100644 (file)
@@ -105,6 +105,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                        .filter_support = true,
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+                               .checksum       = true,
                                .qmap           = true,
                                .status_enable  = true,
                                .tx = {
@@ -128,6 +129,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                .endpoint = {
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+                               .checksum       = true,
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
@@ -368,24 +370,19 @@ static const struct ipa_mem_data ipa_mem_data = {
 static const struct ipa_interconnect_data ipa_interconnect_data[] = {
        {
                .name                   = "memory",
-               .peak_bandwidth         = 465000,       /* 465 MBps */
-               .average_bandwidth      = 80000,        /* 80 MBps */
-       },
-       /* Average rate is unused for the next two interconnects */
-       {
-               .name                   = "imem",
-               .peak_bandwidth         = 68570,        /* 68.57 MBps */
-               .average_bandwidth      = 80000,        /* 80 MBps (unused?) */
+               .peak_bandwidth         = 600000,       /* 600 MBps */
+               .average_bandwidth      = 150000,       /* 150 MBps */
        },
+       /* Average rate is unused for the next interconnect */
        {
                .name                   = "config",
-               .peak_bandwidth         = 30000,        /* 30 MBps */
+               .peak_bandwidth         = 74000,        /* 74 MBps */
                .average_bandwidth      = 0,            /* unused */
        },
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v4.11 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 60 * 1000 * 1000,     /* Hz */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -400,5 +397,5 @@ const struct ipa_data ipa_data_v4_11 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index 3b09b7b..2a231e7 100644 (file)
@@ -360,7 +360,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v4.2 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 100 * 1000 * 1000,    /* Hz */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -376,5 +376,5 @@ const struct ipa_data ipa_data_v4_2 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index a99b647..e62ab9c 100644 (file)
@@ -114,6 +114,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                        .filter_support = true,
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+                               .checksum       = true,
                                .qmap           = true,
                                .status_enable  = true,
                                .tx = {
@@ -137,6 +138,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                .endpoint = {
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+                               .checksum       = true,
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
@@ -441,7 +443,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v4.5 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 150 * 1000 * 1000,    /* Hz (150?  60?) */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -456,5 +458,5 @@ const struct ipa_data ipa_data_v4_5 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index 798d43e..2421b5a 100644 (file)
@@ -106,6 +106,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                        .filter_support = true,
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_SRC_UL_DL,
+                               .checksum       = true,
                                .qmap           = true,
                                .status_enable  = true,
                                .tx = {
@@ -129,6 +130,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
                .endpoint = {
                        .config = {
                                .resource_group = IPA_RSRC_GROUP_DST_UL_DL_DPL,
+                               .checksum       = true,
                                .qmap           = true,
                                .aggregation    = true,
                                .rx = {
@@ -416,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = {
 /* Interconnect rates are in 1000 byte/second units */
 static const struct ipa_interconnect_data ipa_interconnect_data[] = {
        {
-               .name                   = "ipa_to_llcc",
+               .name                   = "memory",
                .peak_bandwidth         = 600000,       /* 600 MBps */
                .average_bandwidth      = 150000,       /* 150 MBps */
        },
-       {
-               .name                   = "llcc_to_ebi1",
-               .peak_bandwidth         = 1804000,      /* 1.804 GBps */
-               .average_bandwidth      = 150000,       /* 150 MBps */
-       },
        /* Average rate is unused for the next interconnect */
        {
-               .name                   = "appss_to_ipa",
+               .name                   = "config",
                .peak_bandwidth         = 74000,        /* 74 MBps */
                .average_bandwidth      = 0,            /* unused */
        },
@@ -435,7 +432,7 @@ static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 };
 
 /* Clock and interconnect configuration data for an SoC having IPA v4.9 */
-static const struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_power_data ipa_power_data = {
        .core_clock_rate        = 60 * 1000 * 1000,     /* Hz */
        .interconnect_count     = ARRAY_SIZE(ipa_interconnect_data),
        .interconnect_data      = ipa_interconnect_data,
@@ -450,5 +447,5 @@ const struct ipa_data ipa_data_v4_9 = {
        .endpoint_data  = ipa_gsi_endpoint_data,
        .resource_data  = &ipa_resource_data,
        .mem_data       = &ipa_mem_data,
-       .clock_data     = &ipa_clock_data,
+       .power_data     = &ipa_power_data,
 };
index 5bc244c..6d329e9 100644 (file)
@@ -19,7 +19,7 @@
  * IPA and GSI resources to use for a given platform.  This data is supplied
  * via the Device Tree match table, associated with a particular compatible
  * string.  The data defines information about how resources, endpoints and
- * channels, memory, clocking and so on are allocated and used for the
+ * channels, memory, power and so on are allocated and used for the
  * platform.
  *
  * Resources are data structures used internally by the IPA hardware.  The
@@ -265,12 +265,12 @@ struct ipa_interconnect_data {
 };
 
 /**
- * struct ipa_clock_data - description of IPA clock and interconnect rates
+ * struct ipa_power_data - description of IPA power configuration data
  * @core_clock_rate:   Core clock rate (Hz)
  * @interconnect_count:        Number of entries in the interconnect_data array
  * @interconnect_data: IPA interconnect configuration data
  */
-struct ipa_clock_data {
+struct ipa_power_data {
        u32 core_clock_rate;
        u32 interconnect_count;         /* # entries in interconnect_data[] */
        const struct ipa_interconnect_data *interconnect_data;
@@ -286,7 +286,7 @@ struct ipa_clock_data {
  * @endpoint_data:     IPA endpoint/GSI channel data
  * @resource_data:     IPA resource configuration data
  * @mem_data:          IPA memory region data
- * @clock_data:                IPA clock and interconnect data
+ * @power_data:                IPA power data
  */
 struct ipa_data {
        enum ipa_version version;
@@ -297,7 +297,7 @@ struct ipa_data {
        const struct ipa_gsi_endpoint_data *endpoint_data;
        const struct ipa_resource_data *resource_data;
        const struct ipa_mem_data *mem_data;
-       const struct ipa_clock_data *clock_data;
+       const struct ipa_power_data *power_data;
 };
 
 extern const struct ipa_data ipa_data_v3_1;
index ab02669..5528d97 100644 (file)
@@ -21,7 +21,7 @@
 #include "ipa_modem.h"
 #include "ipa_table.h"
 #include "ipa_gsi.h"
-#include "ipa_clock.h"
+#include "ipa_power.h"
 
 #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
 
@@ -250,17 +250,18 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 
        /* Suspend is not supported for IPA v4.0+.  Delay doesn't work
         * correctly on IPA v4.2.
-        *
-        * if (endpoint->toward_ipa)
-        *      assert(ipa->version != IPA_VERSION_4.2);
-        * else
-        *      assert(ipa->version < IPA_VERSION_4_0);
         */
+       if (endpoint->toward_ipa)
+               WARN_ON(ipa->version == IPA_VERSION_4_2);
+       else
+               WARN_ON(ipa->version >= IPA_VERSION_4_0);
+
        mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
 
        val = ioread32(ipa->reg_virt + offset);
-       /* Don't bother if it's already in the requested state */
        state = !!(val & mask);
+
+       /* Don't bother if it's already in the requested state */
        if (suspend_delay != state) {
                val ^= mask;
                iowrite32(val, ipa->reg_virt + offset);
@@ -273,7 +274,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 static void
 ipa_endpoint_program_delay(struct ipa_endpoint *endpoint, bool enable)
 {
-       /* assert(endpoint->toward_ipa); */
+       WARN_ON(!endpoint->toward_ipa);
 
        /* Delay mode doesn't work properly for IPA v4.2 */
        if (endpoint->ipa->version != IPA_VERSION_4_2)
@@ -287,7 +288,8 @@ static bool ipa_endpoint_aggr_active(struct ipa_endpoint *endpoint)
        u32 offset;
        u32 val;
 
-       /* assert(mask & ipa->available); */
+       WARN_ON(!(mask & ipa->available));
+
        offset = ipa_reg_state_aggr_active_offset(ipa->version);
        val = ioread32(ipa->reg_virt + offset);
 
@@ -299,7 +301,8 @@ static void ipa_endpoint_force_close(struct ipa_endpoint *endpoint)
        u32 mask = BIT(endpoint->endpoint_id);
        struct ipa *ipa = endpoint->ipa;
 
-       /* assert(mask & ipa->available); */
+       WARN_ON(!(mask & ipa->available));
+
        iowrite32(mask, ipa->reg_virt + IPA_REG_AGGR_FORCE_CLOSE_OFFSET);
 }
 
@@ -338,7 +341,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable)
        if (endpoint->ipa->version >= IPA_VERSION_4_0)
                return enable;  /* For IPA v4.0+, no change made */
 
-       /* assert(!endpoint->toward_ipa); */
+       WARN_ON(endpoint->toward_ipa);
 
        suspended = ipa_endpoint_init_ctrl(endpoint, enable);
 
@@ -807,7 +810,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds)
                return hol_block_timer_qtime_val(ipa, microseconds);
 
        /* Use 64 bit arithmetic to avoid overflow... */
-       rate = ipa_clock_rate(ipa);
+       rate = ipa_core_clock_rate(ipa);
        ticks = DIV_ROUND_CLOSEST(microseconds * rate, 128 * USEC_PER_SEC);
        /* ...but we still need to fit into a 32-bit register */
        WARN_ON(ticks > U32_MAX);
@@ -1156,7 +1159,8 @@ static bool ipa_endpoint_skb_build(struct ipa_endpoint *endpoint,
        if (!endpoint->netdev)
                return false;
 
-       /* assert(len <= SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE-NET_SKB_PAD)); */
+       WARN_ON(len > SKB_WITH_OVERHEAD(IPA_RX_BUFFER_SIZE - NET_SKB_PAD));
+
        skb = build_skb(page_address(page), IPA_RX_BUFFER_SIZE);
        if (skb) {
                /* Reserve the headroom and account for the data */
@@ -1583,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool stop_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1594,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
                (void)ipa_endpoint_program_suspend(endpoint, true);
        }
 
-       /* Starting with IPA v4.0, endpoints are suspended by stopping the
-        * underlying GSI channel rather than using endpoint suspend mode.
-        */
-       stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
+       ret = gsi_channel_suspend(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d suspending channel %u\n", ret,
                        endpoint->channel_id);
@@ -1608,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool start_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1617,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
        if (!endpoint->toward_ipa)
                (void)ipa_endpoint_program_suspend(endpoint, false);
 
-       /* Starting with IPA v4.0, the underlying GSI channel must be
-        * restarted for resume.
-        */
-       start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
+       ret = gsi_channel_resume(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d resuming channel %u\n", ret,
                        endpoint->channel_id);
index c46df0b..b35170a 100644 (file)
@@ -21,9 +21,9 @@
 
 #include <linux/types.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 
 #include "ipa.h"
-#include "ipa_clock.h"
 #include "ipa_reg.h"
 #include "ipa_endpoint.h"
 #include "ipa_interrupt.h"
@@ -74,21 +74,30 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
                iowrite32(mask, ipa->reg_virt + offset);
 }
 
-/* Process all IPA interrupt types that have been signaled */
-static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
+/* IPA IRQ handler is threaded */
+static irqreturn_t ipa_isr_thread(int irq, void *dev_id)
 {
+       struct ipa_interrupt *interrupt = dev_id;
        struct ipa *ipa = interrupt->ipa;
        u32 enabled = interrupt->enabled;
+       struct device *dev;
+       u32 pending;
        u32 offset;
        u32 mask;
+       int ret;
+
+       dev = &ipa->pdev->dev;
+       ret = pm_runtime_get_sync(dev);
+       if (WARN_ON(ret < 0))
+               goto out_power_put;
 
        /* The status register indicates which conditions are present,
         * including conditions whose interrupt is not enabled.  Handle
         * only the enabled ones.
         */
        offset = ipa_reg_irq_stts_offset(ipa->version);
-       mask = ioread32(ipa->reg_virt + offset);
-       while ((mask &= enabled)) {
+       pending = ioread32(ipa->reg_virt + offset);
+       while ((mask = pending & enabled)) {
                do {
                        u32 irq_id = __ffs(mask);
 
@@ -96,43 +105,19 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
 
                        ipa_interrupt_process(interrupt, irq_id);
                } while (mask);
-               mask = ioread32(ipa->reg_virt + offset);
+               pending = ioread32(ipa->reg_virt + offset);
        }
-}
-
-/* Threaded part of the IPA IRQ handler */
-static irqreturn_t ipa_isr_thread(int irq, void *dev_id)
-{
-       struct ipa_interrupt *interrupt = dev_id;
-
-       ipa_clock_get(interrupt->ipa);
-
-       ipa_interrupt_process_all(interrupt);
-
-       ipa_clock_put(interrupt->ipa);
-
-       return IRQ_HANDLED;
-}
-
-/* Hard part (i.e., "real" IRQ handler) of the IRQ handler */
-static irqreturn_t ipa_isr(int irq, void *dev_id)
-{
-       struct ipa_interrupt *interrupt = dev_id;
-       struct ipa *ipa = interrupt->ipa;
-       u32 offset;
-       u32 mask;
 
-       offset = ipa_reg_irq_stts_offset(ipa->version);
-       mask = ioread32(ipa->reg_virt + offset);
-       if (mask & interrupt->enabled)
-               return IRQ_WAKE_THREAD;
-
-       /* Nothing in the mask was supposed to cause an interrupt */
-       offset = ipa_reg_irq_clr_offset(ipa->version);
-       iowrite32(mask, ipa->reg_virt + offset);
-
-       dev_err(&ipa->pdev->dev, "%s: unexpected interrupt, mask 0x%08x\n",
-               __func__, mask);
+       /* If any disabled interrupts are pending, clear them */
+       if (pending) {
+               dev_dbg(dev, "clearing disabled IPA interrupts 0x%08x\n",
+                       pending);
+               offset = ipa_reg_irq_clr_offset(ipa->version);
+               iowrite32(pending, ipa->reg_virt + offset);
+       }
+out_power_put:
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
 
        return IRQ_HANDLED;
 }
@@ -146,7 +131,7 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
        u32 offset;
        u32 val;
 
-       /* assert(mask & ipa->available); */
+       WARN_ON(!(mask & ipa->available));
 
        /* IPA version 3.0 does not support TX_SUSPEND interrupt control */
        if (ipa->version == IPA_VERSION_3_0)
@@ -206,7 +191,8 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt,
        struct ipa *ipa = interrupt->ipa;
        u32 offset;
 
-       /* assert(ipa_irq < IPA_IRQ_COUNT); */
+       WARN_ON(ipa_irq >= IPA_IRQ_COUNT);
+
        interrupt->handler[ipa_irq] = handler;
 
        /* Update the IPA interrupt mask to enable it */
@@ -222,7 +208,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq)
        struct ipa *ipa = interrupt->ipa;
        u32 offset;
 
-       /* assert(ipa_irq < IPA_IRQ_COUNT); */
+       WARN_ON(ipa_irq >= IPA_IRQ_COUNT);
+
        /* Update the IPA interrupt mask to disable it */
        interrupt->enabled &= ~BIT(ipa_irq);
        offset = ipa_reg_irq_en_offset(ipa->version);
@@ -231,8 +218,8 @@ ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq)
        interrupt->handler[ipa_irq] = NULL;
 }
 
-/* Set up the IPA interrupt framework */
-struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
+/* Configure the IPA interrupt framework */
+struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa)
 {
        struct device *dev = &ipa->pdev->dev;
        struct ipa_interrupt *interrupt;
@@ -258,7 +245,7 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
        offset = ipa_reg_irq_en_offset(ipa->version);
        iowrite32(0, ipa->reg_virt + offset);
 
-       ret = request_threaded_irq(irq, ipa_isr, ipa_isr_thread, IRQF_ONESHOT,
+       ret = request_threaded_irq(irq, NULL, ipa_isr_thread, IRQF_ONESHOT,
                                   "ipa", interrupt);
        if (ret) {
                dev_err(dev, "error %d requesting \"ipa\" IRQ\n", ret);
@@ -281,8 +268,8 @@ err_kfree:
        return ERR_PTR(ret);
 }
 
-/* Tear down the IPA interrupt framework */
-void ipa_interrupt_teardown(struct ipa_interrupt *interrupt)
+/* Inverse of ipa_interrupt_config() */
+void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt)
 {
        struct device *dev = &interrupt->ipa->pdev->dev;
        int ret;
index d5c486a..231390c 100644 (file)
@@ -86,17 +86,17 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt);
 void ipa_interrupt_simulate_suspend(struct ipa_interrupt *interrupt);
 
 /**
- * ipa_interrupt_setup() - Set up the IPA interrupt framework
+ * ipa_interrupt_config() - Configure the IPA interrupt framework
  * @ipa:       IPA pointer
  *
  * Return:     Pointer to IPA SMP2P info, or a pointer-coded error
  */
-struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa);
+struct ipa_interrupt *ipa_interrupt_config(struct ipa *ipa);
 
 /**
- * ipa_interrupt_teardown() - Tear down the IPA interrupt framework
+ * ipa_interrupt_deconfig() - Inverse of ipa_interrupt_config()
  * @interrupt: IPA interrupt structure
  */
-void ipa_interrupt_teardown(struct ipa_interrupt *interrupt);
+void ipa_interrupt_deconfig(struct ipa_interrupt *interrupt);
 
 #endif /* _IPA_INTERRUPT_H_ */
index 9810c61..cdfa98a 100644 (file)
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
+#include <linux/pm_runtime.h>
 #include <linux/qcom_scm.h>
 #include <linux/soc/qcom/mdt_loader.h>
 
 #include "ipa.h"
-#include "ipa_clock.h"
+#include "ipa_power.h"
 #include "ipa_data.h"
 #include "ipa_endpoint.h"
 #include "ipa_resource.h"
 /* Divider for 19.2 MHz crystal oscillator clock to get common timer clock */
 #define IPA_XO_CLOCK_DIVIDER   192     /* 1 is subtracted where used */
 
-/**
- * ipa_suspend_handler() - Handle the suspend IPA interrupt
- * @ipa:       IPA pointer
- * @irq_id:    IPA interrupt type (unused)
- *
- * If an RX endpoint is in suspend state, and the IPA has a packet
- * destined for that endpoint, the IPA generates a SUSPEND interrupt
- * to inform the AP that it should resume the endpoint.  If we get
- * one of these interrupts we just resume everything.
- */
-static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
-       /* Just report the event, and let system resume handle the rest.
-        * More than one endpoint could signal this; if so, ignore
-        * all but the first.
-        */
-       if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags))
-               pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
-       /* Acknowledge/clear the suspend interrupt on all endpoints */
-       ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
 /**
  * ipa_setup() - Set up IPA hardware
  * @ipa:       IPA pointer
@@ -124,19 +102,9 @@ int ipa_setup(struct ipa *ipa)
        if (ret)
                return ret;
 
-       ipa->interrupt = ipa_interrupt_setup(ipa);
-       if (IS_ERR(ipa->interrupt)) {
-               ret = PTR_ERR(ipa->interrupt);
-               goto err_gsi_teardown;
-       }
-       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
-                         ipa_suspend_handler);
-
-       ipa_uc_setup(ipa);
-
-       ret = device_init_wakeup(dev, true);
+       ret = ipa_power_setup(ipa);
        if (ret)
-               goto err_uc_teardown;
+               goto err_gsi_teardown;
 
        ipa_endpoint_setup(ipa);
 
@@ -167,7 +135,7 @@ int ipa_setup(struct ipa *ipa)
        ipa_endpoint_default_route_set(ipa, exception_endpoint->endpoint_id);
 
        /* We're all set.  Now prepare for communication with the modem */
-       ret = ipa_modem_setup(ipa);
+       ret = ipa_qmi_setup(ipa);
        if (ret)
                goto err_default_route_clear;
 
@@ -184,11 +152,7 @@ err_command_disable:
        ipa_endpoint_disable_one(command_endpoint);
 err_endpoint_teardown:
        ipa_endpoint_teardown(ipa);
-       (void)device_init_wakeup(dev, false);
-err_uc_teardown:
-       ipa_uc_teardown(ipa);
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
-       ipa_interrupt_teardown(ipa->interrupt);
+       ipa_power_teardown(ipa);
 err_gsi_teardown:
        gsi_teardown(&ipa->gsi);
 
@@ -204,17 +168,17 @@ static void ipa_teardown(struct ipa *ipa)
        struct ipa_endpoint *exception_endpoint;
        struct ipa_endpoint *command_endpoint;
 
-       ipa_modem_teardown(ipa);
+       /* We're going to tear everything down, as if setup never completed */
+       ipa->setup_complete = false;
+
+       ipa_qmi_teardown(ipa);
        ipa_endpoint_default_route_clear(ipa);
        exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX];
        ipa_endpoint_disable_one(exception_endpoint);
        command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
        ipa_endpoint_disable_one(command_endpoint);
        ipa_endpoint_teardown(ipa);
-       (void)device_init_wakeup(&ipa->pdev->dev, false);
-       ipa_uc_teardown(ipa);
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
-       ipa_interrupt_teardown(ipa->interrupt);
+       ipa_power_teardown(ipa);
        gsi_teardown(&ipa->gsi);
 }
 
@@ -253,9 +217,6 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
        const struct ipa_qsb_data *data1;
        u32 val;
 
-       /* assert(data->qsb_count > 0); */
-       /* assert(data->qsb_count < 3); */
-
        /* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */
        data0 = &data->qsb_data[IPA_QSB_MASTER_DDR];
        if (data->qsb_count > 1)
@@ -289,12 +250,11 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
 /* Compute the value to use in the COUNTER_CFG register AGGR_GRANULARITY
  * field to represent the given number of microseconds.  The value is one
  * less than the number of timer ticks in the requested period.  0 is not
- * a valid granularity value.
+ * a valid granularity value (so for example @usec must be at least 16 for
+ * a TIMER_FREQUENCY of 32000).
  */
-static u32 ipa_aggr_granularity_val(u32 usec)
+static __always_inline u32 ipa_aggr_granularity_val(u32 usec)
 {
-       /* assert(usec != 0); */
-
        return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
 }
 
@@ -366,8 +326,8 @@ static void ipa_idle_indication_cfg(struct ipa *ipa,
  * @ipa:       IPA pointer
  *
  * Configures when the IPA signals it is idle to the global clock
- * controller, which can respond by scalling down the clock to
- * save power.
+ * controller, which can respond by scaling down the clock to save
+ * power.
  */
 static void ipa_hardware_dcd_config(struct ipa *ipa)
 {
@@ -457,48 +417,54 @@ static void ipa_hardware_deconfig(struct ipa *ipa)
  * @ipa:       IPA pointer
  * @data:      IPA configuration data
  *
- * Perform initialization requiring IPA clock to be enabled.
+ * Perform initialization requiring IPA power to be enabled.
  */
 static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 {
        int ret;
 
-       /* Get a clock reference to allow initialization.  This reference
-        * is held after initialization completes, and won't get dropped
-        * unless/until a system suspend request arrives.
-        */
-       ipa_clock_get(ipa);
-
        ipa_hardware_config(ipa, data);
 
-       ret = ipa_endpoint_config(ipa);
+       ret = ipa_mem_config(ipa);
        if (ret)
                goto err_hardware_deconfig;
 
-       ret = ipa_mem_config(ipa);
+       ipa->interrupt = ipa_interrupt_config(ipa);
+       if (IS_ERR(ipa->interrupt)) {
+               ret = PTR_ERR(ipa->interrupt);
+               ipa->interrupt = NULL;
+               goto err_mem_deconfig;
+       }
+
+       ipa_uc_config(ipa);
+
+       ret = ipa_endpoint_config(ipa);
        if (ret)
-               goto err_endpoint_deconfig;
+               goto err_uc_deconfig;
 
        ipa_table_config(ipa);          /* No deconfig required */
 
        /* Assign resource limitation to each group; no deconfig required */
        ret = ipa_resource_config(ipa, data->resource_data);
        if (ret)
-               goto err_mem_deconfig;
+               goto err_endpoint_deconfig;
 
        ret = ipa_modem_config(ipa);
        if (ret)
-               goto err_mem_deconfig;
+               goto err_endpoint_deconfig;
 
        return 0;
 
-err_mem_deconfig:
-       ipa_mem_deconfig(ipa);
 err_endpoint_deconfig:
        ipa_endpoint_deconfig(ipa);
+err_uc_deconfig:
+       ipa_uc_deconfig(ipa);
+       ipa_interrupt_deconfig(ipa->interrupt);
+       ipa->interrupt = NULL;
+err_mem_deconfig:
+       ipa_mem_deconfig(ipa);
 err_hardware_deconfig:
        ipa_hardware_deconfig(ipa);
-       ipa_clock_put(ipa);
 
        return ret;
 }
@@ -510,10 +476,12 @@ err_hardware_deconfig:
 static void ipa_deconfig(struct ipa *ipa)
 {
        ipa_modem_deconfig(ipa);
-       ipa_mem_deconfig(ipa);
        ipa_endpoint_deconfig(ipa);
+       ipa_uc_deconfig(ipa);
+       ipa_interrupt_deconfig(ipa->interrupt);
+       ipa->interrupt = NULL;
+       ipa_mem_deconfig(ipa);
        ipa_hardware_deconfig(ipa);
-       ipa_clock_put(ipa);
 }
 
 static int ipa_firmware_load(struct device *dev)
@@ -612,7 +580,6 @@ MODULE_DEVICE_TABLE(of, ipa_match);
  * */
 static void ipa_validate_build(void)
 {
-#ifdef IPA_VALIDATE
        /* At one time we assumed a 64-bit build, allowing some do_div()
         * calls to be replaced by simple division or modulo operations.
         * We currently only perform divide and modulo operations on u32,
@@ -646,7 +613,6 @@ static void ipa_validate_build(void)
        BUILD_BUG_ON(!ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY));
        BUILD_BUG_ON(ipa_aggr_granularity_val(IPA_AGGR_GRANULARITY) >
                        field_max(AGGR_GRANULARITY_FMASK));
-#endif /* IPA_VALIDATE */
 }
 
 static bool ipa_version_valid(enum ipa_version version)
@@ -681,7 +647,7 @@ static bool ipa_version_valid(enum ipa_version version)
  * in several stages:
  *   - The "init" stage involves activities that can be initialized without
  *     access to the IPA hardware.
- *   - The "config" stage requires the IPA clock to be active so IPA registers
+ *   - The "config" stage requires IPA power to be active so IPA registers
  *     can be accessed, but does not require the use of IPA immediate commands.
  *   - The "setup" stage uses IPA immediate commands, and so requires the GSI
  *     layer to be initialized.
@@ -697,14 +663,14 @@ static int ipa_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        const struct ipa_data *data;
-       struct ipa_clock *clock;
+       struct ipa_power *power;
        bool modem_init;
        struct ipa *ipa;
        int ret;
 
        ipa_validate_build();
 
-       /* Get configuration data early; needed for clock initialization */
+       /* Get configuration data early; needed for power initialization */
        data = of_device_get_match_data(dev);
        if (!data) {
                dev_err(dev, "matched hardware not supported\n");
@@ -725,20 +691,20 @@ static int ipa_probe(struct platform_device *pdev)
        /* The clock and interconnects might not be ready when we're
         * probed, so might return -EPROBE_DEFER.
         */
-       clock = ipa_clock_init(dev, data->clock_data);
-       if (IS_ERR(clock))
-               return PTR_ERR(clock);
+       power = ipa_power_init(dev, data->power_data);
+       if (IS_ERR(power))
+               return PTR_ERR(power);
 
        /* No more EPROBE_DEFER.  Allocate and initialize the IPA structure */
        ipa = kzalloc(sizeof(*ipa), GFP_KERNEL);
        if (!ipa) {
                ret = -ENOMEM;
-               goto err_clock_exit;
+               goto err_power_exit;
        }
 
        ipa->pdev = pdev;
        dev_set_drvdata(dev, ipa);
-       ipa->clock = clock;
+       ipa->power = power;
        ipa->version = data->version;
        init_completion(&ipa->completion);
 
@@ -771,18 +737,23 @@ static int ipa_probe(struct platform_device *pdev)
        if (ret)
                goto err_table_exit;
 
+       /* Power needs to be active for config and setup */
+       ret = pm_runtime_get_sync(dev);
+       if (WARN_ON(ret < 0))
+               goto err_power_put;
+
        ret = ipa_config(ipa, data);
        if (ret)
-               goto err_modem_exit;
+               goto err_power_put;
 
        dev_info(dev, "IPA driver initialized");
 
        /* If the modem is doing early initialization, it will trigger a
-        * call to ipa_setup() call when it has finished.  In that case
-        * we're done here.
+        * call to ipa_setup() when it has finished.  In that case we're
+        * done here.
         */
        if (modem_init)
-               return 0;
+               goto done;
 
        /* Otherwise we need to load the firmware and have Trust Zone validate
         * and install it.  If that succeeds we can proceed with setup.
@@ -794,12 +765,16 @@ static int ipa_probe(struct platform_device *pdev)
        ret = ipa_setup(ipa);
        if (ret)
                goto err_deconfig;
+done:
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
 
        return 0;
 
 err_deconfig:
        ipa_deconfig(ipa);
-err_modem_exit:
+err_power_put:
+       pm_runtime_put_noidle(dev);
        ipa_modem_exit(ipa);
 err_table_exit:
        ipa_table_exit(ipa);
@@ -813,8 +788,8 @@ err_reg_exit:
        ipa_reg_exit(ipa);
 err_kfree_ipa:
        kfree(ipa);
-err_clock_exit:
-       ipa_clock_exit(clock);
+err_power_exit:
+       ipa_power_exit(power);
 
        return ret;
 }
@@ -822,9 +797,14 @@ err_clock_exit:
 static int ipa_remove(struct platform_device *pdev)
 {
        struct ipa *ipa = dev_get_drvdata(&pdev->dev);
-       struct ipa_clock *clock = ipa->clock;
+       struct ipa_power *power = ipa->power;
+       struct device *dev = &pdev->dev;
        int ret;
 
+       ret = pm_runtime_get_sync(dev);
+       if (WARN_ON(ret < 0))
+               goto out_power_put;
+
        if (ipa->setup_complete) {
                ret = ipa_modem_stop(ipa);
                /* If starting or stopping is in progress, try once more */
@@ -839,6 +819,8 @@ static int ipa_remove(struct platform_device *pdev)
        }
 
        ipa_deconfig(ipa);
+out_power_put:
+       pm_runtime_put_noidle(dev);
        ipa_modem_exit(ipa);
        ipa_table_exit(ipa);
        ipa_endpoint_exit(ipa);
@@ -846,7 +828,7 @@ static int ipa_remove(struct platform_device *pdev)
        ipa_mem_exit(ipa);
        ipa_reg_exit(ipa);
        kfree(ipa);
-       ipa_clock_exit(clock);
+       ipa_power_exit(power);
 
        return 0;
 }
@@ -860,62 +842,6 @@ static void ipa_shutdown(struct platform_device *pdev)
                dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret);
 }
 
-/**
- * ipa_suspend() - Power management system suspend callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns zero
- *
- * Called by the PM framework when a system suspend operation is invoked.
- * Suspends endpoints and releases the clock reference held to keep
- * the IPA clock running until this point.
- */
-static int ipa_suspend(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* When a suspended RX endpoint has a packet ready to receive, we
-        * get an IPA SUSPEND interrupt.  We trigger a system resume in
-        * that case, but only on the first such interrupt since suspend.
-        */
-       __clear_bit(IPA_FLAG_RESUMED, ipa->flags);
-
-       ipa_endpoint_suspend(ipa);
-
-       ipa_clock_put(ipa);
-
-       return 0;
-}
-
-/**
- * ipa_resume() - Power management system resume callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns 0
- *
- * Called by the PM framework when a system resume operation is invoked.
- * Takes an IPA clock reference to keep the clock running until suspend,
- * and resumes endpoints.
- */
-static int ipa_resume(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* This clock reference will keep the IPA out of suspend
-        * until we get a power management suspend request.
-        */
-       ipa_clock_get(ipa);
-
-       ipa_endpoint_resume(ipa);
-
-       return 0;
-}
-
-static const struct dev_pm_ops ipa_pm_ops = {
-       .suspend        = ipa_suspend,
-       .resume         = ipa_resume,
-};
-
 static const struct attribute_group *ipa_attribute_groups[] = {
        &ipa_attribute_group,
        &ipa_feature_attribute_group,
index af9aedb..ad116bc 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/if_rmnet.h>
+#include <linux/pm_runtime.h>
 #include <linux/remoteproc/qcom_rproc.h>
 
 #include "ipa.h"
@@ -19,6 +20,8 @@
 #include "ipa_modem.h"
 #include "ipa_smp2p.h"
 #include "ipa_qmi.h"
+#include "ipa_uc.h"
+#include "ipa_power.h"
 
 #define IPA_NETDEV_NAME                "rmnet_ipa%d"
 #define IPA_NETDEV_TAILROOM    0       /* for padding by mux layer */
@@ -31,9 +34,14 @@ enum ipa_modem_state {
        IPA_MODEM_STATE_STOPPING,
 };
 
-/** struct ipa_priv - IPA network device private data */
+/**
+ * struct ipa_priv - IPA network device private data
+ * @ipa:       IPA pointer
+ * @work:      Work structure used to wake the modem netdev TX queue
+ */
 struct ipa_priv {
        struct ipa *ipa;
+       struct work_struct work;
 };
 
 /** ipa_open() - Opens the modem network interface */
@@ -41,21 +49,33 @@ static int ipa_open(struct net_device *netdev)
 {
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
+       struct device *dev;
        int ret;
 
+       dev = &ipa->pdev->dev;
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0)
+               goto err_power_put;
+
        ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
        if (ret)
-               return ret;
+               goto err_power_put;
+
        ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
        if (ret)
                goto err_disable_tx;
 
        netif_start_queue(netdev);
 
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
+
        return 0;
 
 err_disable_tx:
        ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+err_power_put:
+       pm_runtime_put_noidle(dev);
 
        return ret;
 }
@@ -65,11 +85,21 @@ static int ipa_stop(struct net_device *netdev)
 {
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
+       struct device *dev;
+       int ret;
+
+       dev = &ipa->pdev->dev;
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0)
+               goto out_power_put;
 
        netif_stop_queue(netdev);
 
        ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
        ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
+out_power_put:
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
 
        return 0;
 }
@@ -82,13 +112,15 @@ static int ipa_stop(struct net_device *netdev)
  * NETDEV_TX_OK: Success
  * NETDEV_TX_BUSY: Error while transmitting the skb. Try again later
  */
-static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t
+ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
        struct net_device_stats *stats = &netdev->stats;
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa_endpoint *endpoint;
        struct ipa *ipa = priv->ipa;
        u32 skb_len = skb->len;
+       struct device *dev;
        int ret;
 
        if (!skb_len)
@@ -98,7 +130,35 @@ static int ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
        if (endpoint->data->qmap && skb->protocol != htons(ETH_P_MAP))
                goto err_drop_skb;
 
+       /* The hardware must be powered for us to transmit */
+       dev = &ipa->pdev->dev;
+       ret = pm_runtime_get(dev);
+       if (ret < 1) {
+               /* If a resume won't happen, just drop the packet */
+               if (ret < 0 && ret != -EINPROGRESS) {
+                       ipa_power_modem_queue_active(ipa);
+                       pm_runtime_put_noidle(dev);
+                       goto err_drop_skb;
+               }
+
+               /* No power (yet).  Stop the network stack from transmitting
+                * until we're resumed; ipa_modem_resume() arranges for the
+                * TX queue to be started again.
+                */
+               ipa_power_modem_queue_stop(ipa);
+
+               pm_runtime_put_noidle(dev);
+
+               return NETDEV_TX_BUSY;
+       }
+
+       ipa_power_modem_queue_active(ipa);
+
        ret = ipa_endpoint_skb_tx(endpoint, skb);
+
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
+
        if (ret) {
                if (ret != -E2BIG)
                        return NETDEV_TX_BUSY;
@@ -169,12 +229,31 @@ void ipa_modem_suspend(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
-       netif_stop_queue(netdev);
+       if (!(netdev->flags & IFF_UP))
+               return;
 
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
 }
 
+/**
+ * ipa_modem_wake_queue_work() - enable modem netdev queue
+ * @work:      Work structure
+ *
+ * Re-enable transmit on the modem network device.  This is called
+ * in (power management) work queue context, scheduled when resuming
+ * the modem.  We can't enable the queue directly in ipa_modem_resume()
+ * because transmits restart the instant the queue is awakened; but the
+ * device power state won't be ACTIVE until *after* ipa_modem_resume()
+ * returns.
+ */
+static void ipa_modem_wake_queue_work(struct work_struct *work)
+{
+       struct ipa_priv *priv = container_of(work, struct ipa_priv, work);
+
+       ipa_power_modem_queue_wake(priv->ipa);
+}
+
 /** ipa_modem_resume() - resume callback for runtime_pm
  * @dev: pointer to device
  *
@@ -185,10 +264,14 @@ void ipa_modem_resume(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
+       if (!(netdev->flags & IFF_UP))
+               return;
+
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
 
-       netif_wake_queue(netdev);
+       /* Arrange for the TX queue to be restarted */
+       (void)queue_pm_work(&priv->work);
 }
 
 int ipa_modem_start(struct ipa *ipa)
@@ -216,13 +299,16 @@ int ipa_modem_start(struct ipa *ipa)
        SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
        priv = netdev_priv(netdev);
        priv->ipa = ipa;
+       INIT_WORK(&priv->work, ipa_modem_wake_queue_work);
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+       ipa->modem_netdev = netdev;
 
        ret = register_netdev(netdev);
-       if (!ret) {
-               ipa->modem_netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
-       } else {
+       if (ret) {
+               ipa->modem_netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
                free_netdev(netdev);
        }
 
@@ -256,13 +342,18 @@ int ipa_modem_stop(struct ipa *ipa)
        /* Prevent the modem from triggering a call to ipa_setup() */
        ipa_smp2p_disable(ipa);
 
-       /* Stop the queue and disable the endpoints if it's open */
+       /* Clean up the netdev and endpoints if it was started */
        if (netdev) {
-               (void)ipa_stop(netdev);
+               struct ipa_priv *priv = netdev_priv(netdev);
+
+               cancel_work_sync(&priv->work);
+               /* If it was opened, stop it first */
+               if (netdev->flags & IFF_UP)
+                       (void)ipa_stop(netdev);
+               unregister_netdev(netdev);
+               ipa->modem_netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
-               ipa->modem_netdev = NULL;
-               unregister_netdev(netdev);
                free_netdev(netdev);
        }
 
@@ -278,6 +369,12 @@ static void ipa_modem_crashed(struct ipa *ipa)
        struct device *dev = &ipa->pdev->dev;
        int ret;
 
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               dev_err(dev, "error %d getting power to handle crash\n", ret);
+               goto out_power_put;
+       }
+
        ipa_endpoint_modem_pause_all(ipa, true);
 
        ipa_endpoint_modem_hol_block_clear_all(ipa);
@@ -302,6 +399,10 @@ static void ipa_modem_crashed(struct ipa *ipa)
        ret = ipa_mem_zero_modem(ipa);
        if (ret)
                dev_err(dev, "error %d zeroing modem memory regions\n", ret);
+
+out_power_put:
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
 }
 
 static int ipa_modem_notify(struct notifier_block *nb, unsigned long action,
@@ -314,6 +415,7 @@ static int ipa_modem_notify(struct notifier_block *nb, unsigned long action,
        switch (action) {
        case QCOM_SSR_BEFORE_POWERUP:
                dev_info(dev, "received modem starting event\n");
+               ipa_uc_power(ipa);
                ipa_smp2p_notify_reset(ipa);
                break;
 
@@ -377,13 +479,3 @@ void ipa_modem_deconfig(struct ipa *ipa)
        ipa->notifier = NULL;
        memset(&ipa->nb, 0, sizeof(ipa->nb));
 }
-
-int ipa_modem_setup(struct ipa *ipa)
-{
-       return ipa_qmi_setup(ipa);
-}
-
-void ipa_modem_teardown(struct ipa *ipa)
-{
-       ipa_qmi_teardown(ipa);
-}
index 2de3e21..5e6e3d2 100644 (file)
@@ -7,7 +7,6 @@
 #define _IPA_MODEM_H_
 
 struct ipa;
-struct ipa_endpoint;
 struct net_device;
 struct sk_buff;
 
@@ -25,7 +24,4 @@ void ipa_modem_exit(struct ipa *ipa);
 int ipa_modem_config(struct ipa *ipa);
 void ipa_modem_deconfig(struct ipa *ipa);
 
-int ipa_modem_setup(struct ipa *ipa);
-void ipa_modem_teardown(struct ipa *ipa);
-
 #endif /* _IPA_MODEM_H_ */
diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c
new file mode 100644 (file)
index 0000000..b1c6c0f
--- /dev/null
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2018-2021 Linaro Ltd.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/bitops.h>
+
+#include "ipa.h"
+#include "ipa_power.h"
+#include "ipa_endpoint.h"
+#include "ipa_modem.h"
+#include "ipa_data.h"
+
+/**
+ * DOC: IPA Power Management
+ *
+ * The IPA hardware is enabled when the IPA core clock and all the
+ * interconnects (buses) it depends on are enabled.  Runtime power
+ * management is used to determine whether the core clock and
+ * interconnects are enabled, and if not in use to be suspended
+ * automatically.
+ *
+ * The core clock currently runs at a fixed clock rate when enabled,
+ * an all interconnects use a fixed average and peak bandwidth.
+ */
+
+#define IPA_AUTOSUSPEND_DELAY  500     /* milliseconds */
+
+/**
+ * struct ipa_interconnect - IPA interconnect information
+ * @path:              Interconnect path
+ * @average_bandwidth: Average interconnect bandwidth (KB/second)
+ * @peak_bandwidth:    Peak interconnect bandwidth (KB/second)
+ */
+struct ipa_interconnect {
+       struct icc_path *path;
+       u32 average_bandwidth;
+       u32 peak_bandwidth;
+};
+
+/**
+ * enum ipa_power_flag - IPA power flags
+ * @IPA_POWER_FLAG_RESUMED:    Whether resume from suspend has been signaled
+ * @IPA_POWER_FLAG_SYSTEM:     Hardware is system (not runtime) suspended
+ * @IPA_POWER_FLAG_STOPPED:    Modem TX is disabled by ipa_start_xmit()
+ * @IPA_POWER_FLAG_STARTED:    Modem TX was enabled by ipa_runtime_resume()
+ * @IPA_POWER_FLAG_COUNT:      Number of defined power flags
+ */
+enum ipa_power_flag {
+       IPA_POWER_FLAG_RESUMED,
+       IPA_POWER_FLAG_SYSTEM,
+       IPA_POWER_FLAG_STOPPED,
+       IPA_POWER_FLAG_STARTED,
+       IPA_POWER_FLAG_COUNT,           /* Last; not a flag */
+};
+
+/**
+ * struct ipa_power - IPA power management information
+ * @dev:               IPA device pointer
+ * @core:              IPA core clock
+ * @spinlock:          Protects modem TX queue enable/disable
+ * @flags:             Boolean state flags
+ * @interconnect_count:        Number of elements in interconnect[]
+ * @interconnect:      Interconnect array
+ */
+struct ipa_power {
+       struct device *dev;
+       struct clk *core;
+       spinlock_t spinlock;    /* used with STOPPED/STARTED power flags */
+       DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
+       u32 interconnect_count;
+       struct ipa_interconnect *interconnect;
+};
+
+static int ipa_interconnect_init_one(struct device *dev,
+                                    struct ipa_interconnect *interconnect,
+                                    const struct ipa_interconnect_data *data)
+{
+       struct icc_path *path;
+
+       path = of_icc_get(dev, data->name);
+       if (IS_ERR(path)) {
+               int ret = PTR_ERR(path);
+
+               dev_err_probe(dev, ret, "error getting %s interconnect\n",
+                             data->name);
+
+               return ret;
+       }
+
+       interconnect->path = path;
+       interconnect->average_bandwidth = data->average_bandwidth;
+       interconnect->peak_bandwidth = data->peak_bandwidth;
+
+       return 0;
+}
+
+static void ipa_interconnect_exit_one(struct ipa_interconnect *interconnect)
+{
+       icc_put(interconnect->path);
+       memset(interconnect, 0, sizeof(*interconnect));
+}
+
+/* Initialize interconnects required for IPA operation */
+static int ipa_interconnect_init(struct ipa_power *power, struct device *dev,
+                                const struct ipa_interconnect_data *data)
+{
+       struct ipa_interconnect *interconnect;
+       u32 count;
+       int ret;
+
+       count = power->interconnect_count;
+       interconnect = kcalloc(count, sizeof(*interconnect), GFP_KERNEL);
+       if (!interconnect)
+               return -ENOMEM;
+       power->interconnect = interconnect;
+
+       while (count--) {
+               ret = ipa_interconnect_init_one(dev, interconnect, data++);
+               if (ret)
+                       goto out_unwind;
+               interconnect++;
+       }
+
+       return 0;
+
+out_unwind:
+       while (interconnect-- > power->interconnect)
+               ipa_interconnect_exit_one(interconnect);
+       kfree(power->interconnect);
+       power->interconnect = NULL;
+
+       return ret;
+}
+
+/* Inverse of ipa_interconnect_init() */
+static void ipa_interconnect_exit(struct ipa_power *power)
+{
+       struct ipa_interconnect *interconnect;
+
+       interconnect = power->interconnect + power->interconnect_count;
+       while (interconnect-- > power->interconnect)
+               ipa_interconnect_exit_one(interconnect);
+       kfree(power->interconnect);
+       power->interconnect = NULL;
+}
+
+/* Currently we only use one bandwidth level, so just "enable" interconnects */
+static int ipa_interconnect_enable(struct ipa *ipa)
+{
+       struct ipa_interconnect *interconnect;
+       struct ipa_power *power = ipa->power;
+       int ret;
+       u32 i;
+
+       interconnect = power->interconnect;
+       for (i = 0; i < power->interconnect_count; i++) {
+               ret = icc_set_bw(interconnect->path,
+                                interconnect->average_bandwidth,
+                                interconnect->peak_bandwidth);
+               if (ret) {
+                       dev_err(&ipa->pdev->dev,
+                               "error %d enabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
+                       goto out_unwind;
+               }
+               interconnect++;
+       }
+
+       return 0;
+
+out_unwind:
+       while (interconnect-- > power->interconnect)
+               (void)icc_set_bw(interconnect->path, 0, 0);
+
+       return ret;
+}
+
+/* To disable an interconnect, we just its bandwidth to 0 */
+static int ipa_interconnect_disable(struct ipa *ipa)
+{
+       struct ipa_interconnect *interconnect;
+       struct ipa_power *power = ipa->power;
+       struct device *dev = &ipa->pdev->dev;
+       int result = 0;
+       u32 count;
+       int ret;
+
+       count = power->interconnect_count;
+       interconnect = power->interconnect + count;
+       while (count--) {
+               interconnect--;
+               ret = icc_set_bw(interconnect->path, 0, 0);
+               if (ret) {
+                       dev_err(dev, "error %d disabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
+                       /* Try to disable all; record only the first error */
+                       if (!result)
+                               result = ret;
+               }
+       }
+
+       return result;
+}
+
+/* Enable IPA power, enabling interconnects and the core clock */
+static int ipa_power_enable(struct ipa *ipa)
+{
+       int ret;
+
+       ret = ipa_interconnect_enable(ipa);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(ipa->power->core);
+       if (ret) {
+               dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret);
+               (void)ipa_interconnect_disable(ipa);
+       }
+
+       return ret;
+}
+
+/* Inverse of ipa_power_enable() */
+static int ipa_power_disable(struct ipa *ipa)
+{
+       clk_disable_unprepare(ipa->power->core);
+
+       return ipa_interconnect_disable(ipa);
+}
+
+static int ipa_runtime_suspend(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags);
+               ipa_endpoint_suspend(ipa);
+               gsi_suspend(&ipa->gsi);
+       }
+
+       return ipa_power_disable(ipa);
+}
+
+static int ipa_runtime_resume(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+       int ret;
+
+       ret = ipa_power_enable(ipa);
+       if (WARN_ON(ret < 0))
+               return ret;
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               gsi_resume(&ipa->gsi);
+               ipa_endpoint_resume(ipa);
+       }
+
+       return 0;
+}
+
+static int ipa_suspend(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       __set_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
+       return pm_runtime_force_suspend(dev);
+}
+
+static int ipa_resume(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+       int ret;
+
+       ret = pm_runtime_force_resume(dev);
+
+       __clear_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags);
+
+       return ret;
+}
+
+/* Return the current IPA core clock rate */
+u32 ipa_core_clock_rate(struct ipa *ipa)
+{
+       return ipa->power ? (u32)clk_get_rate(ipa->power->core) : 0;
+}
+
+/**
+ * ipa_suspend_handler() - Handle the suspend IPA interrupt
+ * @ipa:       IPA pointer
+ * @irq_id:    IPA interrupt type (unused)
+ *
+ * If an RX endpoint is suspended, and the IPA has a packet destined for
+ * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
+ * that it should resume the endpoint.  If we get one of these interrupts
+ * we just wake up the system.
+ */
+static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
+{
+       /* To handle an IPA interrupt we will have resumed the hardware
+        * just to handle the interrupt, so we're done.  If we are in a
+        * system suspend, trigger a system resume.
+        */
+       if (!__test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->power->flags))
+               if (test_bit(IPA_POWER_FLAG_SYSTEM, ipa->power->flags))
+                       pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
+
+       /* Acknowledge/clear the suspend interrupt on all endpoints */
+       ipa_interrupt_suspend_clear_all(ipa->interrupt);
+}
+
+/* The next few functions coordinate stopping and starting the modem
+ * network device transmit queue.
+ *
+ * Transmit can be running concurrent with power resume, and there's a
+ * chance the resume completes before the transmit path stops the queue,
+ * leaving the queue in a stopped state.  The next two functions are used
+ * to avoid this: ipa_power_modem_queue_stop() is used by ipa_start_xmit()
+ * to conditionally stop the TX queue; and ipa_power_modem_queue_start()
+ * is used by ipa_runtime_resume() to conditionally restart it.
+ *
+ * Two flags and a spinlock are used.  If the queue is stopped, the STOPPED
+ * power flag is set.  And if the queue is started, the STARTED flag is set.
+ * The queue is only started on resume if the STOPPED flag is set.  And the
+ * queue is only started in ipa_start_xmit() if the STARTED flag is *not*
+ * set.  As a result, the queue remains operational if the two activites
+ * happen concurrently regardless of the order they complete.  The spinlock
+ * ensures the flag and TX queue operations are done atomically.
+ *
+ * The first function stops the modem netdev transmit queue, but only if
+ * the STARTED flag is *not* set.  That flag is cleared if it was set.
+ * If the queue is stopped, the STOPPED flag is set.  This is called only
+ * from the power ->runtime_resume operation.
+ */
+void ipa_power_modem_queue_stop(struct ipa *ipa)
+{
+       struct ipa_power *power = ipa->power;
+       unsigned long flags;
+
+       spin_lock_irqsave(&power->spinlock, flags);
+
+       if (!__test_and_clear_bit(IPA_POWER_FLAG_STARTED, power->flags)) {
+               netif_stop_queue(ipa->modem_netdev);
+               __set_bit(IPA_POWER_FLAG_STOPPED, power->flags);
+       }
+
+       spin_unlock_irqrestore(&power->spinlock, flags);
+}
+
+/* This function starts the modem netdev transmit queue, but only if the
+ * STOPPED flag is set.  That flag is cleared if it was set.  If the queue
+ * was restarted, the STARTED flag is set; this allows ipa_start_xmit()
+ * to skip stopping the queue in the event of a race.
+ */
+void ipa_power_modem_queue_wake(struct ipa *ipa)
+{
+       struct ipa_power *power = ipa->power;
+       unsigned long flags;
+
+       spin_lock_irqsave(&power->spinlock, flags);
+
+       if (__test_and_clear_bit(IPA_POWER_FLAG_STOPPED, power->flags)) {
+               __set_bit(IPA_POWER_FLAG_STARTED, power->flags);
+               netif_wake_queue(ipa->modem_netdev);
+       }
+
+       spin_unlock_irqrestore(&power->spinlock, flags);
+}
+
+/* This function clears the STARTED flag once the TX queue is operating */
+void ipa_power_modem_queue_active(struct ipa *ipa)
+{
+       clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
+}
+
+int ipa_power_setup(struct ipa *ipa)
+{
+       int ret;
+
+       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
+                         ipa_suspend_handler);
+
+       ret = device_init_wakeup(&ipa->pdev->dev, true);
+       if (ret)
+               ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+
+       return ret;
+}
+
+void ipa_power_teardown(struct ipa *ipa)
+{
+       (void)device_init_wakeup(&ipa->pdev->dev, false);
+       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+}
+
+/* Initialize IPA power management */
+struct ipa_power *
+ipa_power_init(struct device *dev, const struct ipa_power_data *data)
+{
+       struct ipa_power *power;
+       struct clk *clk;
+       int ret;
+
+       clk = clk_get(dev, "core");
+       if (IS_ERR(clk)) {
+               dev_err_probe(dev, PTR_ERR(clk), "error getting core clock\n");
+
+               return ERR_CAST(clk);
+       }
+
+       ret = clk_set_rate(clk, data->core_clock_rate);
+       if (ret) {
+               dev_err(dev, "error %d setting core clock rate to %u\n",
+                       ret, data->core_clock_rate);
+               goto err_clk_put;
+       }
+
+       power = kzalloc(sizeof(*power), GFP_KERNEL);
+       if (!power) {
+               ret = -ENOMEM;
+               goto err_clk_put;
+       }
+       power->dev = dev;
+       power->core = clk;
+       spin_lock_init(&power->spinlock);
+       power->interconnect_count = data->interconnect_count;
+
+       ret = ipa_interconnect_init(power, dev, data->interconnect_data);
+       if (ret)
+               goto err_kfree;
+
+       pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY);
+       pm_runtime_use_autosuspend(dev);
+       pm_runtime_enable(dev);
+
+       return power;
+
+err_kfree:
+       kfree(power);
+err_clk_put:
+       clk_put(clk);
+
+       return ERR_PTR(ret);
+}
+
+/* Inverse of ipa_power_init() */
+void ipa_power_exit(struct ipa_power *power)
+{
+       struct device *dev = power->dev;
+       struct clk *clk = power->core;
+
+       pm_runtime_disable(dev);
+       pm_runtime_dont_use_autosuspend(dev);
+       ipa_interconnect_exit(power);
+       kfree(power);
+       clk_put(clk);
+}
+
+const struct dev_pm_ops ipa_pm_ops = {
+       .suspend                = ipa_suspend,
+       .resume                 = ipa_resume,
+       .runtime_suspend        = ipa_runtime_suspend,
+       .runtime_resume         = ipa_runtime_resume,
+};
diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h
new file mode 100644 (file)
index 0000000..2151805
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2018-2020 Linaro Ltd.
+ */
+#ifndef _IPA_POWER_H_
+#define _IPA_POWER_H_
+
+struct device;
+
+struct ipa;
+struct ipa_power_data;
+
+/* IPA device power management function block */
+extern const struct dev_pm_ops ipa_pm_ops;
+
+/**
+ * ipa_core_clock_rate() - Return the current IPA core clock rate
+ * @ipa:       IPA structure
+ *
+ * Return: The current clock rate (in Hz), or 0.
+ */
+u32 ipa_core_clock_rate(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_stop() - Possibly stop the modem netdev TX queue
+ * @ipa:       IPA pointer
+ */
+void ipa_power_modem_queue_stop(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_wake() - Possibly wake the modem netdev TX queue
+ * @ipa:       IPA pointer
+ */
+void ipa_power_modem_queue_wake(struct ipa *ipa);
+
+/**
+ * ipa_power_modem_queue_active() - Report modem netdev TX queue active
+ * @ipa:       IPA pointer
+ */
+void ipa_power_modem_queue_active(struct ipa *ipa);
+
+/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa:       IPA pointer
+ *
+ * Return:     0 if successful, or a negative error code
+ */
+int ipa_power_setup(struct ipa *ipa);
+
+/**
+ * ipa_power_teardown() - Inverse of ipa_power_setup()
+ * @ipa:       IPA pointer
+ */
+void ipa_power_teardown(struct ipa *ipa);
+
+/**
+ * ipa_power_init() - Initialize IPA power management
+ * @dev:       IPA device
+ * @data:      Clock configuration data
+ *
+ * Return:     A pointer to an ipa_power structure, or a pointer-coded error
+ */
+struct ipa_power *ipa_power_init(struct device *dev,
+                                const struct ipa_power_data *data);
+
+/**
+ * ipa_power_exit() - Inverse of ipa_power_init()
+ * @power:     IPA power pointer
+ */
+void ipa_power_exit(struct ipa_power *power);
+
+#endif /* _IPA_POWER_H_ */
index 4661105..90f3aec 100644 (file)
@@ -467,10 +467,7 @@ static const struct qmi_ops ipa_client_ops = {
        .new_server     = ipa_client_new_server,
 };
 
-/* This is called by ipa_setup().  We can be informed via remoteproc that
- * the modem has shut down, in which case this function will be called
- * again to prepare for it coming back up again.
- */
+/* Set up for QMI message exchange */
 int ipa_qmi_setup(struct ipa *ipa)
 {
        struct ipa_qmi *ipa_qmi = &ipa->qmi;
@@ -526,6 +523,7 @@ err_server_handle_release:
        return ret;
 }
 
+/* Tear down IPA QMI handles */
 void ipa_qmi_teardown(struct ipa *ipa)
 {
        cancel_work_sync(&ipa->qmi.init_driver_work);
index b6f2055..856ef62 100644 (file)
@@ -39,7 +39,26 @@ struct ipa_qmi {
        bool indication_sent;
 };
 
+/**
+ * ipa_qmi_setup() - Set up for QMI message exchange
+ * @ipa:               IPA pointer
+ *
+ * This is called at the end of ipa_setup(), to prepare for the exchange
+ * of QMI messages that perform a "handshake" between the AP and modem.
+ * When the modem QMI server announces its presence, an AP request message
+ * supplies operating parameters to be used to the modem, and the modem
+ * acknowledges receipt of those parameters.  The modem will not touch the
+ * IPA hardware until this handshake is complete.
+ *
+ * If the modem crashes (or shuts down) a new handshake begins when the
+ * modem's QMI server is started again.
+ */
 int ipa_qmi_setup(struct ipa *ipa);
+
+/**
+ * ipa_qmi_teardown() - Tear down IPA QMI handles
+ * @ipa:               IPA pointer
+ */
 void ipa_qmi_teardown(struct ipa *ipa);
 
 #endif /* !_IPA_QMI_H_ */
index b89dec5..a5b3553 100644 (file)
@@ -99,7 +99,7 @@ struct ipa;
 static inline u32 arbitration_lock_disable_encoded(enum ipa_version version,
                                                   u32 mask)
 {
-       /* assert(version >= IPA_VERSION_4_0); */
+       WARN_ON(version < IPA_VERSION_4_0);
 
        if (version < IPA_VERSION_4_9)
                return u32_encode_bits(mask, GENMASK(20, 17));
@@ -116,7 +116,7 @@ static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version,
 {
        u32 val = enable ? 1 : 0;
 
-       /* assert(version >= IPA_VERSION_4_5); */
+       WARN_ON(version < IPA_VERSION_4_5);
 
        if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7)
                return u32_encode_bits(val, GENMASK(21, 21));
@@ -409,7 +409,7 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version,
 
        val = u32_encode_bits(size, HDR_LEN_FMASK);
        if (version < IPA_VERSION_4_5) {
-               /* ipa_assert(header_size == size); */
+               WARN_ON(header_size != size);
                return val;
        }
 
@@ -429,7 +429,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version,
 
        val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK);
        if (version < IPA_VERSION_4_5) {
-               /* ipa_assert(offset == off); */
+               WARN_ON(offset != off);
                return val;
        }
 
@@ -812,7 +812,7 @@ ipa_reg_irq_suspend_info_offset(enum ipa_version version)
 static inline u32
 ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee)
 {
-       /* assert(version != IPA_VERSION_3_0); */
+       WARN_ON(version == IPA_VERSION_3_0);
 
        if (version < IPA_VERSION_4_9)
                return 0x00003034 + 0x1000 * ee;
@@ -830,7 +830,7 @@ ipa_reg_irq_suspend_en_offset(enum ipa_version version)
 static inline u32
 ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee)
 {
-       /* assert(version != IPA_VERSION_3_0); */
+       WARN_ON(version == IPA_VERSION_3_0);
 
        if (version < IPA_VERSION_4_9)
                return 0x00003038 + 0x1000 * ee;
index 3b2dc21..e3da95d 100644 (file)
@@ -29,7 +29,6 @@
 static bool ipa_resource_limits_valid(struct ipa *ipa,
                                      const struct ipa_resource_data *data)
 {
-#ifdef IPA_VALIDATION
        u32 group_count;
        u32 i;
        u32 j;
@@ -65,7 +64,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
                        if (resource->limits[j].min || resource->limits[j].max)
                                return false;
        }
-#endif /* !IPA_VALIDATION */
+
        return true;
 }
 
index 93270e5..df7639c 100644 (file)
@@ -9,13 +9,13 @@
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/panic_notifier.h>
+#include <linux/pm_runtime.h>
 #include <linux/soc/qcom/smem.h>
 #include <linux/soc/qcom/smem_state.h>
 
 #include "ipa_smp2p.h"
 #include "ipa.h"
 #include "ipa_uc.h"
-#include "ipa_clock.h"
 
 /**
  * DOC: IPA SMP2P communication with the modem
  * SMP2P is a primitive communication mechanism available between the AP and
  * the modem.  The IPA driver uses this for two purposes:  to enable the modem
  * to state that the GSI hardware is ready to use; and to communicate the
- * state of the IPA clock in the event of a crash.
+ * state of IPA power in the event of a crash.
  *
  * GSI needs to have early initialization completed before it can be used.
  * This initialization is done either by Trust Zone or by the modem.  In the
  * latter case, the modem uses an SMP2P interrupt to tell the AP IPA driver
  * when the GSI is ready to use.
  *
- * The modem is also able to inquire about the current state of the IPA
- * clock by trigging another SMP2P interrupt to the AP.  We communicate
- * whether the clock is enabled using two SMP2P state bits--one to
- * indicate the clock state (on or off), and a second to indicate the
- * clock state bit is valid.  The modem will poll the valid bit until it
- * is set, and at that time records whether the AP has the IPA clock enabled.
+ * The modem is also able to inquire about the current state of IPA
+ * power by trigging another SMP2P interrupt to the AP.  We communicate
+ * whether power is enabled using two SMP2P state bits--one to indicate
+ * the power state (on or off), and a second to indicate the power state
+ * bit is valid.  The modem will poll the valid bit until it is set, and
+ * at that time records whether the AP has IPA power enabled.
  *
  * Finally, if the AP kernel panics, we update the SMP2P state bits even if
  * we never receive an interrupt from the modem requesting this.
  * struct ipa_smp2p - IPA SMP2P information
  * @ipa:               IPA pointer
  * @valid_state:       SMEM state indicating enabled state is valid
- * @enabled_state:     SMEM state to indicate clock is enabled
+ * @enabled_state:     SMEM state to indicate power is enabled
  * @valid_bit:         Valid bit in 32-bit SMEM state mask
  * @enabled_bit:       Enabled bit in 32-bit SMEM state mask
  * @enabled_bit:       Enabled bit in 32-bit SMEM state mask
- * @clock_query_irq:   IPA interrupt triggered by modem for clock query
+ * @clock_query_irq:   IPA interrupt triggered by modem for power query
  * @setup_ready_irq:   IPA interrupt triggered by modem to signal GSI ready
- * @clock_on:          Whether IPA clock is on
- * @notified:          Whether modem has been notified of clock state
+ * @power_on:          Whether IPA power is on
+ * @notified:          Whether modem has been notified of power state
  * @disabled:          Whether setup ready interrupt handling is disabled
  * @mutex:             Mutex protecting ready-interrupt/shutdown interlock
  * @panic_notifier:    Panic notifier structure
@@ -65,7 +65,7 @@ struct ipa_smp2p {
        u32 enabled_bit;
        u32 clock_query_irq;
        u32 setup_ready_irq;
-       bool clock_on;
+       bool power_on;
        bool notified;
        bool disabled;
        struct mutex mutex;
@@ -73,28 +73,30 @@ struct ipa_smp2p {
 };
 
 /**
- * ipa_smp2p_notify() - use SMP2P to tell modem about IPA clock state
+ * ipa_smp2p_notify() - use SMP2P to tell modem about IPA power state
  * @smp2p:     SMP2P information
  *
  * This is called either when the modem has requested it (by triggering
- * the modem clock query IPA interrupt) or whenever the AP is shutting down
+ * the modem power query IPA interrupt) or whenever the AP is shutting down
  * (via a panic notifier).  It sets the two SMP2P state bits--one saying
- * whether the IPA clock is running, and the other indicating the first bit
+ * whether the IPA power is on, and the other indicating the first bit
  * is valid.
  */
 static void ipa_smp2p_notify(struct ipa_smp2p *smp2p)
 {
+       struct device *dev;
        u32 value;
        u32 mask;
 
        if (smp2p->notified)
                return;
 
-       smp2p->clock_on = ipa_clock_get_additional(smp2p->ipa);
+       dev = &smp2p->ipa->pdev->dev;
+       smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0;
 
-       /* Signal whether the clock is enabled */
+       /* Signal whether the IPA power is enabled */
        mask = BIT(smp2p->enabled_bit);
-       value = smp2p->clock_on ? mask : 0;
+       value = smp2p->power_on ? mask : 0;
        qcom_smem_state_update_bits(smp2p->enabled_state, mask, value);
 
        /* Now indicate that the enabled flag is valid */
@@ -124,7 +126,7 @@ static int ipa_smp2p_panic_notifier(struct notifier_block *nb,
 
        ipa_smp2p_notify(smp2p);
 
-       if (smp2p->clock_on)
+       if (smp2p->power_on)
                ipa_uc_panic_notifier(smp2p->ipa);
 
        return NOTIFY_DONE;
@@ -150,19 +152,31 @@ static void ipa_smp2p_panic_notifier_unregister(struct ipa_smp2p *smp2p)
 static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
 {
        struct ipa_smp2p *smp2p = dev_id;
+       struct device *dev;
+       int ret;
 
        mutex_lock(&smp2p->mutex);
 
-       if (!smp2p->disabled) {
-               int ret;
+       if (smp2p->disabled)
+               goto out_mutex_unlock;
+       smp2p->disabled = true;         /* If any others arrive, ignore them */
 
-               ret = ipa_setup(smp2p->ipa);
-               if (ret)
-                       dev_err(&smp2p->ipa->pdev->dev,
-                               "error %d from ipa_setup()\n", ret);
-               smp2p->disabled = true;
+       /* Power needs to be active for setup */
+       dev = &smp2p->ipa->pdev->dev;
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               dev_err(dev, "error %d getting power for setup\n", ret);
+               goto out_power_put;
        }
 
+       /* An error here won't cause driver shutdown, so warn if one occurs */
+       ret = ipa_setup(smp2p->ipa);
+       WARN(ret != 0, "error %d from ipa_setup()\n", ret);
+
+out_power_put:
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
+out_mutex_unlock:
        mutex_unlock(&smp2p->mutex);
 
        return IRQ_HANDLED;
@@ -195,14 +209,17 @@ static void ipa_smp2p_irq_exit(struct ipa_smp2p *smp2p, u32 irq)
        free_irq(irq, smp2p);
 }
 
-/* Drop the clock reference if it was taken in ipa_smp2p_notify() */
-static void ipa_smp2p_clock_release(struct ipa *ipa)
+/* Drop the power reference if it was taken in ipa_smp2p_notify() */
+static void ipa_smp2p_power_release(struct ipa *ipa)
 {
-       if (!ipa->smp2p->clock_on)
+       struct device *dev = &ipa->pdev->dev;
+
+       if (!ipa->smp2p->power_on)
                return;
 
-       ipa_clock_put(ipa);
-       ipa->smp2p->clock_on = false;
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
+       ipa->smp2p->power_on = false;
 }
 
 /* Initialize the IPA SMP2P subsystem */
@@ -236,7 +253,7 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init)
 
        smp2p->ipa = ipa;
 
-       /* These fields are needed by the clock query interrupt
+       /* These fields are needed by the power query interrupt
         * handler, so initialize them now.
         */
        mutex_init(&smp2p->mutex);
@@ -289,8 +306,8 @@ void ipa_smp2p_exit(struct ipa *ipa)
                ipa_smp2p_irq_exit(smp2p, smp2p->setup_ready_irq);
        ipa_smp2p_panic_notifier_unregister(smp2p);
        ipa_smp2p_irq_exit(smp2p, smp2p->clock_query_irq);
-       /* We won't get notified any more; drop clock reference (if any) */
-       ipa_smp2p_clock_release(ipa);
+       /* We won't get notified any more; drop power reference (if any) */
+       ipa_smp2p_power_release(ipa);
        ipa->smp2p = NULL;
        mutex_destroy(&smp2p->mutex);
        kfree(smp2p);
@@ -319,13 +336,13 @@ void ipa_smp2p_notify_reset(struct ipa *ipa)
        if (!smp2p->notified)
                return;
 
-       ipa_smp2p_clock_release(ipa);
+       ipa_smp2p_power_release(ipa);
 
-       /* Reset the clock enabled valid flag */
+       /* Reset the power enabled valid flag */
        mask = BIT(smp2p->valid_bit);
        qcom_smem_state_update_bits(smp2p->valid_state, mask, 0);
 
-       /* Mark the clock disabled for good measure... */
+       /* Mark the power disabled for good measure... */
        mask = BIT(smp2p->enabled_bit);
        qcom_smem_state_update_bits(smp2p->enabled_state, mask, 0);
 
index 2031943..99a9567 100644 (file)
@@ -39,7 +39,7 @@ void ipa_smp2p_disable(struct ipa *ipa);
  * ipa_smp2p_notify_reset() - Reset modem notification state
  * @ipa:       IPA pointer
  *
- * If the modem crashes it queries the IPA clock state.  In cleaning
+ * If the modem crashes it queries the IPA power state.  In cleaning
  * up after such a crash this is used to reset some state maintained
  * for managing this notification.
  */
index c617a91..2324e1b 100644 (file)
  */
 #define IPA_ZERO_RULE_SIZE             (2 * sizeof(__le32))
 
-#ifdef IPA_VALIDATE
-
 /* Check things that can be validated at build time. */
 static void ipa_table_validate_build(void)
 {
@@ -161,7 +159,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
        else
                size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64);
 
-       if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed))
+       if (!ipa_cmd_table_valid(ipa, mem, route))
                return false;
 
        /* mem->size >= size is sufficient, but we'll demand more */
@@ -169,7 +167,7 @@ ipa_table_valid_one(struct ipa *ipa, enum ipa_mem_id mem_id, bool route)
                return true;
 
        /* Hashed table regions can be zero size if hashing is not supported */
-       if (hashed && !mem->size)
+       if (ipa_table_hash_support(ipa) && !mem->size)
                return true;
 
        dev_err(dev, "%s table region %u size 0x%02x, expected 0x%02x\n",
@@ -183,14 +181,22 @@ bool ipa_table_valid(struct ipa *ipa)
 {
        bool valid;
 
-       valid = ipa_table_valid_one(IPA_MEM_V4_FILTER, false);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V4_FILTER_HASHED, false);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER, false);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V6_FILTER_HASHED, false);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE, true);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V4_ROUTE_HASHED, true);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE, true);
-       valid = valid && ipa_table_valid_one(IPA_MEM_V6_ROUTE_HASHED, true);
+       valid = ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER, false);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER, false);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE, true);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE, true);
+
+       if (!ipa_table_hash_support(ipa))
+               return valid;
+
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_FILTER_HASHED,
+                                            false);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_FILTER_HASHED,
+                                            false);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V4_ROUTE_HASHED,
+                                            true);
+       valid = valid && ipa_table_valid_one(ipa, IPA_MEM_V6_ROUTE_HASHED,
+                                            true);
 
        return valid;
 }
@@ -217,14 +223,6 @@ bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_map)
        return true;
 }
 
-#else /* !IPA_VALIDATE */
-static void ipa_table_validate_build(void)
-
-{
-}
-
-#endif /* !IPA_VALIDATE */
-
 /* Zero entry count means no table, so just return a 0 address */
 static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
 {
@@ -233,7 +231,7 @@ static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
        if (!count)
                return 0;
 
-/* assert(count <= max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX)); */
+       WARN_ON(count > max_t(u32, IPA_FILTER_COUNT_MAX, IPA_ROUTE_COUNT_MAX));
 
        /* Skip over the zero rule and possibly the filter mask */
        skip = filter_mask ? 1 : 2;
index 1e2be9f..b6a9a0d 100644 (file)
@@ -16,8 +16,6 @@ struct ipa;
 /* The maximum number of route table entries (IPv4, IPv6; hashed or not) */
 #define IPA_ROUTE_COUNT_MAX    15
 
-#ifdef IPA_VALIDATE
-
 /**
  * ipa_table_valid() - Validate route and filter table memory regions
  * @ipa:       IPA pointer
@@ -35,20 +33,6 @@ bool ipa_table_valid(struct ipa *ipa);
  */
 bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask);
 
-#else /* !IPA_VALIDATE */
-
-static inline bool ipa_table_valid(struct ipa *ipa)
-{
-       return true;
-}
-
-static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask)
-{
-       return true;
-}
-
-#endif /* !IPA_VALIDATE */
-
 /**
  * ipa_table_hash_support() - Return true if hashed tables are supported
  * @ipa:       IPA pointer
index fd92198..856e55a 100644 (file)
@@ -7,9 +7,9 @@
 #include <linux/types.h>
 #include <linux/io.h>
 #include <linux/delay.h>
+#include <linux/pm_runtime.h>
 
 #include "ipa.h"
-#include "ipa_clock.h"
 #include "ipa_uc.h"
 
 /**
@@ -131,7 +131,7 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
        if (shared->event == IPA_UC_EVENT_ERROR)
                dev_err(dev, "microcontroller error event\n");
        else if (shared->event != IPA_UC_EVENT_LOG_INFO)
-               dev_err(dev, "unsupported microcontroller event %hhu\n",
+               dev_err(dev, "unsupported microcontroller event %u\n",
                        shared->event);
        /* The LOG_INFO event can be safely ignored */
 }
@@ -140,53 +140,77 @@ static void ipa_uc_event_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
 static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id)
 {
        struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa);
+       struct device *dev = &ipa->pdev->dev;
 
        /* An INIT_COMPLETED response message is sent to the AP by the
         * microcontroller when it is operational.  Other than this, the AP
         * should only receive responses from the microcontroller when it has
         * sent it a request message.
         *
-        * We can drop the clock reference taken in ipa_uc_setup() once we
+        * We can drop the power reference taken in ipa_uc_power() once we
         * know the microcontroller has finished its initialization.
         */
        switch (shared->response) {
        case IPA_UC_RESPONSE_INIT_COMPLETED:
-               ipa->uc_loaded = true;
-               ipa_clock_put(ipa);
+               if (ipa->uc_powered) {
+                       ipa->uc_loaded = true;
+                       pm_runtime_mark_last_busy(dev);
+                       (void)pm_runtime_put_autosuspend(dev);
+                       ipa->uc_powered = false;
+               } else {
+                       dev_warn(dev, "unexpected init_completed response\n");
+               }
                break;
        default:
-               dev_warn(&ipa->pdev->dev,
-                        "unsupported microcontroller response %hhu\n",
+               dev_warn(dev, "unsupported microcontroller response %u\n",
                         shared->response);
                break;
        }
 }
 
-/* ipa_uc_setup() - Set up the microcontroller */
-void ipa_uc_setup(struct ipa *ipa)
+/* Configure the IPA microcontroller subsystem */
+void ipa_uc_config(struct ipa *ipa)
 {
-       /* The microcontroller needs the IPA clock running until it has
-        * completed its initialization.  It signals this by sending an
-        * INIT_COMPLETED response message to the AP.  This could occur after
-        * we have finished doing the rest of the IPA initialization, so we
-        * need to take an extra "proxy" reference, and hold it until we've
-        * received that signal.  (This reference is dropped in
-        * ipa_uc_response_hdlr(), above.)
-        */
-       ipa_clock_get(ipa);
-
+       ipa->uc_powered = false;
        ipa->uc_loaded = false;
        ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_0, ipa_uc_event_handler);
        ipa_interrupt_add(ipa->interrupt, IPA_IRQ_UC_1, ipa_uc_response_hdlr);
 }
 
-/* Inverse of ipa_uc_setup() */
-void ipa_uc_teardown(struct ipa *ipa)
+/* Inverse of ipa_uc_config() */
+void ipa_uc_deconfig(struct ipa *ipa)
 {
+       struct device *dev = &ipa->pdev->dev;
+
        ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1);
        ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0);
-       if (!ipa->uc_loaded)
-               ipa_clock_put(ipa);
+       if (!ipa->uc_powered)
+               return;
+
+       pm_runtime_mark_last_busy(dev);
+       (void)pm_runtime_put_autosuspend(dev);
+}
+
+/* Take a proxy power reference for the microcontroller */
+void ipa_uc_power(struct ipa *ipa)
+{
+       static bool already;
+       struct device *dev;
+       int ret;
+
+       if (already)
+               return;
+       already = true;         /* Only do this on first boot */
+
+       /* This power reference dropped in ipa_uc_response_hdlr() above */
+       dev = &ipa->pdev->dev;
+       ret = pm_runtime_get_sync(dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(dev);
+               dev_err(dev, "error %d getting proxy power\n", ret);
+       } else {
+               ipa->uc_powered = true;
+       }
 }
 
 /* Send a command to the microcontroller */
index e851089..23847f9 100644 (file)
@@ -9,16 +9,30 @@
 struct ipa;
 
 /**
- * ipa_uc_setup() - set up the IPA microcontroller subsystem
+ * ipa_uc_config() - Configure the IPA microcontroller subsystem
  * @ipa:       IPA pointer
  */
-void ipa_uc_setup(struct ipa *ipa);
+void ipa_uc_config(struct ipa *ipa);
 
 /**
- * ipa_uc_teardown() - inverse of ipa_uc_setup()
+ * ipa_uc_deconfig() - Inverse of ipa_uc_config()
  * @ipa:       IPA pointer
  */
-void ipa_uc_teardown(struct ipa *ipa);
+void ipa_uc_deconfig(struct ipa *ipa);
+
+/**
+ * ipa_uc_power() - Take a proxy power reference for the microcontroller
+ * @ipa:       IPA pointer
+ *
+ * The first time the modem boots, it loads firmware for and starts the
+ * IPA-resident microcontroller.  The microcontroller signals that it
+ * has completed its initialization by sending an INIT_COMPLETED response
+ * message to the AP.  The AP must ensure the IPA is powered until
+ * it receives this message, and to do so we take a "proxy" clock
+ * reference on its behalf here.  Once we receive the INIT_COMPLETED
+ * message (in ipa_uc_response_hdlr()) we drop this power reference.
+ */
+void ipa_uc_power(struct ipa *ipa);
 
 /**
  * ipa_uc_panic_notifier()
index a707502..c0b21a5 100644 (file)
@@ -732,6 +732,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
        port = ipvlan_port_get_rtnl(dev);
 
        switch (event) {
+       case NETDEV_UP:
        case NETDEV_CHANGE:
                list_for_each_entry(ipvlan, &port->ipvlans, pnode)
                        netif_stacked_transfer_operstate(ipvlan->phy_dev,
index 80de976..35f46ad 100644 (file)
@@ -829,7 +829,7 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
-static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int macvlan_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct net_device *real_dev = macvlan_dev_real_dev(dev);
        const struct net_device_ops *ops = real_dev->netdev_ops;
@@ -845,8 +845,8 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        break;
                fallthrough;
        case SIOCGHWTSTAMP:
-               if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
-                       err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
+               if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
+                       err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
                break;
        }
 
@@ -1151,7 +1151,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
        .ndo_stop               = macvlan_stop,
        .ndo_start_xmit         = macvlan_start_xmit,
        .ndo_change_mtu         = macvlan_change_mtu,
-       .ndo_do_ioctl           = macvlan_do_ioctl,
+       .ndo_eth_ioctl          = macvlan_eth_ioctl,
        .ndo_fix_features       = macvlan_fix_features,
        .ndo_change_rx_flags    = macvlan_change_rx_flags,
        .ndo_set_mac_address    = macvlan_set_mac_address,
diff --git a/drivers/net/mctp/Kconfig b/drivers/net/mctp/Kconfig
new file mode 100644 (file)
index 0000000..d8f966c
--- /dev/null
@@ -0,0 +1,8 @@
+
+if MCTP
+
+menu "MCTP Device Drivers"
+
+endmenu
+
+endif
diff --git a/drivers/net/mctp/Makefile b/drivers/net/mctp/Makefile
new file mode 100644 (file)
index 0000000..e69de29
index 99a6c13..6da1fcb 100644 (file)
@@ -169,9 +169,10 @@ config MDIO_OCTEON
 config MDIO_IPQ4019
        tristate "Qualcomm IPQ4019 MDIO interface support"
        depends on HAS_IOMEM && OF_MDIO
+       depends on COMMON_CLK
        help
          This driver supports the MDIO interface found in Qualcomm
-         IPQ40xx series Soc-s.
+         IPQ40xx, IPQ60xx, IPQ807x and IPQ50xx series Soc-s.
 
 config MDIO_IPQ8064
        tristate "Qualcomm IPQ8064 MDIO interface support"
index 9cd71d8..0d7d3e1 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/clk.h>
 
 #define MDIO_MODE_REG                          0x40
 #define MDIO_ADDR_REG                          0x44
 #define IPQ4019_MDIO_TIMEOUT   10000
 #define IPQ4019_MDIO_SLEEP             10
 
+/* MDIO clock source frequency is fixed to 100M */
+#define IPQ_MDIO_CLK_RATE      100000000
+
+#define IPQ_PHY_SET_DELAY_US   100000
+
 struct ipq4019_mdio_data {
        void __iomem    *membase;
+       void __iomem *eth_ldo_rdy;
+       struct clk *mdio_clk;
 };
 
 static int ipq4019_mdio_wait_busy(struct mii_bus *bus)
@@ -171,6 +179,30 @@ static int ipq4019_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
        return 0;
 }
 
+static int ipq_mdio_reset(struct mii_bus *bus)
+{
+       struct ipq4019_mdio_data *priv = bus->priv;
+       u32 val;
+       int ret;
+
+       /* To indicate CMN_PLL that ethernet_ldo has been ready if platform resource 1
+        * is specified in the device tree.
+        */
+       if (priv->eth_ldo_rdy) {
+               val = readl(priv->eth_ldo_rdy);
+               val |= BIT(0);
+               writel(val, priv->eth_ldo_rdy);
+               fsleep(IPQ_PHY_SET_DELAY_US);
+       }
+
+       /* Configure MDIO clock source frequency if clock is specified in the device tree */
+       ret = clk_set_rate(priv->mdio_clk, IPQ_MDIO_CLK_RATE);
+       if (ret)
+               return ret;
+
+       return clk_prepare_enable(priv->mdio_clk);
+}
+
 static int ipq4019_mdio_probe(struct platform_device *pdev)
 {
        struct ipq4019_mdio_data *priv;
@@ -187,9 +219,17 @@ static int ipq4019_mdio_probe(struct platform_device *pdev)
        if (IS_ERR(priv->membase))
                return PTR_ERR(priv->membase);
 
+       priv->mdio_clk = devm_clk_get_optional(&pdev->dev, "gcc_mdio_ahb_clk");
+       if (IS_ERR(priv->mdio_clk))
+               return PTR_ERR(priv->mdio_clk);
+
+       /* The platform resource is provided on the chipset IPQ5018 */
+       priv->eth_ldo_rdy = devm_platform_ioremap_resource(pdev, 1);
+
        bus->name = "ipq4019_mdio";
        bus->read = ipq4019_mdio_read;
        bus->write = ipq4019_mdio_write;
+       bus->reset = ipq_mdio_reset;
        bus->parent = &pdev->dev;
        snprintf(bus->id, MII_BUS_ID_SIZE, "%s%d", pdev->name, pdev->id);
 
@@ -215,6 +255,7 @@ static int ipq4019_mdio_remove(struct platform_device *pdev)
 
 static const struct of_device_id ipq4019_mdio_dt_ids[] = {
        { .compatible = "qcom,ipq4019-mdio" },
+       { .compatible = "qcom,ipq5018-mdio" },
        { }
 };
 MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids);
index 2d67e12..1ee592d 100644 (file)
@@ -134,7 +134,6 @@ static int mscc_miim_reset(struct mii_bus *bus)
 
 static int mscc_miim_probe(struct platform_device *pdev)
 {
-       struct resource *res;
        struct mii_bus *bus;
        struct mscc_miim_dev *dev;
        int ret;
@@ -157,13 +156,10 @@ static int mscc_miim_probe(struct platform_device *pdev)
                return PTR_ERR(dev->regs);
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       if (res) {
-               dev->phy_regs = devm_ioremap_resource(&pdev->dev, res);
-               if (IS_ERR(dev->phy_regs)) {
-                       dev_err(&pdev->dev, "Unable to map internal phy registers\n");
-                       return PTR_ERR(dev->phy_regs);
-               }
+       dev->phy_regs = devm_platform_ioremap_resource(pdev, 1);
+       if (IS_ERR(dev->phy_regs)) {
+               dev_err(&pdev->dev, "Unable to map internal phy registers\n");
+               return PTR_ERR(dev->phy_regs);
        }
 
        ret = of_mdiobus_register(bus, pdev->dev.of_node);
diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile
deleted file mode 100644 (file)
index f71b9f8..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_MHI_NET) += mhi_net.o
-
-mhi_net-y := net.o proto_mbim.o
diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h
deleted file mode 100644 (file)
index 1d0c499..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-struct mhi_net_stats {
-       u64_stats_t rx_packets;
-       u64_stats_t rx_bytes;
-       u64_stats_t rx_errors;
-       u64_stats_t rx_dropped;
-       u64_stats_t rx_length_errors;
-       u64_stats_t tx_packets;
-       u64_stats_t tx_bytes;
-       u64_stats_t tx_errors;
-       u64_stats_t tx_dropped;
-       struct u64_stats_sync tx_syncp;
-       struct u64_stats_sync rx_syncp;
-};
-
-struct mhi_net_dev {
-       struct mhi_device *mdev;
-       struct net_device *ndev;
-       struct sk_buff *skbagg_head;
-       struct sk_buff *skbagg_tail;
-       const struct mhi_net_proto *proto;
-       void *proto_data;
-       struct delayed_work rx_refill;
-       struct mhi_net_stats stats;
-       u32 rx_queue_sz;
-       int msg_enable;
-       unsigned int mru;
-};
-
-struct mhi_net_proto {
-       int (*init)(struct mhi_net_dev *mhi_netdev);
-       struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-       void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-};
-
-extern const struct mhi_net_proto proto_mbim;
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
deleted file mode 100644 (file)
index e60e38c..0000000
+++ /dev/null
@@ -1,486 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-#include <linux/if_arp.h>
-#include <linux/mhi.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/wwan.h>
-
-#include "mhi.h"
-
-#define MHI_NET_MIN_MTU                ETH_MIN_MTU
-#define MHI_NET_MAX_MTU                0xffff
-#define MHI_NET_DEFAULT_MTU    0x4000
-
-/* When set to false, the default netdev (link 0) is not created, and it's up
- * to user to create the link (via wwan rtnetlink).
- */
-static bool create_default_iface = true;
-module_param(create_default_iface, bool, 0);
-
-struct mhi_device_info {
-       const char *netname;
-       const struct mhi_net_proto *proto;
-};
-
-static int mhi_ndo_open(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       /* Feed the rx buffer pool */
-       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-
-       /* Carrier is established via out-of-band channel (e.g. qmi) */
-       netif_carrier_on(ndev);
-
-       netif_start_queue(ndev);
-
-       return 0;
-}
-
-static int mhi_ndo_stop(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       netif_stop_queue(ndev);
-       netif_carrier_off(ndev);
-       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
-
-       return 0;
-}
-
-static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       int err;
-
-       if (proto && proto->tx_fixup) {
-               skb = proto->tx_fixup(mhi_netdev, skb);
-               if (unlikely(!skb))
-                       goto exit_drop;
-       }
-
-       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
-       if (unlikely(err)) {
-               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
-                                   ndev->name, err);
-               dev_kfree_skb_any(skb);
-               goto exit_drop;
-       }
-
-       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_stop_queue(ndev);
-
-       return NETDEV_TX_OK;
-
-exit_drop:
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       return NETDEV_TX_OK;
-}
-
-static void mhi_ndo_get_stats64(struct net_device *ndev,
-                               struct rtnl_link_stats64 *stats)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       unsigned int start;
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
-               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
-               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
-               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
-               stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped);
-               stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
-               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
-               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
-               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
-               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
-}
-
-static const struct net_device_ops mhi_netdev_ops = {
-       .ndo_open               = mhi_ndo_open,
-       .ndo_stop               = mhi_ndo_stop,
-       .ndo_start_xmit         = mhi_ndo_xmit,
-       .ndo_get_stats64        = mhi_ndo_get_stats64,
-};
-
-static void mhi_net_setup(struct net_device *ndev)
-{
-       ndev->header_ops = NULL;  /* No header */
-       ndev->type = ARPHRD_RAWIP;
-       ndev->hard_header_len = 0;
-       ndev->addr_len = 0;
-       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
-       ndev->netdev_ops = &mhi_netdev_ops;
-       ndev->mtu = MHI_NET_DEFAULT_MTU;
-       ndev->min_mtu = MHI_NET_MIN_MTU;
-       ndev->max_mtu = MHI_NET_MAX_MTU;
-       ndev->tx_queue_len = 1000;
-}
-
-static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
-                                      struct sk_buff *skb)
-{
-       struct sk_buff *head = mhi_netdev->skbagg_head;
-       struct sk_buff *tail = mhi_netdev->skbagg_tail;
-
-       /* This is non-paged skb chaining using frag_list */
-       if (!head) {
-               mhi_netdev->skbagg_head = skb;
-               return skb;
-       }
-
-       if (!skb_shinfo(head)->frag_list)
-               skb_shinfo(head)->frag_list = skb;
-       else
-               tail->next = skb;
-
-       head->len += skb->len;
-       head->data_len += skb->len;
-       head->truesize += skb->truesize;
-
-       mhi_netdev->skbagg_tail = skb;
-
-       return mhi_netdev->skbagg_head;
-}
-
-static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct sk_buff *skb = mhi_res->buf_addr;
-       int free_desc_count;
-
-       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (unlikely(mhi_res->transaction_status)) {
-               switch (mhi_res->transaction_status) {
-               case -EOVERFLOW:
-                       /* Packet can not fit in one MHI buffer and has been
-                        * split over multiple MHI transfers, do re-aggregation.
-                        * That usually means the device side MTU is larger than
-                        * the host side MTU/MRU. Since this is not optimal,
-                        * print a warning (once).
-                        */
-                       netdev_warn_once(mhi_netdev->ndev,
-                                        "Fragmented packets received, fix MTU?\n");
-                       skb_put(skb, mhi_res->bytes_xferd);
-                       mhi_net_skb_agg(mhi_netdev, skb);
-                       break;
-               case -ENOTCONN:
-                       /* MHI layer stopping/resetting the DL channel */
-                       dev_kfree_skb_any(skb);
-                       return;
-               default:
-                       /* Unknown error, simply drop */
-                       dev_kfree_skb_any(skb);
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-               }
-       } else {
-               skb_put(skb, mhi_res->bytes_xferd);
-
-               if (mhi_netdev->skbagg_head) {
-                       /* Aggregate the final fragment */
-                       skb = mhi_net_skb_agg(mhi_netdev, skb);
-                       mhi_netdev->skbagg_head = NULL;
-               }
-
-               u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-               u64_stats_inc(&mhi_netdev->stats.rx_packets);
-               u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
-               u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-
-               switch (skb->data[0] & 0xf0) {
-               case 0x40:
-                       skb->protocol = htons(ETH_P_IP);
-                       break;
-               case 0x60:
-                       skb->protocol = htons(ETH_P_IPV6);
-                       break;
-               default:
-                       skb->protocol = htons(ETH_P_MAP);
-                       break;
-               }
-
-               if (proto && proto->rx)
-                       proto->rx(mhi_netdev, skb);
-               else
-                       netif_rx(skb);
-       }
-
-       /* Refill if RX buffers queue becomes low */
-       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
-               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-}
-
-static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb = mhi_res->buf_addr;
-
-       /* Hardware has consumed the buffer, so free the skb (which is not
-        * freed by the MHI stack) and perform accounting.
-        */
-       dev_consume_skb_any(skb);
-
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       if (unlikely(mhi_res->transaction_status)) {
-
-               /* MHI layer stopping/resetting the UL channel */
-               if (mhi_res->transaction_status == -ENOTCONN) {
-                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-                       return;
-               }
-
-               u64_stats_inc(&mhi_netdev->stats.tx_errors);
-       } else {
-               u64_stats_inc(&mhi_netdev->stats.tx_packets);
-               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
-       }
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_wake_queue(ndev);
-}
-
-static void mhi_net_rx_refill_work(struct work_struct *work)
-{
-       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
-                                                     rx_refill.work);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb;
-       unsigned int size;
-       int err;
-
-       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
-
-       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
-               skb = netdev_alloc_skb(ndev, size);
-               if (unlikely(!skb))
-                       break;
-
-               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
-               if (unlikely(err)) {
-                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
-                                           ndev->name, err);
-                       kfree_skb(skb);
-                       break;
-               }
-
-               /* Do not hog the CPU if rx buffers are consumed faster than
-                * queued (unlikely).
-                */
-               cond_resched();
-       }
-
-       /* If we're still starved of rx buffers, reschedule later */
-       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
-               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
-}
-
-static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
-                          struct netlink_ext_ack *extack)
-{
-       const struct mhi_device_info *info;
-       struct mhi_device *mhi_dev = ctxt;
-       struct mhi_net_dev *mhi_netdev;
-       int err;
-
-       info = (struct mhi_device_info *)mhi_dev->id->driver_data;
-
-       /* For now we only support one link (link context 0), driver must be
-        * reworked to break 1:1 relationship for net MBIM and to forward setup
-        * call to rmnet(QMAP) otherwise.
-        */
-       if (if_id != 0)
-               return -EINVAL;
-
-       if (dev_get_drvdata(&mhi_dev->dev))
-               return -EBUSY;
-
-       mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
-       mhi_netdev->ndev = ndev;
-       mhi_netdev->mdev = mhi_dev;
-       mhi_netdev->skbagg_head = NULL;
-       mhi_netdev->proto = info->proto;
-
-       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
-       u64_stats_init(&mhi_netdev->stats.rx_syncp);
-       u64_stats_init(&mhi_netdev->stats.tx_syncp);
-
-       /* Start MHI channels */
-       err = mhi_prepare_for_transfer(mhi_dev);
-       if (err)
-               goto out_err;
-
-       /* Number of transfer descriptors determines size of the queue */
-       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (extack)
-               err = register_netdevice(ndev);
-       else
-               err = register_netdev(ndev);
-       if (err)
-               goto out_err;
-
-       if (mhi_netdev->proto) {
-               err = mhi_netdev->proto->init(mhi_netdev);
-               if (err)
-                       goto out_err_proto;
-       }
-
-       return 0;
-
-out_err_proto:
-       unregister_netdevice(ndev);
-out_err:
-       free_netdev(ndev);
-       return err;
-}
-
-static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
-                           struct list_head *head)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       struct mhi_device *mhi_dev = ctxt;
-
-       if (head)
-               unregister_netdevice_queue(ndev, head);
-       else
-               unregister_netdev(ndev);
-
-       mhi_unprepare_from_transfer(mhi_dev);
-
-       kfree_skb(mhi_netdev->skbagg_head);
-
-       dev_set_drvdata(&mhi_dev->dev, NULL);
-}
-
-static const struct wwan_ops mhi_wwan_ops = {
-       .priv_size = sizeof(struct mhi_net_dev),
-       .setup = mhi_net_setup,
-       .newlink = mhi_net_newlink,
-       .dellink = mhi_net_dellink,
-};
-
-static int mhi_net_probe(struct mhi_device *mhi_dev,
-                        const struct mhi_device_id *id)
-{
-       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-       struct net_device *ndev;
-       int err;
-
-       err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
-                               WWAN_NO_DEFAULT_LINK);
-       if (err)
-               return err;
-
-       if (!create_default_iface)
-               return 0;
-
-       /* Create a default interface which is used as either RMNET real-dev,
-        * MBIM link 0 or ip link 0)
-        */
-       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
-                           NET_NAME_PREDICTABLE, mhi_net_setup);
-       if (!ndev) {
-               err = -ENOMEM;
-               goto err_unregister;
-       }
-
-       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
-
-       err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
-       if (err)
-               goto err_release;
-
-       return 0;
-
-err_release:
-       free_netdev(ndev);
-err_unregister:
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       return err;
-}
-
-static void mhi_net_remove(struct mhi_device *mhi_dev)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-
-       /* WWAN core takes care of removing remaining links */
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       if (create_default_iface)
-               mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
-}
-
-static const struct mhi_device_info mhi_hwip0 = {
-       .netname = "mhi_hwip%d",
-};
-
-static const struct mhi_device_info mhi_swip0 = {
-       .netname = "mhi_swip%d",
-};
-
-static const struct mhi_device_info mhi_hwip0_mbim = {
-       .netname = "mhi_mbim%d",
-       .proto = &proto_mbim,
-};
-
-static const struct mhi_device_id mhi_net_id_table[] = {
-       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
-       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
-       /* Software data PATH (to modem CPU) */
-       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
-       /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
-       { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim },
-       {}
-};
-MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
-
-static struct mhi_driver mhi_net_driver = {
-       .probe = mhi_net_probe,
-       .remove = mhi_net_remove,
-       .dl_xfer_cb = mhi_net_dl_callback,
-       .ul_xfer_cb = mhi_net_ul_callback,
-       .id_table = mhi_net_id_table,
-       .driver = {
-               .name = "mhi_net",
-               .owner = THIS_MODULE,
-       },
-};
-
-module_mhi_driver(mhi_net_driver);
-
-MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
-MODULE_DESCRIPTION("Network over MHI");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
deleted file mode 100644 (file)
index bf1ad86..0000000
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- *
- * This driver copy some code from cdc_ncm, which is:
- * Copyright (C) ST-Ericsson 2010-2012
- * and cdc_mbim, which is:
- * Copyright (c) 2012  Smith Micro Software, Inc.
- * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
- *
- */
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/wwan.h>
-#include <linux/skbuff.h>
-#include <linux/usb.h>
-#include <linux/usb/cdc.h>
-#include <linux/usb/usbnet.h>
-#include <linux/usb/cdc_ncm.h>
-
-#include "mhi.h"
-
-#define MBIM_NDP16_SIGN_MASK 0x00ffffff
-
-/* Usual WWAN MTU */
-#define MHI_MBIM_DEFAULT_MTU 1500
-
-/* 3500 allows to optimize skb allocation, the skbs will basically fit in
- * one 4K page. Large MBIM packets will simply be split over several MHI
- * transfers and chained by the MHI net layer (zerocopy).
- */
-#define MHI_MBIM_DEFAULT_MRU 3500
-
-struct mbim_context {
-       u16 rx_seq;
-       u16 tx_seq;
-};
-
-static void __mbim_length_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_length_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static void __mbim_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static int mbim_rx_verify_nth16(struct sk_buff *skb)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       struct mbim_context *ctx = dev->proto_data;
-       struct usb_cdc_ncm_nth16 *nth16;
-       int len;
-
-       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
-                       sizeof(struct usb_cdc_ncm_ndp16)) {
-               netif_dbg(dev, rx_err, dev->ndev, "frame too short\n");
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
-
-       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "invalid NTH16 signature <%#010x>\n",
-                         le32_to_cpu(nth16->dwSignature));
-               __mbim_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       /* No limit on the block length, except the size of the data pkt */
-       len = le16_to_cpu(nth16->wBlockLength);
-       if (len > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "NTB does not fit into the skb %u/%u\n", len,
-                         skb->len);
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
-           (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) &&
-           !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "sequence number glitch prev=%d curr=%d\n",
-                         ctx->rx_seq, le16_to_cpu(nth16->wSequence));
-       }
-       ctx->rx_seq = le16_to_cpu(nth16->wSequence);
-
-       return le16_to_cpu(nth16->wNdpIndex);
-}
-
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       int ret;
-
-       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
-               netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
-                         le16_to_cpu(ndp16->wLength));
-               return -EINVAL;
-       }
-
-       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
-                       / sizeof(struct usb_cdc_ncm_dpe16));
-       ret--; /* Last entry is always a NULL terminator */
-
-       if (sizeof(struct usb_cdc_ncm_ndp16) +
-            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "Invalid nframes = %d\n", ret);
-               return -EINVAL;
-       }
-
-       return ret;
-}
-
-static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-       int ndpoffset;
-
-       /* Check NTB header and retrieve first NDP offset */
-       ndpoffset = mbim_rx_verify_nth16(skb);
-       if (ndpoffset < 0) {
-               net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name);
-               goto error;
-       }
-
-       /* Process each NDP */
-       while (1) {
-               struct usb_cdc_ncm_ndp16 ndp16;
-               struct usb_cdc_ncm_dpe16 dpe16;
-               int nframes, n, dpeoffset;
-
-               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
-                       net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
-                                           ndev->name, ndpoffset);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-               /* Check NDP header and retrieve number of datagrams */
-               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
-               if (nframes < 0) {
-                       net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-                /* Only IP data type supported, no DSS in MHI context */
-               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
-                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
-                       net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* Only primary IP session 0 (0x00) supported for now */
-               if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
-                       net_err_ratelimited("%s: bad packet session\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* de-aggregate and deliver IP packets */
-               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
-               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
-                       u16 dgram_offset, dgram_len;
-                       struct sk_buff *skbn;
-
-                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
-                               break;
-
-                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
-                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
-
-                       if (!dgram_offset || !dgram_len)
-                               break; /* null terminator */
-
-                       skbn = netdev_alloc_skb(ndev, dgram_len);
-                       if (!skbn)
-                               continue;
-
-                       skb_put(skbn, dgram_len);
-                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
-
-                       switch (skbn->data[0] & 0xf0) {
-                       case 0x40:
-                               skbn->protocol = htons(ETH_P_IP);
-                               break;
-                       case 0x60:
-                               skbn->protocol = htons(ETH_P_IPV6);
-                               break;
-                       default:
-                               net_err_ratelimited("%s: unknown protocol\n",
-                                                   ndev->name);
-                               __mbim_errors_inc(mhi_netdev);
-                               dev_kfree_skb_any(skbn);
-                               continue;
-                       }
-
-                       netif_rx(skbn);
-               }
-next_ndp:
-               /* Other NDP to process? */
-               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
-               if (!ndpoffset)
-                       break;
-       }
-
-       /* free skb */
-       dev_consume_skb_any(skb);
-       return;
-error:
-       dev_kfree_skb_any(skb);
-}
-
-struct mbim_tx_hdr {
-       struct usb_cdc_ncm_nth16 nth16;
-       struct usb_cdc_ncm_ndp16 ndp16;
-       struct usb_cdc_ncm_dpe16 dpe16[2];
-} __packed;
-
-static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev,
-                                    struct sk_buff *skb)
-{
-       struct mbim_context *ctx = mhi_netdev->proto_data;
-       unsigned int dgram_size = skb->len;
-       struct usb_cdc_ncm_nth16 *nth16;
-       struct usb_cdc_ncm_ndp16 *ndp16;
-       struct mbim_tx_hdr *mbim_hdr;
-
-       /* For now, this is a partial implementation of CDC MBIM, only one NDP
-        * is sent, containing the IP packet (no aggregation).
-        */
-
-       /* Ensure we have enough headroom for crafting MBIM header */
-       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
-               dev_kfree_skb_any(skb);
-               return NULL;
-       }
-
-       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
-
-       /* Fill NTB header */
-       nth16 = &mbim_hdr->nth16;
-       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
-       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-       nth16->wSequence = cpu_to_le16(ctx->tx_seq++);
-       nth16->wBlockLength = cpu_to_le16(skb->len);
-       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-
-       /* Fill the unique NDP */
-       ndp16 = &mbim_hdr->ndp16;
-       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN);
-       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
-                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
-       ndp16->wNextNdpIndex = 0;
-
-       /* Datagram follows the mbim header */
-       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
-       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
-
-       /* null termination */
-       ndp16->dpe16[1].wDatagramIndex = 0;
-       ndp16->dpe16[1].wDatagramLength = 0;
-
-       return skb;
-}
-
-static int mbim_init(struct mhi_net_dev *mhi_netdev)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-
-       mhi_netdev->proto_data = devm_kzalloc(&ndev->dev,
-                                             sizeof(struct mbim_context),
-                                             GFP_KERNEL);
-       if (!mhi_netdev->proto_data)
-               return -ENOMEM;
-
-       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
-       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
-       mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
-
-       return 0;
-}
-
-const struct mhi_net_proto proto_mbim = {
-       .init = mbim_init,
-       .rx = mbim_rx,
-       .tx_fixup = mbim_tx_fixup,
-};
diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
new file mode 100644 (file)
index 0000000..d127eb6
--- /dev/null
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI Network driver - Network over MHI bus
+ *
+ * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <linux/if_arp.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#define MHI_NET_MIN_MTU                ETH_MIN_MTU
+#define MHI_NET_MAX_MTU                0xffff
+#define MHI_NET_DEFAULT_MTU    0x4000
+
+struct mhi_net_stats {
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+};
+
+struct mhi_net_dev {
+       struct mhi_device *mdev;
+       struct net_device *ndev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       struct delayed_work rx_refill;
+       struct mhi_net_stats stats;
+       u32 rx_queue_sz;
+       int msg_enable;
+       unsigned int mru;
+};
+
+struct mhi_device_info {
+       const char *netname;
+};
+
+static int mhi_ndo_open(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       /* Feed the rx buffer pool */
+       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+
+       /* Carrier is established via out-of-band channel (e.g. qmi) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_ndo_stop(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
+
+       return 0;
+}
+
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       int err;
+
+       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static void mhi_ndo_get_stats64(struct net_device *ndev,
+                               struct rtnl_link_stats64 *stats)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
+               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
+               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
+               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
+               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
+               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
+               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
+               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
+}
+
+static const struct net_device_ops mhi_netdev_ops = {
+       .ndo_open               = mhi_ndo_open,
+       .ndo_stop               = mhi_ndo_stop,
+       .ndo_start_xmit         = mhi_ndo_xmit,
+       .ndo_get_stats64        = mhi_ndo_get_stats64,
+};
+
+static void mhi_net_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_netdev_ops;
+       ndev->mtu = MHI_NET_DEFAULT_MTU;
+       ndev->min_mtu = MHI_NET_MIN_MTU;
+       ndev->max_mtu = MHI_NET_MAX_MTU;
+       ndev->tx_queue_len = 1000;
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mhi_netdev->skbagg_head;
+       struct sk_buff *tail = mhi_netdev->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mhi_netdev->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mhi_netdev->skbagg_tail = skb;
+
+       return mhi_netdev->skbagg_head;
+}
+
+static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet can not fit in one MHI buffer and has been
+                        * split over multiple MHI transfers, do re-aggregation.
+                        * That usually means the device side MTU is larger than
+                        * the host side MTU/MRU. Since this is not optimal,
+                        * print a warning (once).
+                        */
+                       netdev_warn_once(mhi_netdev->ndev,
+                                        "Fragmented packets received, fix MTU?\n");
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mhi_netdev, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
+                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mhi_netdev->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mhi_netdev, skb);
+                       mhi_netdev->skbagg_head = NULL;
+               }
+
+               switch (skb->data[0] & 0xf0) {
+               case 0x40:
+                       skb->protocol = htons(ETH_P_IP);
+                       break;
+               case 0x60:
+                       skb->protocol = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       skb->protocol = htons(ETH_P_MAP);
+                       break;
+               }
+
+               u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+               u64_stats_inc(&mhi_netdev->stats.rx_packets);
+               u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
+               u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               netif_rx(skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
+               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+}
+
+static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb = mhi_res->buf_addr;
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&mhi_netdev->stats.tx_errors);
+       } else {
+               u64_stats_inc(&mhi_netdev->stats.tx_packets);
+               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
+                                                     rx_refill.work);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb;
+       unsigned int size;
+       int err;
+
+       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               skb = netdev_alloc_skb(ndev, size);
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
+               if (unlikely(err)) {
+                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
+                                           ndev->name, err);
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
+               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
+}
+
+static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev;
+       int err;
+
+       mhi_netdev = netdev_priv(ndev);
+
+       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
+       mhi_netdev->ndev = ndev;
+       mhi_netdev->mdev = mhi_dev;
+       mhi_netdev->skbagg_head = NULL;
+       mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
+
+       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
+       u64_stats_init(&mhi_netdev->stats.rx_syncp);
+       u64_stats_init(&mhi_netdev->stats.tx_syncp);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev);
+       if (err)
+               goto out_err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       err = register_netdev(ndev);
+       if (err)
+               return err;
+
+       return 0;
+
+out_err:
+       free_netdev(ndev);
+       return err;
+}
+
+static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       unregister_netdev(ndev);
+
+       mhi_unprepare_from_transfer(mhi_dev);
+
+       kfree_skb(mhi_netdev->skbagg_head);
+
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static int mhi_net_probe(struct mhi_device *mhi_dev,
+                        const struct mhi_device_id *id)
+{
+       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
+       struct net_device *ndev;
+       int err;
+
+       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
+                           NET_NAME_PREDICTABLE, mhi_net_setup);
+       if (!ndev)
+               return -ENOMEM;
+
+       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+
+       err = mhi_net_newlink(mhi_dev, ndev);
+       if (err) {
+               free_netdev(ndev);
+               return err;
+       }
+
+       return 0;
+}
+
+static void mhi_net_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+
+       mhi_net_dellink(mhi_dev, mhi_netdev->ndev);
+}
+
+static const struct mhi_device_info mhi_hwip0 = {
+       .netname = "mhi_hwip%d",
+};
+
+static const struct mhi_device_info mhi_swip0 = {
+       .netname = "mhi_swip%d",
+};
+
+static const struct mhi_device_id mhi_net_id_table[] = {
+       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
+       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
+       /* Software data PATH (to modem CPU) */
+       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
+
+static struct mhi_driver mhi_net_driver = {
+       .probe = mhi_net_probe,
+       .remove = mhi_net_remove,
+       .dl_xfer_cb = mhi_net_dl_callback,
+       .ul_xfer_cb = mhi_net_ul_callback,
+       .id_table = mhi_net_id_table,
+       .driver = {
+               .name = "mhi_net",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_net_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network over MHI");
+MODULE_LICENSE("GPL v2");
index 779c3a9..22680f4 100644 (file)
@@ -49,10 +49,8 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
  *
  * The @ecmd parameter is expected to have been cleared before calling
  * mii_ethtool_gset().
- *
- * Returns 0 for success, negative on error.
  */
-int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
+void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 {
        struct net_device *dev = mii->dev;
        u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0;
@@ -131,8 +129,6 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
        mii->full_duplex = ecmd->duplex;
 
        /* ignore maxtxpkt, maxrxpkt for now */
-
-       return 0;
 }
 
 /**
index ccec299..62d033a 100644 (file)
@@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
@@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
@@ -262,29 +264,31 @@ static struct device_type nsim_bus_dev_type = {
 };
 
 static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count);
+nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues);
 
 static ssize_t
 new_device_store(struct bus_type *bus, const char *buf, size_t count)
 {
+       unsigned int id, port_count, num_queues;
        struct nsim_bus_dev *nsim_bus_dev;
-       unsigned int port_count;
-       unsigned int id;
        int err;
 
-       err = sscanf(buf, "%u %u", &id, &port_count);
+       err = sscanf(buf, "%u %u %u", &id, &port_count, &num_queues);
        switch (err) {
        case 1:
                port_count = 1;
                fallthrough;
        case 2:
+               num_queues = 1;
+               fallthrough;
+       case 3:
                if (id > INT_MAX) {
                        pr_err("Value of \"id\" is too big.\n");
                        return -EINVAL;
                }
                break;
        default:
-               pr_err("Format for adding new device is \"id port_count\" (uint uint).\n");
+               pr_err("Format for adding new device is \"id port_count num_queues\" (uint uint unit).\n");
                return -EINVAL;
        }
 
@@ -295,7 +299,7 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count)
                goto err;
        }
 
-       nsim_bus_dev = nsim_bus_dev_new(id, port_count);
+       nsim_bus_dev = nsim_bus_dev_new(id, port_count, num_queues);
        if (IS_ERR(nsim_bus_dev)) {
                err = PTR_ERR(nsim_bus_dev);
                goto err;
@@ -397,7 +401,7 @@ static struct bus_type nsim_bus = {
 #define NSIM_BUS_DEV_MAX_VFS 4
 
 static struct nsim_bus_dev *
-nsim_bus_dev_new(unsigned int id, unsigned int port_count)
+nsim_bus_dev_new(unsigned int id, unsigned int port_count, unsigned int num_queues)
 {
        struct nsim_bus_dev *nsim_bus_dev;
        int err;
@@ -413,6 +417,7 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count)
        nsim_bus_dev->dev.bus = &nsim_bus;
        nsim_bus_dev->dev.type = &nsim_bus_dev_type;
        nsim_bus_dev->port_count = port_count;
+       nsim_bus_dev->num_queues = num_queues;
        nsim_bus_dev->initial_net = current->nsproxy->net_ns;
        nsim_bus_dev->max_vfs = NSIM_BUS_DEV_MAX_VFS;
        mutex_init(&nsim_bus_dev->nsim_bus_reload_lock);
index 6348307..54313bd 100644 (file)
@@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
                                struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EOPNOTSUPP;
 
        if (nsim_dev->dont_allow_reload) {
                /* For testing purposes, user set debugfs dont_allow_reload
                 * value to true. So forbid it.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EOPNOTSUPP;
        }
+       nsim_bus_dev->in_reload = true;
 
        nsim_dev_reload_destroy(nsim_dev);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return 0;
 }
 
@@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio
                              struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+       int ret;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
+       nsim_bus_dev->in_reload = false;
 
        if (nsim_dev->fail_reload) {
                /* For testing purposes, user set debugfs fail_reload
                 * value to true. Fail right away.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EINVAL;
        }
 
        *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-       return nsim_dev_reload_create(nsim_dev, extack);
+       ret = nsim_dev_reload_create(nsim_dev, extack);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+       return ret;
 }
 
 static int nsim_dev_info_get(struct devlink *devlink,
@@ -1431,10 +1448,10 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        struct devlink *devlink;
        int err;
 
-       devlink = devlink_alloc(&nsim_dev_devlink_ops, sizeof(*nsim_dev));
+       devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev),
+                                nsim_bus_dev->initial_net, &nsim_bus_dev->dev);
        if (!devlink)
                return -ENOMEM;
-       devlink_net_set(devlink, nsim_bus_dev->initial_net);
        nsim_dev = devlink_priv(devlink);
        nsim_dev->nsim_bus_dev = nsim_bus_dev;
        nsim_dev->switch_id.id_len = sizeof(nsim_dev->switch_id.id);
@@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        if (err)
                goto err_devlink_free;
 
-       err = devlink_register(devlink, &nsim_bus_dev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_resources_unregister;
 
index c9ae525..b03a051 100644 (file)
@@ -43,7 +43,9 @@ nsim_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause)
 }
 
 static int nsim_get_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct netdevsim *ns = netdev_priv(dev);
 
@@ -52,7 +54,9 @@ static int nsim_get_coalesce(struct net_device *dev,
 }
 
 static int nsim_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct netdevsim *ns = netdev_priv(dev);
 
index 213d3e5..4300261 100644 (file)
@@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv)
 static void nsim_fib_set_max_all(struct nsim_fib_data *data,
                                 struct devlink *devlink)
 {
-       enum nsim_resource_id res_ids[] = {
+       static const enum nsim_resource_id res_ids[] = {
                NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
                NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
                NSIM_RESOURCE_NEXTHOPS,
index c3aeb15..50572e0 100644 (file)
@@ -347,7 +347,8 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
        struct netdevsim *ns;
        int err;
 
-       dev = alloc_netdev(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup);
+       dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
+                             nsim_dev->nsim_bus_dev->num_queues);
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
@@ -392,7 +393,8 @@ void nsim_destroy(struct netdevsim *ns)
 static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
                         struct netlink_ext_ack *extack)
 {
-       NL_SET_ERR_MSG_MOD(extack, "Please use: echo \"[ID] [PORT_COUNT]\" > /sys/bus/netdevsim/new_device");
+       NL_SET_ERR_MSG_MOD(extack,
+                          "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
        return -EOPNOTSUPP;
 }
 
index ae46295..793c86d 100644 (file)
@@ -352,6 +352,7 @@ struct nsim_bus_dev {
        struct device dev;
        struct list_head list;
        unsigned int port_count;
+       unsigned int num_queues; /* Number of queues for each port on this bus */
        struct net *initial_net; /* Purpose of this is to carry net pointer
                                  * during the probe time only.
                                  */
@@ -361,6 +362,7 @@ struct nsim_bus_dev {
        struct nsim_vf_config *vfconfigs;
        /* Lock for devlink->reload_enabled in netdevsim module */
        struct mutex nsim_bus_reload_lock;
+       bool in_reload;
        bool init;
 };
 
index 4bd6133..fb0a83d 100644 (file)
@@ -65,6 +65,9 @@ static const int xpcs_xlgmii_features[] = {
 };
 
 static const int xpcs_sgmii_features[] = {
+       ETHTOOL_LINK_MODE_Pause_BIT,
+       ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+       ETHTOOL_LINK_MODE_Autoneg_BIT,
        ETHTOOL_LINK_MODE_10baseT_Half_BIT,
        ETHTOOL_LINK_MODE_10baseT_Full_BIT,
        ETHTOOL_LINK_MODE_100baseT_Half_BIT,
@@ -75,6 +78,7 @@ static const int xpcs_sgmii_features[] = {
 };
 
 static const int xpcs_2500basex_features[] = {
+       ETHTOOL_LINK_MODE_Pause_BIT,
        ETHTOOL_LINK_MODE_Asym_Pause_BIT,
        ETHTOOL_LINK_MODE_Autoneg_BIT,
        ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
index c56f703..902495a 100644 (file)
@@ -207,6 +207,12 @@ config MARVELL_88X2222_PHY
          Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet
          Transceiver.
 
+config MAXLINEAR_GPHY
+       tristate "Maxlinear Ethernet PHYs"
+       help
+         Support for the Maxlinear GPY115, GPY211, GPY212, GPY215,
+         GPY241, GPY245 PHYs.
+
 config MEDIATEK_GE_PHY
        tristate "MediaTek Gigabit Ethernet PHYs"
        help
@@ -230,6 +236,7 @@ config MICROCHIP_T1_PHY
 config MICROSEMI_PHY
        tristate "Microsemi PHYs"
        depends on MACSEC || MACSEC=n
+       depends on PTP_1588_CLOCK_OPTIONAL || !NETWORK_PHY_TIMESTAMPING
        select CRYPTO_LIB_AES if MACSEC
        help
          Currently supports VSC8514, VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
@@ -247,6 +254,7 @@ config NATIONAL_PHY
 
 config NXP_C45_TJA11XX_PHY
        tristate "NXP C45 TJA11XX PHYs"
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          Enable support for NXP C45 TJA11XX PHYs.
          Currently supports only the TJA1103 PHY.
index 172bb19..b2728d0 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_LXT_PHY)         += lxt.o
 obj-$(CONFIG_MARVELL_10G_PHY)  += marvell10g.o
 obj-$(CONFIG_MARVELL_PHY)      += marvell.o
 obj-$(CONFIG_MARVELL_88X2222_PHY)      += marvell-88x2222.o
+obj-$(CONFIG_MAXLINEAR_GPHY)   += mxl-gpy.o
 obj-$(CONFIG_MEDIATEK_GE_PHY)  += mediatek-ge.o
 obj-$(CONFIG_MESON_GXL_PHY)    += meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)  += spi_ks8995.o
index 5d62b85..bdac087 100644 (file)
@@ -532,12 +532,6 @@ static int at8031_register_regulators(struct phy_device *phydev)
        return 0;
 }
 
-static bool at803x_match_phy_id(struct phy_device *phydev, u32 phy_id)
-{
-       return (phydev->phy_id & phydev->drv->phy_id_mask)
-               == (phy_id & phydev->drv->phy_id_mask);
-}
-
 static int at803x_parse_dt(struct phy_device *phydev)
 {
        struct device_node *node = phydev->mdio.dev.of_node;
@@ -602,8 +596,8 @@ static int at803x_parse_dt(struct phy_device *phydev)
                 *   to the AR8030 so there might be a good chance it works on
                 *   the AR8030 too.
                 */
-               if (at803x_match_phy_id(phydev, ATH8030_PHY_ID) ||
-                   at803x_match_phy_id(phydev, ATH8035_PHY_ID)) {
+               if (phydev->drv->phy_id == ATH8030_PHY_ID ||
+                   phydev->drv->phy_id == ATH8035_PHY_ID) {
                        priv->clk_25m_reg &= AT8035_CLK_OUT_MASK;
                        priv->clk_25m_mask &= AT8035_CLK_OUT_MASK;
                }
@@ -631,7 +625,7 @@ static int at803x_parse_dt(struct phy_device *phydev)
        /* Only supported on AR8031/AR8033, the AR8030/AR8035 use strapping
         * options.
         */
-       if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                if (of_property_read_bool(node, "qca,keep-pll-enabled"))
                        priv->flags |= AT803X_KEEP_PLL_ENABLED;
 
@@ -676,7 +670,7 @@ static int at803x_probe(struct phy_device *phydev)
         * Switch to the copper page, as otherwise we read
         * the PHY capabilities from the fiber side.
         */
-       if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                phy_lock_mdio_bus(phydev);
                ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
                phy_unlock_mdio_bus(phydev);
@@ -709,7 +703,7 @@ static int at803x_get_features(struct phy_device *phydev)
        if (err)
                return err;
 
-       if (!at803x_match_phy_id(phydev, ATH8031_PHY_ID))
+       if (phydev->drv->phy_id != ATH8031_PHY_ID)
                return 0;
 
        /* AR8031/AR8033 have different status registers
@@ -820,7 +814,7 @@ static int at803x_config_init(struct phy_device *phydev)
        if (ret < 0)
                return ret;
 
-       if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+       if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                ret = at8031_pll_config(phydev);
                if (ret < 0)
                        return ret;
index f7a2ec1..211b547 100644 (file)
@@ -326,11 +326,9 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
 
 static int dp8382x_disable_wol(struct phy_device *phydev)
 {
-       int value = DP83822_WOL_EN | DP83822_WOL_MAGIC_EN |
-                   DP83822_WOL_SECURE_ON;
-
-       return phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
-                                 MII_DP83822_WOL_CFG, value);
+       return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
+                                 DP83822_WOL_EN | DP83822_WOL_MAGIC_EN |
+                                 DP83822_WOL_SECURE_ON);
 }
 
 static int dp83822_read_status(struct phy_device *phydev)
index d453ec0..3c03286 100644 (file)
@@ -8,11 +8,16 @@
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/of.h>
+#include <linux/bitfield.h>
 
+#define XWAY_MDIO_MIICTRL              0x17    /* mii control */
 #define XWAY_MDIO_IMASK                        0x19    /* interrupt mask */
 #define XWAY_MDIO_ISTAT                        0x1A    /* interrupt status */
 #define XWAY_MDIO_LED                  0x1B    /* led control */
 
+#define XWAY_MDIO_MIICTRL_RXSKEW_MASK  GENMASK(14, 12)
+#define XWAY_MDIO_MIICTRL_TXSKEW_MASK  GENMASK(10, 8)
+
 /* bit 15:12 are reserved */
 #define XWAY_MDIO_LED_LED3_EN          BIT(11) /* Enable the integrated function of LED3 */
 #define XWAY_MDIO_LED_LED2_EN          BIT(10) /* Enable the integrated function of LED2 */
 #define PHY_ID_PHY11G_VR9_1_2          0xD565A409
 #define PHY_ID_PHY22F_VR9_1_2          0xD565A419
 
+static const int xway_internal_delay[] = {0, 500, 1000, 1500, 2000, 2500,
+                                        3000, 3500};
+
+static int xway_gphy_rgmii_init(struct phy_device *phydev)
+{
+       struct device *dev = &phydev->mdio.dev;
+       unsigned int delay_size = ARRAY_SIZE(xway_internal_delay);
+       s32 int_delay;
+       int val = 0;
+
+       if (!phy_interface_is_rgmii(phydev))
+               return 0;
+
+       /* Existing behavior was to use default pin strapping delay in rgmii
+        * mode, but rgmii should have meant no delay.  Warn existing users,
+        * but do not change anything at the moment.
+        */
+       if (phydev->interface == PHY_INTERFACE_MODE_RGMII) {
+               u16 txskew, rxskew;
+
+               val = phy_read(phydev, XWAY_MDIO_MIICTRL);
+               if (val < 0)
+                       return val;
+
+               txskew = FIELD_GET(XWAY_MDIO_MIICTRL_TXSKEW_MASK, val);
+               rxskew = FIELD_GET(XWAY_MDIO_MIICTRL_RXSKEW_MASK, val);
+
+               if (txskew > 0 || rxskew > 0)
+                       phydev_warn(phydev,
+                                   "PHY has delays (e.g. via pin strapping), but phy-mode = 'rgmii'\n"
+                                   "Should be 'rgmii-id' to use internal delays txskew:%d ps rxskew:%d ps\n",
+                                   xway_internal_delay[txskew],
+                                   xway_internal_delay[rxskew]);
+               return 0;
+       }
+
+       if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+           phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+               int_delay = phy_get_internal_delay(phydev, dev,
+                                                  xway_internal_delay,
+                                                  delay_size, true);
+
+               /* if rx-internal-delay-ps is missing, use default of 2.0 ns */
+               if (int_delay < 0)
+                       int_delay = 4; /* 2000 ps */
+
+               val |= FIELD_PREP(XWAY_MDIO_MIICTRL_RXSKEW_MASK, int_delay);
+       }
+
+       if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+           phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+               int_delay = phy_get_internal_delay(phydev, dev,
+                                                  xway_internal_delay,
+                                                  delay_size, false);
+
+               /* if tx-internal-delay-ps is missing, use default of 2.0 ns */
+               if (int_delay < 0)
+                       int_delay = 4; /* 2000 ps */
+
+               val |= FIELD_PREP(XWAY_MDIO_MIICTRL_TXSKEW_MASK, int_delay);
+       }
+
+       return phy_modify(phydev, XWAY_MDIO_MIICTRL,
+                         XWAY_MDIO_MIICTRL_RXSKEW_MASK |
+                         XWAY_MDIO_MIICTRL_TXSKEW_MASK, val);
+}
+
 static int xway_gphy_config_init(struct phy_device *phydev)
 {
        int err;
@@ -204,6 +276,10 @@ static int xway_gphy_config_init(struct phy_device *phydev)
        phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2H, ledxh);
        phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LED2L, ledxl);
 
+       err = xway_gphy_rgmii_init(phydev);
+       if (err)
+               return err;
+
        return 0;
 }
 
index 3de93c9..4fcfca4 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/marvell_phy.h>
 #include <linux/bitfield.h>
 #include <linux/of.h>
+#include <linux/sfp.h>
 
 #include <linux/io.h>
 #include <asm/irq.h>
@@ -46,6 +47,7 @@
 #define MII_MARVELL_MISC_TEST_PAGE     0x06
 #define MII_MARVELL_VCT7_PAGE          0x07
 #define MII_MARVELL_WOL_PAGE           0x11
+#define MII_MARVELL_MODE_PAGE          0x12
 
 #define MII_M1011_IEVENT               0x13
 #define MII_M1011_IEVENT_CLEAR         0x0000
 
 #define MII_88E1318S_PHY_WOL_CTRL                              0x10
 #define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS             BIT(12)
+#define MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE               BIT(13)
 #define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE    BIT(14)
 
 #define MII_PHY_LED_CTRL               16
 
 #define MII_88E1510_GEN_CTRL_REG_1             0x14
 #define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK   0x7
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII  0x0     /* RGMII to copper */
 #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII  0x1     /* SGMII to copper */
+/* RGMII to 1000BASE-X */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X    0x2
+/* RGMII to 100BASE-FX */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX    0x3
+/* RGMII to SGMII */
+#define MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII    0x4
 #define MII_88E1510_GEN_CTRL_REG_1_RESET       0x8000  /* Soft reset */
 
 #define MII_VCT5_TX_RX_MDI0_COUPLING   0x10
@@ -1746,13 +1756,19 @@ static void m88e1318_get_wol(struct phy_device *phydev,
 {
        int ret;
 
-       wol->supported = WAKE_MAGIC;
+       wol->supported = WAKE_MAGIC | WAKE_PHY;
        wol->wolopts = 0;
 
        ret = phy_read_paged(phydev, MII_MARVELL_WOL_PAGE,
                             MII_88E1318S_PHY_WOL_CTRL);
-       if (ret >= 0 && ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE)
+       if (ret < 0)
+               return;
+
+       if (ret & MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE)
                wol->wolopts |= WAKE_MAGIC;
+
+       if (ret & MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE)
+               wol->wolopts |= WAKE_PHY;
 }
 
 static int m88e1318_set_wol(struct phy_device *phydev,
@@ -1764,7 +1780,7 @@ static int m88e1318_set_wol(struct phy_device *phydev,
        if (oldpage < 0)
                goto error;
 
-       if (wol->wolopts & WAKE_MAGIC) {
+       if (wol->wolopts & (WAKE_MAGIC | WAKE_PHY)) {
                /* Explicitly switch to page 0x00, just to be sure */
                err = marvell_write_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
@@ -1796,7 +1812,9 @@ static int m88e1318_set_wol(struct phy_device *phydev,
                                   MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW);
                if (err < 0)
                        goto error;
+       }
 
+       if (wol->wolopts & WAKE_MAGIC) {
                err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
                if (err < 0)
                        goto error;
@@ -1837,6 +1855,30 @@ static int m88e1318_set_wol(struct phy_device *phydev,
                        goto error;
        }
 
+       if (wol->wolopts & WAKE_PHY) {
+               err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
+               if (err < 0)
+                       goto error;
+
+               /* Clear WOL status and enable link up event */
+               err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL, 0,
+                                  MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS |
+                                  MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE);
+               if (err < 0)
+                       goto error;
+       } else {
+               err = marvell_write_page(phydev, MII_MARVELL_WOL_PAGE);
+               if (err < 0)
+                       goto error;
+
+               /* Clear WOL status and disable link up event */
+               err = __phy_modify(phydev, MII_88E1318S_PHY_WOL_CTRL,
+                                  MII_88E1318S_PHY_WOL_CTRL_LINK_UP_ENABLE,
+                                  MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS);
+               if (err < 0)
+                       goto error;
+       }
+
 error:
        return phy_restore_page(phydev, oldpage, err);
 }
@@ -2701,6 +2743,100 @@ static int marvell_probe(struct phy_device *phydev)
        return marvell_hwmon_probe(phydev);
 }
 
+static int m88e1510_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+       struct phy_device *phydev = upstream;
+       phy_interface_t interface;
+       struct device *dev;
+       int oldpage;
+       int ret = 0;
+       u16 mode;
+
+       __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
+
+       dev = &phydev->mdio.dev;
+
+       sfp_parse_support(phydev->sfp_bus, id, supported);
+       interface = sfp_select_interface(phydev->sfp_bus, supported);
+
+       dev_info(dev, "%s SFP module inserted\n", phy_modes(interface));
+
+       switch (interface) {
+       case PHY_INTERFACE_MODE_1000BASEX:
+               mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_1000X;
+
+               break;
+       case PHY_INTERFACE_MODE_100BASEX:
+               mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_100FX;
+
+               break;
+       case PHY_INTERFACE_MODE_SGMII:
+               mode = MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII_SGMII;
+
+               break;
+       default:
+               dev_err(dev, "Incompatible SFP module inserted\n");
+
+               return -EINVAL;
+       }
+
+       oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
+       if (oldpage < 0)
+               goto error;
+
+       ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
+                          MII_88E1510_GEN_CTRL_REG_1_MODE_MASK, mode);
+       if (ret < 0)
+               goto error;
+
+       ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
+                            MII_88E1510_GEN_CTRL_REG_1_RESET);
+
+error:
+       return phy_restore_page(phydev, oldpage, ret);
+}
+
+static void m88e1510_sfp_remove(void *upstream)
+{
+       struct phy_device *phydev = upstream;
+       int oldpage;
+       int ret = 0;
+
+       oldpage = phy_select_page(phydev, MII_MARVELL_MODE_PAGE);
+       if (oldpage < 0)
+               goto error;
+
+       ret = __phy_modify(phydev, MII_88E1510_GEN_CTRL_REG_1,
+                          MII_88E1510_GEN_CTRL_REG_1_MODE_MASK,
+                          MII_88E1510_GEN_CTRL_REG_1_MODE_RGMII);
+       if (ret < 0)
+               goto error;
+
+       ret = __phy_set_bits(phydev, MII_88E1510_GEN_CTRL_REG_1,
+                            MII_88E1510_GEN_CTRL_REG_1_RESET);
+
+error:
+       phy_restore_page(phydev, oldpage, ret);
+}
+
+static const struct sfp_upstream_ops m88e1510_sfp_ops = {
+       .module_insert = m88e1510_sfp_insert,
+       .module_remove = m88e1510_sfp_remove,
+       .attach = phy_sfp_attach,
+       .detach = phy_sfp_detach,
+};
+
+static int m88e1510_probe(struct phy_device *phydev)
+{
+       int err;
+
+       err = marvell_probe(phydev);
+       if (err)
+               return err;
+
+       return phy_sfp_probe(phydev, &m88e1510_sfp_ops);
+}
+
 static struct phy_driver marvell_drivers[] = {
        {
                .phy_id = MARVELL_PHY_ID_88E1101,
@@ -2927,7 +3063,7 @@ static struct phy_driver marvell_drivers[] = {
                .driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
                .features = PHY_GBIT_FIBRE_FEATURES,
                .flags = PHY_POLL_CABLE_TEST,
-               .probe = marvell_probe,
+               .probe = m88e1510_probe,
                .config_init = m88e1510_config_init,
                .config_aneg = m88e1510_config_aneg,
                .read_status = marvell_read_status,
index f4d758f..bd310e8 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/marvell_phy.h>
 #include <linux/phy.h>
 #include <linux/sfp.h>
+#include <linux/netdevice.h>
 
 #define MV_PHY_ALASKA_NBT_QUIRK_MASK   0xfffffffe
 #define MV_PHY_ALASKA_NBT_QUIRK_REV    (MARVELL_PHY_ID_88X3310 | 0xa)
@@ -104,6 +105,16 @@ enum {
        MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN       = 0x5,
        MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH        = 0x6,
        MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII                    = 0x7,
+       MV_V2_PORT_INTR_STS     = 0xf040,
+       MV_V2_PORT_INTR_MASK    = 0xf043,
+       MV_V2_PORT_INTR_STS_WOL_EN      = BIT(8),
+       MV_V2_MAGIC_PKT_WORD0   = 0xf06b,
+       MV_V2_MAGIC_PKT_WORD1   = 0xf06c,
+       MV_V2_MAGIC_PKT_WORD2   = 0xf06d,
+       /* Wake on LAN registers */
+       MV_V2_WOL_CTRL          = 0xf06e,
+       MV_V2_WOL_CTRL_CLEAR_STS        = BIT(15),
+       MV_V2_WOL_CTRL_MAGIC_PKT_EN     = BIT(0),
        /* Temperature control/read registers (88X3310 only) */
        MV_V2_TEMP_CTRL         = 0xf08a,
        MV_V2_TEMP_CTRL_MASK    = 0xc000,
@@ -1028,6 +1039,80 @@ static int mv2111_match_phy_device(struct phy_device *phydev)
        return mv211x_match_phy_device(phydev, false);
 }
 
+static void mv3110_get_wol(struct phy_device *phydev,
+                          struct ethtool_wolinfo *wol)
+{
+       int ret;
+
+       wol->supported = WAKE_MAGIC;
+       wol->wolopts = 0;
+
+       ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_WOL_CTRL);
+       if (ret < 0)
+               return;
+
+       if (ret & MV_V2_WOL_CTRL_MAGIC_PKT_EN)
+               wol->wolopts |= WAKE_MAGIC;
+}
+
+static int mv3110_set_wol(struct phy_device *phydev,
+                         struct ethtool_wolinfo *wol)
+{
+       int ret;
+
+       if (wol->wolopts & WAKE_MAGIC) {
+               /* Enable the WOL interrupt */
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      MV_V2_PORT_INTR_MASK,
+                                      MV_V2_PORT_INTR_STS_WOL_EN);
+               if (ret < 0)
+                       return ret;
+
+               /* Store the device address for the magic packet */
+               ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+                                   MV_V2_MAGIC_PKT_WORD2,
+                                   ((phydev->attached_dev->dev_addr[5] << 8) |
+                                   phydev->attached_dev->dev_addr[4]));
+               if (ret < 0)
+                       return ret;
+
+               ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+                                   MV_V2_MAGIC_PKT_WORD1,
+                                   ((phydev->attached_dev->dev_addr[3] << 8) |
+                                   phydev->attached_dev->dev_addr[2]));
+               if (ret < 0)
+                       return ret;
+
+               ret = phy_write_mmd(phydev, MDIO_MMD_VEND2,
+                                   MV_V2_MAGIC_PKT_WORD0,
+                                   ((phydev->attached_dev->dev_addr[1] << 8) |
+                                   phydev->attached_dev->dev_addr[0]));
+               if (ret < 0)
+                       return ret;
+
+               /* Clear WOL status and enable magic packet matching */
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      MV_V2_WOL_CTRL,
+                                      MV_V2_WOL_CTRL_MAGIC_PKT_EN |
+                                      MV_V2_WOL_CTRL_CLEAR_STS);
+               if (ret < 0)
+                       return ret;
+       } else {
+               /* Disable magic packet matching & reset WOL status bit */
+               ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+                                    MV_V2_WOL_CTRL,
+                                    MV_V2_WOL_CTRL_MAGIC_PKT_EN,
+                                    MV_V2_WOL_CTRL_CLEAR_STS);
+               if (ret < 0)
+                       return ret;
+       }
+
+       /* Reset the clear WOL status bit as it does not self-clear */
+       return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                 MV_V2_WOL_CTRL,
+                                 MV_V2_WOL_CTRL_CLEAR_STS);
+}
+
 static struct phy_driver mv3310_drivers[] = {
        {
                .phy_id         = MARVELL_PHY_ID_88X3310,
@@ -1047,6 +1132,8 @@ static struct phy_driver mv3310_drivers[] = {
                .set_tunable    = mv3310_set_tunable,
                .remove         = mv3310_remove,
                .set_loopback   = genphy_c45_loopback,
+               .get_wol        = mv3110_get_wol,
+               .set_wol        = mv3110_set_wol,
        },
        {
                .phy_id         = MARVELL_PHY_ID_88X3310,
@@ -1084,6 +1171,8 @@ static struct phy_driver mv3310_drivers[] = {
                .set_tunable    = mv3310_set_tunable,
                .remove         = mv3310_remove,
                .set_loopback   = genphy_c45_loopback,
+               .get_wol        = mv3110_get_wol,
+               .set_wol        = mv3110_set_wol,
        },
        {
                .phy_id         = MARVELL_PHY_ID_88E2110,
index 924ed5b..edb9516 100644 (file)
@@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
        bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk
 static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
                            bool one_step, bool enable)
 {
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev)
 static int __vsc8584_init_ptp(struct phy_device *phydev)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
-       u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
-       u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
+       static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
+       static const u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
        u32 val;
 
        if (!vsc8584_is_1588_input_clk_configured(phydev)) {
diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c
new file mode 100644 (file)
index 0000000..2d5d508
--- /dev/null
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2021 Maxlinear Corporation
+ * Copyright (C) 2020 Intel Corporation
+ *
+ * Drivers for Maxlinear Ethernet GPY
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/bitfield.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+/* PHY ID */
+#define PHY_ID_GPYx15B_MASK    0xFFFFFFFC
+#define PHY_ID_GPY21xB_MASK    0xFFFFFFF9
+#define PHY_ID_GPY2xx          0x67C9DC00
+#define PHY_ID_GPY115B         0x67C9DF00
+#define PHY_ID_GPY115C         0x67C9DF10
+#define PHY_ID_GPY211B         0x67C9DE08
+#define PHY_ID_GPY211C         0x67C9DE10
+#define PHY_ID_GPY212B         0x67C9DE09
+#define PHY_ID_GPY212C         0x67C9DE20
+#define PHY_ID_GPY215B         0x67C9DF04
+#define PHY_ID_GPY215C         0x67C9DF20
+#define PHY_ID_GPY241B         0x67C9DE40
+#define PHY_ID_GPY241BM                0x67C9DE80
+#define PHY_ID_GPY245B         0x67C9DEC0
+
+#define PHY_MIISTAT            0x18    /* MII state */
+#define PHY_IMASK              0x19    /* interrupt mask */
+#define PHY_ISTAT              0x1A    /* interrupt status */
+#define PHY_FWV                        0x1E    /* firmware version */
+
+#define PHY_MIISTAT_SPD_MASK   GENMASK(2, 0)
+#define PHY_MIISTAT_DPX                BIT(3)
+#define PHY_MIISTAT_LS         BIT(10)
+
+#define PHY_MIISTAT_SPD_10     0
+#define PHY_MIISTAT_SPD_100    1
+#define PHY_MIISTAT_SPD_1000   2
+#define PHY_MIISTAT_SPD_2500   4
+
+#define PHY_IMASK_WOL          BIT(15) /* Wake-on-LAN */
+#define PHY_IMASK_ANC          BIT(10) /* Auto-Neg complete */
+#define PHY_IMASK_ADSC         BIT(5)  /* Link auto-downspeed detect */
+#define PHY_IMASK_DXMC         BIT(2)  /* Duplex mode change */
+#define PHY_IMASK_LSPC         BIT(1)  /* Link speed change */
+#define PHY_IMASK_LSTC         BIT(0)  /* Link state change */
+#define PHY_IMASK_MASK         (PHY_IMASK_LSTC | \
+                                PHY_IMASK_LSPC | \
+                                PHY_IMASK_DXMC | \
+                                PHY_IMASK_ADSC | \
+                                PHY_IMASK_ANC)
+
+#define PHY_FWV_REL_MASK       BIT(15)
+#define PHY_FWV_TYPE_MASK      GENMASK(11, 8)
+#define PHY_FWV_MINOR_MASK     GENMASK(7, 0)
+
+/* SGMII */
+#define VSPEC1_SGMII_CTRL      0x08
+#define VSPEC1_SGMII_CTRL_ANEN BIT(12)         /* Aneg enable */
+#define VSPEC1_SGMII_CTRL_ANRS BIT(9)          /* Restart Aneg */
+#define VSPEC1_SGMII_ANEN_ANRS (VSPEC1_SGMII_CTRL_ANEN | \
+                                VSPEC1_SGMII_CTRL_ANRS)
+
+/* WoL */
+#define VPSPEC2_WOL_CTL                0x0E06
+#define VPSPEC2_WOL_AD01       0x0E08
+#define VPSPEC2_WOL_AD23       0x0E09
+#define VPSPEC2_WOL_AD45       0x0E0A
+#define WOL_EN                 BIT(0)
+
+static const struct {
+       int type;
+       int minor;
+} ver_need_sgmii_reaneg[] = {
+       {7, 0x6D},
+       {8, 0x6D},
+       {9, 0x73},
+};
+
+static int gpy_config_init(struct phy_device *phydev)
+{
+       int ret;
+
+       /* Mask all interrupts */
+       ret = phy_write(phydev, PHY_IMASK, 0);
+       if (ret)
+               return ret;
+
+       /* Clear all pending interrupts */
+       ret = phy_read(phydev, PHY_ISTAT);
+       return ret < 0 ? ret : 0;
+}
+
+static int gpy_probe(struct phy_device *phydev)
+{
+       int ret;
+
+       if (!phydev->is_c45) {
+               ret = phy_get_c45_ids(phydev);
+               if (ret < 0)
+                       return ret;
+       }
+
+       /* Show GPY PHY FW version in dmesg */
+       ret = phy_read(phydev, PHY_FWV);
+       if (ret < 0)
+               return ret;
+
+       phydev_info(phydev, "Firmware Version: 0x%04X (%s)\n", ret,
+                   (ret & PHY_FWV_REL_MASK) ? "release" : "test");
+
+       return 0;
+}
+
+static bool gpy_sgmii_need_reaneg(struct phy_device *phydev)
+{
+       int fw_ver, fw_type, fw_minor;
+       size_t i;
+
+       fw_ver = phy_read(phydev, PHY_FWV);
+       if (fw_ver < 0)
+               return true;
+
+       fw_type = FIELD_GET(PHY_FWV_TYPE_MASK, fw_ver);
+       fw_minor = FIELD_GET(PHY_FWV_MINOR_MASK, fw_ver);
+
+       for (i = 0; i < ARRAY_SIZE(ver_need_sgmii_reaneg); i++) {
+               if (fw_type != ver_need_sgmii_reaneg[i].type)
+                       continue;
+               if (fw_minor < ver_need_sgmii_reaneg[i].minor)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
+static bool gpy_2500basex_chk(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = phy_read(phydev, PHY_MIISTAT);
+       if (ret < 0) {
+               phydev_err(phydev, "Error: MDIO register access failed: %d\n",
+                          ret);
+               return false;
+       }
+
+       if (!(ret & PHY_MIISTAT_LS) ||
+           FIELD_GET(PHY_MIISTAT_SPD_MASK, ret) != PHY_MIISTAT_SPD_2500)
+               return false;
+
+       phydev->speed = SPEED_2500;
+       phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+       phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+                      VSPEC1_SGMII_CTRL_ANEN, 0);
+       return true;
+}
+
+static bool gpy_sgmii_aneg_en(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL);
+       if (ret < 0) {
+               phydev_err(phydev, "Error: MMD register access failed: %d\n",
+                          ret);
+               return true;
+       }
+
+       return (ret & VSPEC1_SGMII_CTRL_ANEN) ? true : false;
+}
+
+static int gpy_config_aneg(struct phy_device *phydev)
+{
+       bool changed = false;
+       u32 adv;
+       int ret;
+
+       if (phydev->autoneg == AUTONEG_DISABLE) {
+               /* Configure half duplex with genphy_setup_forced,
+                * because genphy_c45_pma_setup_forced does not support.
+                */
+               return phydev->duplex != DUPLEX_FULL
+                       ? genphy_setup_forced(phydev)
+                       : genphy_c45_pma_setup_forced(phydev);
+       }
+
+       ret = genphy_c45_an_config_aneg(phydev);
+       if (ret < 0)
+               return ret;
+       if (ret > 0)
+               changed = true;
+
+       adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
+       ret = phy_modify_changed(phydev, MII_CTRL1000,
+                                ADVERTISE_1000FULL | ADVERTISE_1000HALF,
+                                adv);
+       if (ret < 0)
+               return ret;
+       if (ret > 0)
+               changed = true;
+
+       ret = genphy_c45_check_and_restart_aneg(phydev, changed);
+       if (ret < 0)
+               return ret;
+
+       if (phydev->interface == PHY_INTERFACE_MODE_USXGMII ||
+           phydev->interface == PHY_INTERFACE_MODE_INTERNAL)
+               return 0;
+
+       /* No need to trigger re-ANEG if link speed is 2.5G or SGMII ANEG is
+        * disabled.
+        */
+       if (!gpy_sgmii_need_reaneg(phydev) || gpy_2500basex_chk(phydev) ||
+           !gpy_sgmii_aneg_en(phydev))
+               return 0;
+
+       /* There is a design constraint in GPY2xx device where SGMII AN is
+        * only triggered when there is change of speed. If, PHY link
+        * partner`s speed is still same even after PHY TPI is down and up
+        * again, SGMII AN is not triggered and hence no new in-band message
+        * from GPY to MAC side SGMII.
+        * This could cause an issue during power up, when PHY is up prior to
+        * MAC. At this condition, once MAC side SGMII is up, MAC side SGMII
+        * wouldn`t receive new in-band message from GPY with correct link
+        * status, speed and duplex info.
+        *
+        * 1) If PHY is already up and TPI link status is still down (such as
+        *    hard reboot), TPI link status is polled for 4 seconds before
+        *    retriggerring SGMII AN.
+        * 2) If PHY is already up and TPI link status is also up (such as soft
+        *    reboot), polling of TPI link status is not needed and SGMII AN is
+        *    immediately retriggered.
+        * 3) Other conditions such as PHY is down, speed change etc, skip
+        *    retriggering SGMII AN. Note: in case of speed change, GPY FW will
+        *    initiate SGMII AN.
+        */
+
+       if (phydev->state != PHY_UP)
+               return 0;
+
+       ret = phy_read_poll_timeout(phydev, MII_BMSR, ret, ret & BMSR_LSTATUS,
+                                   20000, 4000000, false);
+       if (ret == -ETIMEDOUT)
+               return 0;
+       else if (ret < 0)
+               return ret;
+
+       /* Trigger SGMII AN. */
+       return phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+                             VSPEC1_SGMII_CTRL_ANRS, VSPEC1_SGMII_CTRL_ANRS);
+}
+
+static void gpy_update_interface(struct phy_device *phydev)
+{
+       int ret;
+
+       /* Interface mode is fixed for USXGMII and integrated PHY */
+       if (phydev->interface == PHY_INTERFACE_MODE_USXGMII ||
+           phydev->interface == PHY_INTERFACE_MODE_INTERNAL)
+               return;
+
+       /* Automatically switch SERDES interface between SGMII and 2500-BaseX
+        * according to speed. Disable ANEG in 2500-BaseX mode.
+        */
+       switch (phydev->speed) {
+       case SPEED_2500:
+               phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+               ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+                                    VSPEC1_SGMII_CTRL_ANEN, 0);
+               if (ret < 0)
+                       phydev_err(phydev,
+                                  "Error: Disable of SGMII ANEG failed: %d\n",
+                                  ret);
+               break;
+       case SPEED_1000:
+       case SPEED_100:
+       case SPEED_10:
+               phydev->interface = PHY_INTERFACE_MODE_SGMII;
+               if (gpy_sgmii_aneg_en(phydev))
+                       break;
+               /* Enable and restart SGMII ANEG for 10/100/1000Mbps link speed
+                * if ANEG is disabled (in 2500-BaseX mode).
+                */
+               ret = phy_modify_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_SGMII_CTRL,
+                                    VSPEC1_SGMII_ANEN_ANRS,
+                                    VSPEC1_SGMII_ANEN_ANRS);
+               if (ret < 0)
+                       phydev_err(phydev,
+                                  "Error: Enable of SGMII ANEG failed: %d\n",
+                                  ret);
+               break;
+       }
+}
+
+static int gpy_read_status(struct phy_device *phydev)
+{
+       int ret;
+
+       ret = genphy_update_link(phydev);
+       if (ret)
+               return ret;
+
+       phydev->speed = SPEED_UNKNOWN;
+       phydev->duplex = DUPLEX_UNKNOWN;
+       phydev->pause = 0;
+       phydev->asym_pause = 0;
+
+       if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+               ret = genphy_c45_read_lpa(phydev);
+               if (ret < 0)
+                       return ret;
+
+               /* Read the link partner's 1G advertisement */
+               ret = phy_read(phydev, MII_STAT1000);
+               if (ret < 0)
+                       return ret;
+               mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, ret);
+       } else if (phydev->autoneg == AUTONEG_DISABLE) {
+               linkmode_zero(phydev->lp_advertising);
+       }
+
+       ret = phy_read(phydev, PHY_MIISTAT);
+       if (ret < 0)
+               return ret;
+
+       phydev->link = (ret & PHY_MIISTAT_LS) ? 1 : 0;
+       phydev->duplex = (ret & PHY_MIISTAT_DPX) ? DUPLEX_FULL : DUPLEX_HALF;
+       switch (FIELD_GET(PHY_MIISTAT_SPD_MASK, ret)) {
+       case PHY_MIISTAT_SPD_10:
+               phydev->speed = SPEED_10;
+               break;
+       case PHY_MIISTAT_SPD_100:
+               phydev->speed = SPEED_100;
+               break;
+       case PHY_MIISTAT_SPD_1000:
+               phydev->speed = SPEED_1000;
+               break;
+       case PHY_MIISTAT_SPD_2500:
+               phydev->speed = SPEED_2500;
+               break;
+       }
+
+       if (phydev->link)
+               gpy_update_interface(phydev);
+
+       return 0;
+}
+
+static int gpy_config_intr(struct phy_device *phydev)
+{
+       u16 mask = 0;
+
+       if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+               mask = PHY_IMASK_MASK;
+
+       return phy_write(phydev, PHY_IMASK, mask);
+}
+
+static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev)
+{
+       int reg;
+
+       reg = phy_read(phydev, PHY_ISTAT);
+       if (reg < 0) {
+               phy_error(phydev);
+               return IRQ_NONE;
+       }
+
+       if (!(reg & PHY_IMASK_MASK))
+               return IRQ_NONE;
+
+       phy_trigger_machine(phydev);
+
+       return IRQ_HANDLED;
+}
+
+static int gpy_set_wol(struct phy_device *phydev,
+                      struct ethtool_wolinfo *wol)
+{
+       struct net_device *attach_dev = phydev->attached_dev;
+       int ret;
+
+       if (wol->wolopts & WAKE_MAGIC) {
+               /* MAC address - Byte0:Byte1:Byte2:Byte3:Byte4:Byte5
+                * VPSPEC2_WOL_AD45 = Byte0:Byte1
+                * VPSPEC2_WOL_AD23 = Byte2:Byte3
+                * VPSPEC2_WOL_AD01 = Byte4:Byte5
+                */
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      VPSPEC2_WOL_AD45,
+                                      ((attach_dev->dev_addr[0] << 8) |
+                                      attach_dev->dev_addr[1]));
+               if (ret < 0)
+                       return ret;
+
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      VPSPEC2_WOL_AD23,
+                                      ((attach_dev->dev_addr[2] << 8) |
+                                      attach_dev->dev_addr[3]));
+               if (ret < 0)
+                       return ret;
+
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      VPSPEC2_WOL_AD01,
+                                      ((attach_dev->dev_addr[4] << 8) |
+                                      attach_dev->dev_addr[5]));
+               if (ret < 0)
+                       return ret;
+
+               /* Enable the WOL interrupt */
+               ret = phy_write(phydev, PHY_IMASK, PHY_IMASK_WOL);
+               if (ret < 0)
+                       return ret;
+
+               /* Enable magic packet matching */
+               ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                      VPSPEC2_WOL_CTL,
+                                      WOL_EN);
+               if (ret < 0)
+                       return ret;
+
+               /* Clear the interrupt status register.
+                * Only WoL is enabled so clear all.
+                */
+               ret = phy_read(phydev, PHY_ISTAT);
+               if (ret < 0)
+                       return ret;
+       } else {
+               /* Disable magic packet matching */
+               ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2,
+                                        VPSPEC2_WOL_CTL,
+                                        WOL_EN);
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (wol->wolopts & WAKE_PHY) {
+               /* Enable the link state change interrupt */
+               ret = phy_set_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC);
+               if (ret < 0)
+                       return ret;
+
+               /* Clear the interrupt status register */
+               ret = phy_read(phydev, PHY_ISTAT);
+               if (ret < 0)
+                       return ret;
+
+               if (ret & (PHY_IMASK_MASK & ~PHY_IMASK_LSTC))
+                       phy_trigger_machine(phydev);
+
+               return 0;
+       }
+
+       /* Disable the link state change interrupt */
+       return phy_clear_bits(phydev, PHY_IMASK, PHY_IMASK_LSTC);
+}
+
+static void gpy_get_wol(struct phy_device *phydev,
+                       struct ethtool_wolinfo *wol)
+{
+       int ret;
+
+       wol->supported = WAKE_MAGIC | WAKE_PHY;
+       wol->wolopts = 0;
+
+       ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, VPSPEC2_WOL_CTL);
+       if (ret & WOL_EN)
+               wol->wolopts |= WAKE_MAGIC;
+
+       ret = phy_read(phydev, PHY_IMASK);
+       if (ret & PHY_IMASK_LSTC)
+               wol->wolopts |= WAKE_PHY;
+}
+
+static int gpy_loopback(struct phy_device *phydev, bool enable)
+{
+       int ret;
+
+       ret = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
+                        enable ? BMCR_LOOPBACK : 0);
+       if (!ret) {
+               /* It takes some time for PHY device to switch
+                * into/out-of loopback mode.
+                */
+               msleep(100);
+       }
+
+       return ret;
+}
+
+static struct phy_driver gpy_drivers[] = {
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx),
+               .name           = "Maxlinear Ethernet GPY2xx",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               .phy_id         = PHY_ID_GPY115B,
+               .phy_id_mask    = PHY_ID_GPYx15B_MASK,
+               .name           = "Maxlinear Ethernet GPY115B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY115C),
+               .name           = "Maxlinear Ethernet GPY115C",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               .phy_id         = PHY_ID_GPY211B,
+               .phy_id_mask    = PHY_ID_GPY21xB_MASK,
+               .name           = "Maxlinear Ethernet GPY211B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY211C),
+               .name           = "Maxlinear Ethernet GPY211C",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               .phy_id         = PHY_ID_GPY212B,
+               .phy_id_mask    = PHY_ID_GPY21xB_MASK,
+               .name           = "Maxlinear Ethernet GPY212B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY212C),
+               .name           = "Maxlinear Ethernet GPY212C",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               .phy_id         = PHY_ID_GPY215B,
+               .phy_id_mask    = PHY_ID_GPYx15B_MASK,
+               .name           = "Maxlinear Ethernet GPY215B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY215C),
+               .name           = "Maxlinear Ethernet GPY215C",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY241B),
+               .name           = "Maxlinear Ethernet GPY241B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM),
+               .name           = "Maxlinear Ethernet GPY241BM",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+       {
+               PHY_ID_MATCH_MODEL(PHY_ID_GPY245B),
+               .name           = "Maxlinear Ethernet GPY245B",
+               .get_features   = genphy_c45_pma_read_abilities,
+               .config_init    = gpy_config_init,
+               .probe          = gpy_probe,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+               .config_aneg    = gpy_config_aneg,
+               .aneg_done      = genphy_c45_aneg_done,
+               .read_status    = gpy_read_status,
+               .config_intr    = gpy_config_intr,
+               .handle_interrupt = gpy_handle_interrupt,
+               .set_wol        = gpy_set_wol,
+               .get_wol        = gpy_get_wol,
+               .set_loopback   = gpy_loopback,
+       },
+};
+module_phy_driver(gpy_drivers);
+
+static struct mdio_device_id __maybe_unused gpy_tbl[] = {
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY2xx)},
+       {PHY_ID_GPY115B, PHY_ID_GPYx15B_MASK},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY115C)},
+       {PHY_ID_GPY211B, PHY_ID_GPY21xB_MASK},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY211C)},
+       {PHY_ID_GPY212B, PHY_ID_GPY21xB_MASK},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY212C)},
+       {PHY_ID_GPY215B, PHY_ID_GPYx15B_MASK},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY215C)},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY241B)},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY241BM)},
+       {PHY_ID_MATCH_MODEL(PHY_ID_GPY245B)},
+       { }
+};
+MODULE_DEVICE_TABLE(mdio, gpy_tbl);
+
+MODULE_DESCRIPTION("Maxlinear Ethernet GPY Driver");
+MODULE_AUTHOR("Xu Liang");
+MODULE_LICENSE("GPL");
index afd7afa..9944cc5 100644 (file)
 #define MII_INTSRC_LINK_FAIL           BIT(10)
 #define MII_INTSRC_LINK_UP             BIT(9)
 #define MII_INTSRC_MASK                        (MII_INTSRC_LINK_FAIL | MII_INTSRC_LINK_UP)
-#define MII_INTSRC_TEMP_ERR            BIT(1)
 #define MII_INTSRC_UV_ERR              BIT(3)
+#define MII_INTSRC_TEMP_ERR            BIT(1)
 
 #define MII_INTEN                      22
 #define MII_INTEN_LINK_FAIL            BIT(10)
 #define MII_INTEN_LINK_UP              BIT(9)
+#define MII_INTEN_UV_ERR               BIT(3)
+#define MII_INTEN_TEMP_ERR             BIT(1)
 
 #define MII_COMMSTAT                   23
 #define MII_COMMSTAT_LINK_UP           BIT(15)
@@ -607,7 +609,8 @@ static int tja11xx_config_intr(struct phy_device *phydev)
                if (err)
                        return err;
 
-               value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP;
+               value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP |
+                       MII_INTEN_UV_ERR | MII_INTEN_TEMP_ERR;
                err = phy_write(phydev, MII_INTEN, value);
        } else {
                err = phy_write(phydev, MII_INTEN, value);
@@ -622,6 +625,7 @@ static int tja11xx_config_intr(struct phy_device *phydev)
 
 static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev)
 {
+       struct device *dev = &phydev->mdio.dev;
        int irq_status;
 
        irq_status = phy_read(phydev, MII_INTSRC);
@@ -630,6 +634,11 @@ static irqreturn_t tja11xx_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
 
+       if (irq_status & MII_INTSRC_TEMP_ERR)
+               dev_warn(dev, "Overtemperature error detected (temp > 155C°).\n");
+       if (irq_status & MII_INTSRC_UV_ERR)
+               dev_warn(dev, "Undervoltage error detected.\n");
+
        if (!(irq_status & MII_INTSRC_MASK))
                return IRQ_NONE;
 
index 8eeb26d..f124a8a 100644 (file)
@@ -426,7 +426,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 EXPORT_SYMBOL(phy_mii_ioctl);
 
 /**
- * phy_do_ioctl - generic ndo_do_ioctl implementation
+ * phy_do_ioctl - generic ndo_eth_ioctl implementation
  * @dev: the net_device struct
  * @ifr: &struct ifreq for socket ioctl's
  * @cmd: ioctl cmd to execute
@@ -441,7 +441,7 @@ int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 EXPORT_SYMBOL(phy_do_ioctl);
 
 /**
- * phy_do_ioctl_running - generic ndo_do_ioctl implementation but test first
+ * phy_do_ioctl_running - generic ndo_eth_ioctl implementation but test first
  *
  * @dev: the net_device struct
  * @ifr: &struct ifreq for socket ioctl's
index 5d5f9a9..9e2891d 100644 (file)
@@ -233,11 +233,9 @@ static DEFINE_MUTEX(phy_fixup_lock);
 
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
-       struct device_driver *drv = phydev->mdio.dev.driver;
-       struct phy_driver *phydrv = to_phy_driver(drv);
        struct net_device *netdev = phydev->attached_dev;
 
-       if (!drv || !phydrv->suspend)
+       if (!phydev->drv->suspend)
                return false;
 
        /* PHY not attached? May suspend if the PHY has not already been
@@ -968,6 +966,20 @@ void phy_device_remove(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_device_remove);
 
+/**
+ * phy_get_c45_ids - Read 802.3-c45 IDs for phy device.
+ * @phydev: phy_device structure to read 802.3-c45 IDs
+ *
+ * Returns zero on success, %-EIO on bus access error, or %-ENODEV if
+ * the "devices in package" is invalid.
+ */
+int phy_get_c45_ids(struct phy_device *phydev)
+{
+       return get_phy_c45_ids(phydev->mdio.bus, phydev->mdio.addr,
+                              &phydev->c45_ids);
+}
+EXPORT_SYMBOL(phy_get_c45_ids);
+
 /**
  * phy_find_first - finds the first PHY device on the bus
  * @bus: the target MII bus
@@ -1807,11 +1819,10 @@ EXPORT_SYMBOL(phy_resume);
 
 int phy_loopback(struct phy_device *phydev, bool enable)
 {
-       struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
        int ret = 0;
 
-       if (!phydrv)
-               return -ENODEV;
+       if (!phydev->drv)
+               return -EIO;
 
        mutex_lock(&phydev->lock);
 
@@ -1825,8 +1836,8 @@ int phy_loopback(struct phy_device *phydev, bool enable)
                goto out;
        }
 
-       if (phydrv->set_loopback)
-               ret = phydrv->set_loopback(phydev, enable);
+       if (phydev->drv->set_loopback)
+               ret = phydev->drv->set_loopback(phydev, enable);
        else
                ret = genphy_loopback(phydev, enable);
 
index eb29ef5..2cdf9f9 100644 (file)
@@ -942,10 +942,11 @@ static void phylink_phy_change(struct phy_device *phydev, bool up)
 
        phylink_run_resolve(pl);
 
-       phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+       phylink_dbg(pl, "phy link %s %s/%s/%s/%s\n", up ? "up" : "down",
                    phy_modes(phydev->interface),
                    phy_speed_to_str(phydev->speed),
-                   phy_duplex_to_str(phydev->duplex));
+                   phy_duplex_to_str(phydev->duplex),
+                   phylink_pause_to_str(pl->phy_state.pause));
 }
 
 static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
@@ -1457,15 +1458,11 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
                return phy_ethtool_ksettings_set(pl->phydev, kset);
        }
 
-       linkmode_copy(support, pl->supported);
        config = pl->link_config;
-       config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE;
 
-       /* Mask out unsupported advertisements, and force the autoneg bit */
+       /* Mask out unsupported advertisements */
        linkmode_and(config.advertising, kset->link_modes.advertising,
-                    support);
-       linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising,
-                        config.an_enabled);
+                    pl->supported);
 
        /* FIXME: should we reject autoneg if phy/mac does not support it? */
        switch (kset->base.autoneg) {
@@ -1474,7 +1471,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
                 * duplex.
                 */
                s = phy_lookup_setting(kset->base.speed, kset->base.duplex,
-                                      support, false);
+                                      pl->supported, false);
                if (!s)
                        return -EINVAL;
 
@@ -1515,6 +1512,12 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
        /* We have ruled out the case with a PHY attached, and the
         * fixed-link cases.  All that is left are in-band links.
         */
+       config.an_enabled = kset->base.autoneg == AUTONEG_ENABLE;
+       linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising,
+                        config.an_enabled);
+
+       /* Validate without changing the current supported mask. */
+       linkmode_copy(support, pl->supported);
        if (phylink_validate(pl, support, &config))
                return -EINVAL;
 
index 151c2a3..8dcb49e 100644 (file)
@@ -27,12 +27,28 @@ struct gmii2rgmii {
        struct mdio_device *mdio;
 };
 
-static int xgmiitorgmii_read_status(struct phy_device *phydev)
+static void xgmiitorgmii_configure(struct gmii2rgmii *priv, int speed)
 {
-       struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
        struct mii_bus *bus = priv->mdio->bus;
        int addr = priv->mdio->addr;
-       u16 val = 0;
+       u16 val;
+
+       val = mdiobus_read(bus, addr, XILINX_GMII2RGMII_REG);
+       val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+
+       if (speed == SPEED_1000)
+               val |= BMCR_SPEED1000;
+       else if (speed == SPEED_100)
+               val |= BMCR_SPEED100;
+       else
+               val |= BMCR_SPEED10;
+
+       mdiobus_write(bus, addr, XILINX_GMII2RGMII_REG, val);
+}
+
+static int xgmiitorgmii_read_status(struct phy_device *phydev)
+{
+       struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
        int err;
 
        if (priv->phy_drv->read_status)
@@ -42,17 +58,24 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev)
        if (err < 0)
                return err;
 
-       val = mdiobus_read(bus, addr, XILINX_GMII2RGMII_REG);
-       val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+       xgmiitorgmii_configure(priv, phydev->speed);
 
-       if (phydev->speed == SPEED_1000)
-               val |= BMCR_SPEED1000;
-       else if (phydev->speed == SPEED_100)
-               val |= BMCR_SPEED100;
+       return 0;
+}
+
+static int xgmiitorgmii_set_loopback(struct phy_device *phydev, bool enable)
+{
+       struct gmii2rgmii *priv = mdiodev_get_drvdata(&phydev->mdio);
+       int err;
+
+       if (priv->phy_drv->set_loopback)
+               err = priv->phy_drv->set_loopback(phydev, enable);
        else
-               val |= BMCR_SPEED10;
+               err = genphy_loopback(phydev, enable);
+       if (err < 0)
+               return err;
 
-       mdiobus_write(bus, addr, XILINX_GMII2RGMII_REG, val);
+       xgmiitorgmii_configure(priv, phydev->speed);
 
        return 0;
 }
@@ -90,6 +113,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
        memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
               sizeof(struct phy_driver));
        priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
+       priv->conv_phy_drv.set_loopback = xgmiitorgmii_set_loopback;
        mdiodev_set_drvdata(&priv->phy_dev->mdio, priv);
        priv->phy_dev->drv = &priv->conv_phy_drv;
 
index e26cf91..82d6094 100644 (file)
@@ -84,6 +84,7 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n"
     extra grounds are 18,19,20,21,22,23,24
 */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
@@ -150,7 +151,8 @@ static int plip_hard_header_cache(const struct neighbour *neigh,
                                   struct hh_cache *hh, __be16 type);
 static int plip_open(struct net_device *dev);
 static int plip_close(struct net_device *dev);
-static int plip_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+static int plip_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                              void __user *data, int cmd);
 static int plip_preempt(void *handle);
 static void plip_wakeup(void *handle);
 
@@ -265,7 +267,7 @@ static const struct net_device_ops plip_netdev_ops = {
        .ndo_open                = plip_open,
        .ndo_stop                = plip_close,
        .ndo_start_xmit          = plip_tx_packet,
-       .ndo_do_ioctl            = plip_ioctl,
+       .ndo_siocdevprivate      = plip_siocdevprivate,
        .ndo_set_mac_address     = eth_mac_addr,
        .ndo_validate_addr       = eth_validate_addr,
 };
@@ -1207,7 +1209,8 @@ plip_wakeup(void *handle)
 }
 
 static int
-plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+plip_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                   void __user *data, int cmd)
 {
        struct net_local *nl = netdev_priv(dev);
        struct plipconf *pc = (struct plipconf *) &rq->ifr_ifru;
@@ -1215,6 +1218,9 @@ plip_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        if (cmd != SIOCDEVPLIP)
                return -EOPNOTSUPP;
 
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+
        switch(pc->pcmd) {
        case PLIP_GET_TIMEOUT:
                pc->trigger = nl->trigger;
index 7a099c3..fb52cd1 100644 (file)
@@ -1463,11 +1463,11 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static int
-ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ppp_net_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                      void __user *addr, int cmd)
 {
        struct ppp *ppp = netdev_priv(dev);
        int err = -EFAULT;
-       void __user *addr = (void __user *) ifr->ifr_ifru.ifru_data;
        struct ppp_stats stats;
        struct ppp_comp_stats cstats;
        char *vers;
@@ -1596,7 +1596,7 @@ static const struct net_device_ops ppp_netdev_ops = {
        .ndo_init        = ppp_dev_init,
        .ndo_uninit      = ppp_dev_uninit,
        .ndo_start_xmit  = ppp_start_xmit,
-       .ndo_do_ioctl    = ppp_net_ioctl,
+       .ndo_siocdevprivate = ppp_net_siocdevprivate,
        .ndo_get_stats64 = ppp_get_stats64,
        .ndo_fill_forward_path = ppp_fill_forward_path,
 };
@@ -1744,7 +1744,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                   a four-byte PPP header on each packet */
                *(u8 *)skb_push(skb, 2) = 1;
                if (ppp->pass_filter &&
-                   BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+                   bpf_prog_run(ppp->pass_filter, skb) == 0) {
                        if (ppp->debug & 1)
                                netdev_printk(KERN_DEBUG, ppp->dev,
                                              "PPP: outbound frame "
@@ -1754,7 +1754,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                }
                /* if this packet passes the active filter, record the time */
                if (!(ppp->active_filter &&
-                     BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+                     bpf_prog_run(ppp->active_filter, skb) == 0))
                        ppp->last_xmit = jiffies;
                skb_pull(skb, 2);
 #else
@@ -2468,7 +2468,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
                        *(u8 *)skb_push(skb, 2) = 0;
                        if (ppp->pass_filter &&
-                           BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+                           bpf_prog_run(ppp->pass_filter, skb) == 0) {
                                if (ppp->debug & 1)
                                        netdev_printk(KERN_DEBUG, ppp->dev,
                                                      "PPP: inbound frame "
@@ -2477,7 +2477,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                                return;
                        }
                        if (!(ppp->active_filter &&
-                             BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+                             bpf_prog_run(ppp->active_filter, skb) == 0))
                                ppp->last_recv = jiffies;
                        __skb_pull(skb, 2);
                } else
index e88af97..f01c9db 100644 (file)
@@ -78,7 +78,8 @@ struct sb1000_private {
 /* prototypes for Linux interface */
 extern int sb1000_probe(struct net_device *dev);
 static int sb1000_open(struct net_device *dev);
-static int sb1000_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd);
+static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                void __user *data, int cmd);
 static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb,
                                     struct net_device *dev);
 static irqreturn_t sb1000_interrupt(int irq, void *dev_id);
@@ -135,7 +136,7 @@ MODULE_DEVICE_TABLE(pnp, sb1000_pnp_ids);
 static const struct net_device_ops sb1000_netdev_ops = {
        .ndo_open               = sb1000_open,
        .ndo_start_xmit         = sb1000_start_xmit,
-       .ndo_do_ioctl           = sb1000_dev_ioctl,
+       .ndo_siocdevprivate     = sb1000_siocdevprivate,
        .ndo_stop               = sb1000_close,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
@@ -987,7 +988,8 @@ sb1000_open(struct net_device *dev)
        return 0;                                       /* Always succeed */
 }
 
-static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int sb1000_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                void __user *data, int cmd)
 {
        char* name;
        unsigned char version[2];
@@ -1011,7 +1013,7 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                stats[2] = dev->stats.rx_packets;
                stats[3] = dev->stats.rx_errors;
                stats[4] = dev->stats.rx_dropped;
-               if(copy_to_user(ifr->ifr_data, stats, sizeof(stats)))
+               if (copy_to_user(data, stats, sizeof(stats)))
                        return -EFAULT;
                status = 0;
                break;
@@ -1019,21 +1021,21 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCGCMFIRMWARE:           /* get firmware version */
                if ((status = sb1000_get_firmware_version(ioaddr, name, version, 1)))
                        return status;
-               if(copy_to_user(ifr->ifr_data, version, sizeof(version)))
+               if (copy_to_user(data, version, sizeof(version)))
                        return -EFAULT;
                break;
 
        case SIOCGCMFREQUENCY:          /* get frequency */
                if ((status = sb1000_get_frequency(ioaddr, name, &frequency)))
                        return status;
-               if(put_user(frequency, (int __user *) ifr->ifr_data))
+               if (put_user(frequency, (int __user *)data))
                        return -EFAULT;
                break;
 
        case SIOCSCMFREQUENCY:          /* set frequency */
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
-               if(get_user(frequency, (int __user *) ifr->ifr_data))
+               if (get_user(frequency, (int __user *)data))
                        return -EFAULT;
                if ((status = sb1000_set_frequency(ioaddr, name, frequency)))
                        return status;
@@ -1042,14 +1044,14 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCGCMPIDS:                       /* get PIDs */
                if ((status = sb1000_get_PIDs(ioaddr, name, PID)))
                        return status;
-               if(copy_to_user(ifr->ifr_data, PID, sizeof(PID)))
+               if (copy_to_user(data, PID, sizeof(PID)))
                        return -EFAULT;
                break;
 
        case SIOCSCMPIDS:                       /* set PIDs */
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
-               if(copy_from_user(PID, ifr->ifr_data, sizeof(PID)))
+               if (copy_from_user(PID, data, sizeof(PID)))
                        return -EFAULT;
                if ((status = sb1000_set_PIDs(ioaddr, name, PID)))
                        return status;
index dc84cb8..5435b56 100644 (file)
@@ -62,6 +62,7 @@
  */
 
 #define SL_CHECK_TRANSMIT
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 
@@ -108,7 +109,7 @@ static void slip_unesc6(struct slip *sl, unsigned char c);
 #ifdef CONFIG_SLIP_SMART
 static void sl_keepalive(struct timer_list *t);
 static void sl_outfill(struct timer_list *t);
-static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd);
 #endif
 
 /********************************
@@ -647,7 +648,7 @@ static const struct net_device_ops sl_netdev_ops = {
        .ndo_change_mtu         = sl_change_mtu,
        .ndo_tx_timeout         = sl_tx_timeout,
 #ifdef CONFIG_SLIP_SMART
-       .ndo_do_ioctl           = sl_ioctl,
+       .ndo_siocdevprivate     = sl_siocdevprivate,
 #endif
 };
 
@@ -1179,11 +1180,12 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file,
 
 /* VSV changes start here */
 #ifdef CONFIG_SLIP_SMART
-/* function do_ioctl called from net/core/dev.c
+/* function sl_siocdevprivate called from net/core/dev.c
    to allow get/set outfill/keepalive parameter
    by ifconfig                                 */
 
-static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int sl_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                            void __user *data, int cmd)
 {
        struct slip *sl = netdev_priv(dev);
        unsigned long *p = (unsigned long *)&rq->ifr_ifru;
@@ -1191,6 +1193,9 @@ static int sl_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        if (sl == NULL)         /* Allocation failed ?? */
                return -ENODEV;
 
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+
        spin_lock_bh(&sl->lock);
 
        if (!sl->tty) {
index 32aef8a..b095a4b 100644 (file)
@@ -197,7 +197,7 @@ static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
        fp = rcu_dereference_bh(lb_priv->fp);
        if (unlikely(!fp))
                return 0;
-       lhash = BPF_PROG_RUN(fp, skb);
+       lhash = bpf_prog_run(fp, skb);
        c = (char *) &lhash;
        return c[0] ^ c[1] ^ c[2] ^ c[3];
 }
index 2ced021..fecc9a1 100644 (file)
@@ -3510,7 +3510,9 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
 }
 
 static int tun_get_coalesce(struct net_device *dev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct tun_struct *tun = netdev_priv(dev);
 
@@ -3520,7 +3522,9 @@ static int tun_get_coalesce(struct net_device *dev,
 }
 
 static int tun_set_coalesce(struct net_device *dev,
-                           struct ethtool_coalesce *ec)
+                           struct ethtool_coalesce *ec,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct tun_struct *tun = netdev_priv(dev);
 
index dc87e8c..30821f6 100644 (file)
@@ -197,7 +197,7 @@ static const struct net_device_ops ax88172_netdev_ops = {
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = asix_ioctl,
+       .ndo_eth_ioctl          = asix_ioctl,
        .ndo_set_rx_mode        = ax88172_set_multicast,
 };
 
@@ -587,7 +587,7 @@ static const struct net_device_ops ax88772_netdev_ops = {
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_rx_mode        = asix_set_multicast,
 };
 
@@ -706,7 +706,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
        u8 buf[ETH_ALEN] = {0}, chipcode = 0;
        struct asix_common_private *priv;
        int ret, i;
-       u32 phyid;
 
        priv = devm_kzalloc(&dev->udev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
@@ -767,10 +766,6 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
                return ret;
        }
 
-       /* Read PHYID register *AFTER* the PHY was reset properly */
-       phyid = asix_get_phyid(dev);
-       netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
-
        /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */
        if (dev->driver_info->flags & FLAG_FRAMING_AX) {
                /* hard_mtu  is still the default - the device does not support
@@ -1105,7 +1100,7 @@ static const struct net_device_ops ax88178_netdev_ops = {
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = asix_set_multicast,
-       .ndo_do_ioctl           = asix_ioctl,
+       .ndo_eth_ioctl          = asix_ioctl,
        .ndo_change_mtu         = ax88178_change_mtu,
 };
 
@@ -1220,6 +1215,7 @@ static const struct driver_info ax88772b_info = {
        .unbind = ax88772_unbind,
        .status = asix_status,
        .reset = ax88772_reset,
+       .stop = ax88772_stop,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
                 FLAG_MULTI_PACKET,
        .rx_fixup = asix_rx_fixup_common,
index 530947d..d9777d9 100644 (file)
@@ -109,7 +109,7 @@ static const struct net_device_ops ax88172a_netdev_ops = {
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_set_mac_address    = asix_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_rx_mode        = asix_set_multicast,
 };
 
index c131671..f25448a 100644 (file)
@@ -1035,7 +1035,7 @@ static const struct net_device_ops ax88179_netdev_ops = {
        .ndo_change_mtu         = ax88179_change_mtu,
        .ndo_set_mac_address    = ax88179_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = ax88179_ioctl,
+       .ndo_eth_ioctl          = ax88179_ioctl,
        .ndo_set_rx_mode        = ax88179_set_multicast,
        .ndo_set_features       = ax88179_set_features,
 };
index 8d1f69d..e1da910 100644 (file)
@@ -253,7 +253,8 @@ static int usbpn_close(struct net_device *dev)
        return usb_set_interface(pnd->usb, num, !pnd->active_setting);
 }
 
-static int usbpn_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int usbpn_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                               void __user *data, int cmd)
 {
        struct if_phonet_req *req = (struct if_phonet_req *)ifr;
 
@@ -269,7 +270,7 @@ static const struct net_device_ops usbpn_ops = {
        .ndo_open       = usbpn_open,
        .ndo_stop       = usbpn_close,
        .ndo_start_xmit = usbpn_xmit,
-       .ndo_do_ioctl   = usbpn_ioctl,
+       .ndo_siocdevprivate = usbpn_siocdevprivate,
 };
 
 static void usbpn_setup(struct net_device *dev)
index 89cc61d..907f98b 100644 (file)
@@ -345,7 +345,7 @@ static const struct net_device_ops dm9601_netdev_ops = {
        .ndo_change_mtu         = usbnet_change_mtu,
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = dm9601_ioctl,
+       .ndo_eth_ioctl          = dm9601_ioctl,
        .ndo_set_rx_mode        = dm9601_set_multicast,
        .ndo_set_mac_address    = dm9601_set_mac_address,
 };
index dec96e8..24bc1e6 100644 (file)
@@ -1079,8 +1079,7 @@ static void hso_init_termios(struct ktermios *termios)
        tty_termios_encode_baud_rate(termios, 115200, 115200);
 }
 
-static void _hso_serial_set_termios(struct tty_struct *tty,
-                                   struct ktermios *old)
+static void _hso_serial_set_termios(struct tty_struct *tty)
 {
        struct hso_serial *serial = tty->driver_data;
 
@@ -1262,7 +1261,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp)
        if (serial->port.count == 1) {
                serial->rx_state = RX_IDLE;
                /* Force default termio settings */
-               _hso_serial_set_termios(tty, NULL);
+               _hso_serial_set_termios(tty);
                tasklet_setup(&serial->unthrottle_tasklet,
                              hso_unthrottle_tasklet);
                result = hso_start_serial_device(serial->parent, GFP_KERNEL);
@@ -1394,7 +1393,7 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old)
        /* the actual setup */
        spin_lock_irqsave(&serial->serial_lock, flags);
        if (serial->port.count)
-               _hso_serial_set_termios(tty, old);
+               _hso_serial_set_termios(tty);
        else
                tty->termios = *old;
        spin_unlock_irqrestore(&serial->serial_lock, flags);
@@ -2353,7 +2352,7 @@ static int remove_net_device(struct hso_device *hso_dev)
 }
 
 /* Frees our network device */
-static void hso_free_net_device(struct hso_device *hso_dev, bool bailout)
+static void hso_free_net_device(struct hso_device *hso_dev)
 {
        int i;
        struct hso_net *hso_net = dev2net(hso_dev);
@@ -2376,7 +2375,7 @@ static void hso_free_net_device(struct hso_device *hso_dev, bool bailout)
        kfree(hso_net->mux_bulk_tx_buf);
        hso_net->mux_bulk_tx_buf = NULL;
 
-       if (hso_net->net && !bailout)
+       if (hso_net->net)
                free_netdev(hso_net->net);
 
        kfree(hso_dev);
@@ -3133,7 +3132,7 @@ static void hso_free_interface(struct usb_interface *interface)
                                rfkill_unregister(rfk);
                                rfkill_destroy(rfk);
                        }
-                       hso_free_net_device(network_table[i], false);
+                       hso_free_net_device(network_table[i]);
                }
        }
 }
index 207e59e..06e2181 100644 (file)
@@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf,
 
        netdev->netdev_ops = &ipheth_netdev_ops;
        netdev->watchdog_timeo = IPHETH_TX_TIMEOUT;
-       strcpy(netdev->name, "eth%d");
+       strscpy(netdev->name, "eth%d", sizeof(netdev->name));
 
        dev = netdev_priv(netdev);
        dev->udev = udev;
index 6d092d7..793f8fb 100644 (file)
 
 #define MAX_RX_FIFO_SIZE               (12 * 1024)
 #define MAX_TX_FIFO_SIZE               (12 * 1024)
+
+#define FLOW_THRESHOLD(n)              ((((n) + 511) / 512) & 0x7F)
+#define FLOW_CTRL_THRESHOLD(on, off)   ((FLOW_THRESHOLD(on)  << 0) | \
+                                        (FLOW_THRESHOLD(off) << 8))
+
+/* Flow control turned on when Rx FIFO level rises above this level (bytes) */
+#define FLOW_ON_SS                     9216
+#define FLOW_ON_HS                     8704
+
+/* Flow control turned off when Rx FIFO level falls below this level (bytes) */
+#define FLOW_OFF_SS                    4096
+#define FLOW_OFF_HS                    1024
+
 #define DEFAULT_BURST_CAP_SIZE         (MAX_TX_FIFO_SIZE)
 #define DEFAULT_BULK_IN_DELAY          (0x0800)
 #define MAX_SINGLE_PACKET_SIZE         (9000)
 /* statistic update interval (mSec) */
 #define STAT_UPDATE_TIMER              (1 * 1000)
 
+/* time to wait for MAC or FCT to stop (jiffies) */
+#define HW_DISABLE_TIMEOUT             (HZ / 10)
+
+/* time to wait between polling MAC or FCT state (ms) */
+#define HW_DISABLE_DELAY_MS            1
+
 /* defines interrupts from interrupt EP */
 #define MAX_INT_EP                     (32)
 #define INT_EP_INTEP                   (31)
@@ -341,6 +360,7 @@ struct usb_context {
 #define EVENT_DEV_ASLEEP               7
 #define EVENT_DEV_OPEN                 8
 #define EVENT_STAT_UPDATE              9
+#define EVENT_DEV_DISCONNECT           10
 
 struct statstage {
        struct mutex                    access_lock;    /* for stats access */
@@ -370,7 +390,6 @@ struct lan78xx_net {
        struct sk_buff_head     rxq;
        struct sk_buff_head     txq;
        struct sk_buff_head     done;
-       struct sk_buff_head     rxq_pause;
        struct sk_buff_head     txq_pend;
 
        struct tasklet_struct   bh;
@@ -381,8 +400,9 @@ struct lan78xx_net {
        struct urb              *urb_intr;
        struct usb_anchor       deferred;
 
+       struct mutex            dev_mutex; /* serialise open/stop wrt suspend/resume */
        struct mutex            phy_mutex; /* for phy access */
-       unsigned                pipe_in, pipe_out, pipe_intr;
+       unsigned int            pipe_in, pipe_out, pipe_intr;
 
        u32                     hard_mtu;       /* count any extra framing */
        size_t                  rx_urb_size;    /* size for rx urbs */
@@ -392,8 +412,7 @@ struct lan78xx_net {
        wait_queue_head_t       *wait;
        unsigned char           suspend_count;
 
-       unsigned                maxpacket;
-       struct timer_list       delay;
+       unsigned int            maxpacket;
        struct timer_list       stat_monitor;
 
        unsigned long           data[5];
@@ -426,9 +445,13 @@ MODULE_PARM_DESC(msg_level, "Override default message level");
 
 static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
 {
-       u32 *buf = kmalloc(sizeof(u32), GFP_KERNEL);
+       u32 *buf;
        int ret;
 
+       if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+               return -ENODEV;
+
+       buf = kmalloc(sizeof(u32), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
@@ -439,7 +462,7 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
        if (likely(ret >= 0)) {
                le32_to_cpus(buf);
                *data = *buf;
-       } else {
+       } else if (net_ratelimit()) {
                netdev_warn(dev->net,
                            "Failed to read register index 0x%08x. ret = %d",
                            index, ret);
@@ -452,9 +475,13 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data)
 
 static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
 {
-       u32 *buf = kmalloc(sizeof(u32), GFP_KERNEL);
+       u32 *buf;
        int ret;
 
+       if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+               return -ENODEV;
+
+       buf = kmalloc(sizeof(u32), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
@@ -465,7 +492,8 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
                              USB_VENDOR_REQUEST_WRITE_REGISTER,
                              USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
                              0, index, buf, 4, USB_CTRL_SET_TIMEOUT);
-       if (unlikely(ret < 0)) {
+       if (unlikely(ret < 0) &&
+           net_ratelimit()) {
                netdev_warn(dev->net,
                            "Failed to write register index 0x%08x. ret = %d",
                            index, ret);
@@ -476,6 +504,26 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data)
        return ret;
 }
 
+static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask,
+                             u32 data)
+{
+       int ret;
+       u32 buf;
+
+       ret = lan78xx_read_reg(dev, reg, &buf);
+       if (ret < 0)
+               return ret;
+
+       buf &= ~mask;
+       buf |= (mask & data);
+
+       ret = lan78xx_write_reg(dev, reg, buf);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
 static int lan78xx_read_stats(struct lan78xx_net *dev,
                              struct lan78xx_statstage *data)
 {
@@ -501,7 +549,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev,
        if (likely(ret >= 0)) {
                src = (u32 *)stats;
                dst = (u32 *)data;
-               for (i = 0; i < sizeof(*stats)/sizeof(u32); i++) {
+               for (i = 0; i < sizeof(*stats) / sizeof(u32); i++) {
                        le32_to_cpus(&src[i]);
                        dst[i] = src[i];
                }
@@ -515,10 +563,11 @@ static int lan78xx_read_stats(struct lan78xx_net *dev,
        return ret;
 }
 
-#define check_counter_rollover(struct1, dev_stats, member) {   \
-       if (struct1->member < dev_stats.saved.member)           \
-               dev_stats.rollover_count.member++;              \
-       }
+#define check_counter_rollover(struct1, dev_stats, member)             \
+       do {                                                            \
+               if ((struct1)->member < (dev_stats).saved.member)       \
+                       (dev_stats).rollover_count.member++;            \
+       } while (0)
 
 static void lan78xx_check_stat_rollover(struct lan78xx_net *dev,
                                        struct lan78xx_statstage *stats)
@@ -844,9 +893,9 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
 
        for (i = 0; i < length; i++) {
                lan78xx_write_reg(dev, OTP_ADDR1,
-                                       ((offset + i) >> 8) & OTP_ADDR1_15_11);
+                                 ((offset + i) >> 8) & OTP_ADDR1_15_11);
                lan78xx_write_reg(dev, OTP_ADDR2,
-                                       ((offset + i) & OTP_ADDR2_10_3));
+                                 ((offset + i) & OTP_ADDR2_10_3));
 
                lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
                lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
@@ -900,9 +949,9 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
 
        for (i = 0; i < length; i++) {
                lan78xx_write_reg(dev, OTP_ADDR1,
-                                       ((offset + i) >> 8) & OTP_ADDR1_15_11);
+                                 ((offset + i) >> 8) & OTP_ADDR1_15_11);
                lan78xx_write_reg(dev, OTP_ADDR2,
-                                       ((offset + i) & OTP_ADDR2_10_3));
+                                 ((offset + i) & OTP_ADDR2_10_3));
                lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
                lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
                lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
@@ -959,7 +1008,7 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev)
                usleep_range(40, 100);
        }
 
-       netdev_warn(dev->net, "lan78xx_dataport_wait_not_busy timed out");
+       netdev_warn(dev->net, "%s timed out", __func__);
 
        return -EIO;
 }
@@ -972,7 +1021,7 @@ static int lan78xx_dataport_write(struct lan78xx_net *dev, u32 ram_select,
        int i, ret;
 
        if (usb_autopm_get_interface(dev->intf) < 0)
-                       return 0;
+               return 0;
 
        mutex_lock(&pdata->dataport_mutex);
 
@@ -1045,9 +1094,9 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
        for (i = 1; i < NUM_OF_MAF; i++) {
                lan78xx_write_reg(dev, MAF_HI(i), 0);
                lan78xx_write_reg(dev, MAF_LO(i),
-                                       pdata->pfilter_table[i][1]);
+                                 pdata->pfilter_table[i][1]);
                lan78xx_write_reg(dev, MAF_HI(i),
-                                       pdata->pfilter_table[i][0]);
+                                 pdata->pfilter_table[i][0]);
        }
 
        lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
@@ -1066,11 +1115,12 @@ static void lan78xx_set_multicast(struct net_device *netdev)
                            RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_);
 
        for (i = 0; i < DP_SEL_VHF_HASH_LEN; i++)
-                       pdata->mchash_table[i] = 0;
+               pdata->mchash_table[i] = 0;
+
        /* pfilter_table[0] has own HW address */
        for (i = 1; i < NUM_OF_MAF; i++) {
-                       pdata->pfilter_table[i][0] =
-                       pdata->pfilter_table[i][1] = 0;
+               pdata->pfilter_table[i][0] = 0;
+               pdata->pfilter_table[i][1] = 0;
        }
 
        pdata->rfe_ctl |= RFE_CTL_BCAST_EN_;
@@ -1134,9 +1184,9 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
                flow |= FLOW_CR_RX_FCEN_;
 
        if (dev->udev->speed == USB_SPEED_SUPER)
-               fct_flow = 0x817;
+               fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_SS, FLOW_OFF_SS);
        else if (dev->udev->speed == USB_SPEED_HIGH)
-               fct_flow = 0x211;
+               fct_flow = FLOW_CTRL_THRESHOLD(FLOW_ON_HS, FLOW_OFF_HS);
 
        netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s",
                  (cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
@@ -1150,6 +1200,52 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
        return 0;
 }
 
+static int lan78xx_mac_reset(struct lan78xx_net *dev)
+{
+       unsigned long start_time = jiffies;
+       u32 val;
+       int ret;
+
+       mutex_lock(&dev->phy_mutex);
+
+       /* Resetting the device while there is activity on the MDIO
+        * bus can result in the MAC interface locking up and not
+        * completing register access transactions.
+        */
+       ret = lan78xx_phy_wait_not_busy(dev);
+       if (ret < 0)
+               goto done;
+
+       ret = lan78xx_read_reg(dev, MAC_CR, &val);
+       if (ret < 0)
+               goto done;
+
+       val |= MAC_CR_RST_;
+       ret = lan78xx_write_reg(dev, MAC_CR, val);
+       if (ret < 0)
+               goto done;
+
+       /* Wait for the reset to complete before allowing any further
+        * MAC register accesses otherwise the MAC may lock up.
+        */
+       do {
+               ret = lan78xx_read_reg(dev, MAC_CR, &val);
+               if (ret < 0)
+                       goto done;
+
+               if (!(val & MAC_CR_RST_)) {
+                       ret = 0;
+                       goto done;
+               }
+       } while (!time_after(jiffies, start_time + HZ));
+
+       ret = -ETIMEDOUT;
+done:
+       mutex_unlock(&dev->phy_mutex);
+
+       return ret;
+}
+
 static int lan78xx_link_reset(struct lan78xx_net *dev)
 {
        struct phy_device *phydev = dev->net->phydev;
@@ -1160,7 +1256,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
        /* clear LAN78xx interrupt status */
        ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_);
        if (unlikely(ret < 0))
-               return -EIO;
+               return ret;
 
        mutex_lock(&phydev->lock);
        phy_read_status(phydev);
@@ -1171,13 +1267,9 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                dev->link_on = false;
 
                /* reset MAC */
-               ret = lan78xx_read_reg(dev, MAC_CR, &buf);
-               if (unlikely(ret < 0))
-                       return -EIO;
-               buf |= MAC_CR_RST_;
-               ret = lan78xx_write_reg(dev, MAC_CR, buf);
-               if (unlikely(ret < 0))
-                       return -EIO;
+               ret = lan78xx_mac_reset(dev);
+               if (ret < 0)
+                       return ret;
 
                del_timer(&dev->stat_monitor);
        } else if (link && !dev->link_on) {
@@ -1189,18 +1281,30 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                        if (ecmd.base.speed == 1000) {
                                /* disable U2 */
                                ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+                               if (ret < 0)
+                                       return ret;
                                buf &= ~USB_CFG1_DEV_U2_INIT_EN_;
                                ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+                               if (ret < 0)
+                                       return ret;
                                /* enable U1 */
                                ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+                               if (ret < 0)
+                                       return ret;
                                buf |= USB_CFG1_DEV_U1_INIT_EN_;
                                ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+                               if (ret < 0)
+                                       return ret;
                        } else {
                                /* enable U1 & U2 */
                                ret = lan78xx_read_reg(dev, USB_CFG1, &buf);
+                               if (ret < 0)
+                                       return ret;
                                buf |= USB_CFG1_DEV_U2_INIT_EN_;
                                buf |= USB_CFG1_DEV_U1_INIT_EN_;
                                ret = lan78xx_write_reg(dev, USB_CFG1, buf);
+                               if (ret < 0)
+                                       return ret;
                        }
                }
 
@@ -1218,6 +1322,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 
                ret = lan78xx_update_flowcontrol(dev, ecmd.base.duplex, ladv,
                                                 radv);
+               if (ret < 0)
+                       return ret;
 
                if (!timer_pending(&dev->stat_monitor)) {
                        dev->delta = 1;
@@ -1228,7 +1334,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                tasklet_schedule(&dev->bh);
        }
 
-       return ret;
+       return 0;
 }
 
 /* some work can't be done in tasklets, so we use keventd
@@ -1264,9 +1370,10 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
                        generic_handle_irq(dev->domain_data.phyirq);
                        local_irq_enable();
                }
-       } else
+       } else {
                netdev_warn(dev->net,
                            "unexpected interrupt: 0x%08x\n", intdata);
+       }
 }
 
 static int lan78xx_ethtool_get_eeprom_len(struct net_device *netdev)
@@ -1355,7 +1462,7 @@ static void lan78xx_get_wol(struct net_device *netdev,
        struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
 
        if (usb_autopm_get_interface(dev->intf) < 0)
-                       return;
+               return;
 
        ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
        if (unlikely(ret < 0)) {
@@ -2003,7 +2110,7 @@ static int lan8835_fixup(struct phy_device *phydev)
 
        /* RGMII MAC TXC Delay Enable */
        lan78xx_write_reg(dev, MAC_RGMII_ID,
-                               MAC_RGMII_ID_TXC_DELAY_EN_);
+                         MAC_RGMII_ID_TXC_DELAY_EN_);
 
        /* RGMII TX DLL Tune Adjust */
        lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
@@ -2267,11 +2374,16 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
        int ll_mtu = new_mtu + netdev->hard_header_len;
        int old_hard_mtu = dev->hard_mtu;
        int old_rx_urb_size = dev->rx_urb_size;
+       int ret;
 
        /* no second zero-length packet read wanted after mtu-sized packets */
        if ((ll_mtu % dev->maxpacket) == 0)
                return -EDOM;
 
+       ret = usb_autopm_get_interface(dev->intf);
+       if (ret < 0)
+               return ret;
+
        lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
 
        netdev->mtu = new_mtu;
@@ -2287,6 +2399,8 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
                }
        }
 
+       usb_autopm_put_interface(dev->intf);
+
        return 0;
 }
 
@@ -2443,26 +2557,186 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev)
        lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]);
 }
 
+static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable)
+{
+       return lan78xx_update_reg(dev, reg, hw_enable, hw_enable);
+}
+
+static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled,
+                          u32 hw_disabled)
+{
+       unsigned long timeout;
+       bool stopped = true;
+       int ret;
+       u32 buf;
+
+       /* Stop the h/w block (if not already stopped) */
+
+       ret = lan78xx_read_reg(dev, reg, &buf);
+       if (ret < 0)
+               return ret;
+
+       if (buf & hw_enabled) {
+               buf &= ~hw_enabled;
+
+               ret = lan78xx_write_reg(dev, reg, buf);
+               if (ret < 0)
+                       return ret;
+
+               stopped = false;
+               timeout = jiffies + HW_DISABLE_TIMEOUT;
+               do  {
+                       ret = lan78xx_read_reg(dev, reg, &buf);
+                       if (ret < 0)
+                               return ret;
+
+                       if (buf & hw_disabled)
+                               stopped = true;
+                       else
+                               msleep(HW_DISABLE_DELAY_MS);
+               } while (!stopped && !time_after(jiffies, timeout));
+       }
+
+       ret = stopped ? 0 : -ETIME;
+
+       return ret;
+}
+
+static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush)
+{
+       return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush);
+}
+
+static int lan78xx_start_tx_path(struct lan78xx_net *dev)
+{
+       int ret;
+
+       netif_dbg(dev, drv, dev->net, "start tx path");
+
+       /* Start the MAC transmitter */
+
+       ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_);
+       if (ret < 0)
+               return ret;
+
+       /* Start the Tx FIFO */
+
+       ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static int lan78xx_stop_tx_path(struct lan78xx_net *dev)
+{
+       int ret;
+
+       netif_dbg(dev, drv, dev->net, "stop tx path");
+
+       /* Stop the Tx FIFO */
+
+       ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_);
+       if (ret < 0)
+               return ret;
+
+       /* Stop the MAC transmitter */
+
+       ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+/* The caller must ensure the Tx path is stopped before calling
+ * lan78xx_flush_tx_fifo().
+ */
+static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev)
+{
+       return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_);
+}
+
+static int lan78xx_start_rx_path(struct lan78xx_net *dev)
+{
+       int ret;
+
+       netif_dbg(dev, drv, dev->net, "start rx path");
+
+       /* Start the Rx FIFO */
+
+       ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_);
+       if (ret < 0)
+               return ret;
+
+       /* Start the MAC receiver*/
+
+       ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+static int lan78xx_stop_rx_path(struct lan78xx_net *dev)
+{
+       int ret;
+
+       netif_dbg(dev, drv, dev->net, "stop rx path");
+
+       /* Stop the MAC receiver */
+
+       ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_);
+       if (ret < 0)
+               return ret;
+
+       /* Stop the Rx FIFO */
+
+       ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_);
+       if (ret < 0)
+               return ret;
+
+       return 0;
+}
+
+/* The caller must ensure the Rx path is stopped before calling
+ * lan78xx_flush_rx_fifo().
+ */
+static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev)
+{
+       return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_);
+}
+
 static int lan78xx_reset(struct lan78xx_net *dev)
 {
        struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
-       u32 buf;
-       int ret = 0;
        unsigned long timeout;
+       int ret;
+       u32 buf;
        u8 sig;
 
        ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= HW_CFG_LRST_;
+
        ret = lan78xx_write_reg(dev, HW_CFG, buf);
+       if (ret < 0)
+               return ret;
 
        timeout = jiffies + HZ;
        do {
                mdelay(1);
                ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+               if (ret < 0)
+                       return ret;
+
                if (time_after(jiffies, timeout)) {
                        netdev_warn(dev->net,
                                    "timeout on completion of LiteReset");
-                       return -EIO;
+                       ret = -ETIMEDOUT;
+                       return ret;
                }
        } while (buf & HW_CFG_LRST_);
 
@@ -2470,13 +2744,22 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 
        /* save DEVID for later usage */
        ret = lan78xx_read_reg(dev, ID_REV, &buf);
+       if (ret < 0)
+               return ret;
+
        dev->chipid = (buf & ID_REV_CHIP_ID_MASK_) >> 16;
        dev->chiprev = buf & ID_REV_CHIP_REV_MASK_;
 
        /* Respond to the IN token with a NAK */
        ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= USB_CFG_BIR_;
+
        ret = lan78xx_write_reg(dev, USB_CFG0, buf);
+       if (ret < 0)
+               return ret;
 
        /* Init LTM */
        lan78xx_init_ltm(dev);
@@ -2499,53 +2782,105 @@ static int lan78xx_reset(struct lan78xx_net *dev)
        }
 
        ret = lan78xx_write_reg(dev, BURST_CAP, buf);
+       if (ret < 0)
+               return ret;
+
        ret = lan78xx_write_reg(dev, BULK_IN_DLY, DEFAULT_BULK_IN_DELAY);
+       if (ret < 0)
+               return ret;
 
        ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= HW_CFG_MEF_;
+
        ret = lan78xx_write_reg(dev, HW_CFG, buf);
+       if (ret < 0)
+               return ret;
 
        ret = lan78xx_read_reg(dev, USB_CFG0, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= USB_CFG_BCE_;
+
        ret = lan78xx_write_reg(dev, USB_CFG0, buf);
+       if (ret < 0)
+               return ret;
 
        /* set FIFO sizes */
        buf = (MAX_RX_FIFO_SIZE - 512) / 512;
+
        ret = lan78xx_write_reg(dev, FCT_RX_FIFO_END, buf);
+       if (ret < 0)
+               return ret;
 
        buf = (MAX_TX_FIFO_SIZE - 512) / 512;
+
        ret = lan78xx_write_reg(dev, FCT_TX_FIFO_END, buf);
+       if (ret < 0)
+               return ret;
 
        ret = lan78xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_);
+       if (ret < 0)
+               return ret;
+
        ret = lan78xx_write_reg(dev, FLOW, 0);
+       if (ret < 0)
+               return ret;
+
        ret = lan78xx_write_reg(dev, FCT_FLOW, 0);
+       if (ret < 0)
+               return ret;
 
        /* Don't need rfe_ctl_lock during initialisation */
        ret = lan78xx_read_reg(dev, RFE_CTL, &pdata->rfe_ctl);
+       if (ret < 0)
+               return ret;
+
        pdata->rfe_ctl |= RFE_CTL_BCAST_EN_ | RFE_CTL_DA_PERFECT_;
+
        ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+       if (ret < 0)
+               return ret;
 
        /* Enable or disable checksum offload engines */
-       lan78xx_set_features(dev->net, dev->net->features);
+       ret = lan78xx_set_features(dev->net, dev->net->features);
+       if (ret < 0)
+               return ret;
 
        lan78xx_set_multicast(dev->net);
 
        /* reset PHY */
        ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= PMT_CTL_PHY_RST_;
+
        ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+       if (ret < 0)
+               return ret;
 
        timeout = jiffies + HZ;
        do {
                mdelay(1);
                ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+               if (ret < 0)
+                       return ret;
+
                if (time_after(jiffies, timeout)) {
                        netdev_warn(dev->net, "timeout waiting for PHY Reset");
-                       return -EIO;
+                       ret = -ETIMEDOUT;
+                       return ret;
                }
        } while ((buf & PMT_CTL_PHY_RST_) || !(buf & PMT_CTL_READY_));
 
        ret = lan78xx_read_reg(dev, MAC_CR, &buf);
+       if (ret < 0)
+               return ret;
+
        /* LAN7801 only has RGMII mode */
        if (dev->chipid == ID_REV_CHIP_ID_7801_)
                buf &= ~MAC_CR_GMII_EN_;
@@ -2559,27 +2894,13 @@ static int lan78xx_reset(struct lan78xx_net *dev)
                }
        }
        ret = lan78xx_write_reg(dev, MAC_CR, buf);
-
-       ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-       buf |= MAC_TX_TXEN_;
-       ret = lan78xx_write_reg(dev, MAC_TX, buf);
-
-       ret = lan78xx_read_reg(dev, FCT_TX_CTL, &buf);
-       buf |= FCT_TX_CTL_EN_;
-       ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
+       if (ret < 0)
+               return ret;
 
        ret = lan78xx_set_rx_max_frame_length(dev,
                                              dev->net->mtu + VLAN_ETH_HLEN);
 
-       ret = lan78xx_read_reg(dev, MAC_RX, &buf);
-       buf |= MAC_RX_RXEN_;
-       ret = lan78xx_write_reg(dev, MAC_RX, buf);
-
-       ret = lan78xx_read_reg(dev, FCT_RX_CTL, &buf);
-       buf |= FCT_RX_CTL_EN_;
-       ret = lan78xx_write_reg(dev, FCT_RX_CTL, buf);
-
-       return 0;
+       return ret;
 }
 
 static void lan78xx_init_stats(struct lan78xx_net *dev)
@@ -2613,9 +2934,13 @@ static int lan78xx_open(struct net_device *net)
        struct lan78xx_net *dev = netdev_priv(net);
        int ret;
 
+       netif_dbg(dev, ifup, dev->net, "open device");
+
        ret = usb_autopm_get_interface(dev->intf);
        if (ret < 0)
-               goto out;
+               return ret;
+
+       mutex_lock(&dev->dev_mutex);
 
        phy_start(net->phydev);
 
@@ -2631,6 +2956,20 @@ static int lan78xx_open(struct net_device *net)
                }
        }
 
+       ret = lan78xx_flush_rx_fifo(dev);
+       if (ret < 0)
+               goto done;
+       ret = lan78xx_flush_tx_fifo(dev);
+       if (ret < 0)
+               goto done;
+
+       ret = lan78xx_start_tx_path(dev);
+       if (ret < 0)
+               goto done;
+       ret = lan78xx_start_rx_path(dev);
+       if (ret < 0)
+               goto done;
+
        lan78xx_init_stats(dev);
 
        set_bit(EVENT_DEV_OPEN, &dev->flags);
@@ -2641,9 +2980,10 @@ static int lan78xx_open(struct net_device *net)
 
        lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
 done:
+       mutex_unlock(&dev->dev_mutex);
+
        usb_autopm_put_interface(dev->intf);
 
-out:
        return ret;
 }
 
@@ -2660,53 +3000,74 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev)
        temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq);
 
        /* maybe wait for deletions to finish. */
-       while (!skb_queue_empty(&dev->rxq) &&
-              !skb_queue_empty(&dev->txq) &&
-              !skb_queue_empty(&dev->done)) {
+       while (!skb_queue_empty(&dev->rxq) ||
+              !skb_queue_empty(&dev->txq)) {
                schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS));
                set_current_state(TASK_UNINTERRUPTIBLE);
                netif_dbg(dev, ifdown, dev->net,
-                         "waited for %d urb completions\n", temp);
+                         "waited for %d urb completions", temp);
        }
        set_current_state(TASK_RUNNING);
        dev->wait = NULL;
        remove_wait_queue(&unlink_wakeup, &wait);
+
+       while (!skb_queue_empty(&dev->done)) {
+               struct skb_data *entry;
+               struct sk_buff *skb;
+
+               skb = skb_dequeue(&dev->done);
+               entry = (struct skb_data *)(skb->cb);
+               usb_free_urb(entry->urb);
+               dev_kfree_skb(skb);
+       }
 }
 
 static int lan78xx_stop(struct net_device *net)
 {
        struct lan78xx_net *dev = netdev_priv(net);
 
+       netif_dbg(dev, ifup, dev->net, "stop device");
+
+       mutex_lock(&dev->dev_mutex);
+
        if (timer_pending(&dev->stat_monitor))
                del_timer_sync(&dev->stat_monitor);
 
-       if (net->phydev)
-               phy_stop(net->phydev);
-
        clear_bit(EVENT_DEV_OPEN, &dev->flags);
        netif_stop_queue(net);
+       tasklet_kill(&dev->bh);
+
+       lan78xx_terminate_urbs(dev);
 
        netif_info(dev, ifdown, dev->net,
                   "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n",
                   net->stats.rx_packets, net->stats.tx_packets,
                   net->stats.rx_errors, net->stats.tx_errors);
 
-       lan78xx_terminate_urbs(dev);
+       /* ignore errors that occur stopping the Tx and Rx data paths */
+       lan78xx_stop_tx_path(dev);
+       lan78xx_stop_rx_path(dev);
 
-       usb_kill_urb(dev->urb_intr);
+       if (net->phydev)
+               phy_stop(net->phydev);
 
-       skb_queue_purge(&dev->rxq_pause);
+       usb_kill_urb(dev->urb_intr);
 
        /* deferred work (task, timer, softirq) must also stop.
         * can't flush_scheduled_work() until we drop rtnl (later),
         * else workers could deadlock; so make workers a NOP.
         */
-       dev->flags = 0;
+       clear_bit(EVENT_TX_HALT, &dev->flags);
+       clear_bit(EVENT_RX_HALT, &dev->flags);
+       clear_bit(EVENT_LINK_RESET, &dev->flags);
+       clear_bit(EVENT_STAT_UPDATE, &dev->flags);
+
        cancel_delayed_work_sync(&dev->wq);
-       tasklet_kill(&dev->bh);
 
        usb_autopm_put_interface(dev->intf);
 
+       mutex_unlock(&dev->dev_mutex);
+
        return 0;
 }
 
@@ -2795,16 +3156,23 @@ static void tx_complete(struct urb *urb)
                /* software-driven interface shutdown */
                case -ECONNRESET:
                case -ESHUTDOWN:
+                       netif_dbg(dev, tx_err, dev->net,
+                                 "tx err interface gone %d\n",
+                                 entry->urb->status);
                        break;
 
                case -EPROTO:
                case -ETIME:
                case -EILSEQ:
                        netif_stop_queue(dev->net);
+                       netif_dbg(dev, tx_err, dev->net,
+                                 "tx err queue stopped %d\n",
+                                 entry->urb->status);
                        break;
                default:
                        netif_dbg(dev, tx_err, dev->net,
-                                 "tx err %d\n", entry->urb->status);
+                                 "unknown tx err %d\n",
+                                 entry->urb->status);
                        break;
                }
        }
@@ -2829,6 +3197,9 @@ lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
        struct lan78xx_net *dev = netdev_priv(net);
        struct sk_buff *skb2 = NULL;
 
+       if (test_bit(EVENT_DEV_ASLEEP, &dev->flags))
+               schedule_delayed_work(&dev->wq, 0);
+
        if (skb) {
                skb_tx_timestamp(skb);
                skb2 = lan78xx_tx_prep(dev, skb, GFP_ATOMIC);
@@ -2988,11 +3359,6 @@ static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
 {
        int status;
 
-       if (test_bit(EVENT_RX_PAUSED, &dev->flags)) {
-               skb_queue_tail(&dev->rxq_pause, skb);
-               return;
-       }
-
        dev->net->stats.rx_packets++;
        dev->net->stats.rx_bytes += skb->len;
 
@@ -3140,6 +3506,7 @@ static int rx_submit(struct lan78xx_net *dev, struct urb *urb, gfp_t flags)
                        lan78xx_defer_kevent(dev, EVENT_RX_HALT);
                        break;
                case -ENODEV:
+               case -ENOENT:
                        netif_dbg(dev, ifdown, dev->net, "device gone\n");
                        netif_device_detach(dev->net);
                        break;
@@ -3340,6 +3707,12 @@ gso_skb:
                lan78xx_defer_kevent(dev, EVENT_TX_HALT);
                usb_autopm_put_interface_async(dev->intf);
                break;
+       case -ENODEV:
+       case -ENOENT:
+               netif_dbg(dev, tx_err, dev->net,
+                         "tx: submit urb err %d (disconnected?)", ret);
+               netif_device_detach(dev->net);
+               break;
        default:
                usb_autopm_put_interface_async(dev->intf);
                netif_dbg(dev, tx_err, dev->net,
@@ -3356,9 +3729,10 @@ drop:
                if (skb)
                        dev_kfree_skb_any(skb);
                usb_free_urb(urb);
-       } else
+       } else {
                netif_dbg(dev, tx_queued, dev->net,
                          "> tx, len %d, type 0x%x\n", length, skb->protocol);
+       }
 }
 
 static void lan78xx_rx_bh(struct lan78xx_net *dev)
@@ -3421,8 +3795,7 @@ static void lan78xx_bh(struct tasklet_struct *t)
                if (!skb_queue_empty(&dev->txq_pend))
                        lan78xx_tx_bh(dev);
 
-               if (!timer_pending(&dev->delay) &&
-                   !test_bit(EVENT_RX_HALT, &dev->flags))
+               if (!test_bit(EVENT_RX_HALT, &dev->flags))
                        lan78xx_rx_bh(dev);
        }
 }
@@ -3434,18 +3807,20 @@ static void lan78xx_delayedwork(struct work_struct *work)
 
        dev = container_of(work, struct lan78xx_net, wq.work);
 
+       if (test_bit(EVENT_DEV_DISCONNECT, &dev->flags))
+               return;
+
+       if (usb_autopm_get_interface(dev->intf) < 0)
+               return;
+
        if (test_bit(EVENT_TX_HALT, &dev->flags)) {
                unlink_urbs(dev, &dev->txq);
-               status = usb_autopm_get_interface(dev->intf);
-               if (status < 0)
-                       goto fail_pipe;
+
                status = usb_clear_halt(dev->udev, dev->pipe_out);
-               usb_autopm_put_interface(dev->intf);
                if (status < 0 &&
                    status != -EPIPE &&
                    status != -ESHUTDOWN) {
                        if (netif_msg_tx_err(dev))
-fail_pipe:
                                netdev_err(dev->net,
                                           "can't clear tx halt, status %d\n",
                                           status);
@@ -3455,18 +3830,14 @@ fail_pipe:
                                netif_wake_queue(dev->net);
                }
        }
+
        if (test_bit(EVENT_RX_HALT, &dev->flags)) {
                unlink_urbs(dev, &dev->rxq);
-               status = usb_autopm_get_interface(dev->intf);
-               if (status < 0)
-                               goto fail_halt;
                status = usb_clear_halt(dev->udev, dev->pipe_in);
-               usb_autopm_put_interface(dev->intf);
                if (status < 0 &&
                    status != -EPIPE &&
                    status != -ESHUTDOWN) {
                        if (netif_msg_rx_err(dev))
-fail_halt:
                                netdev_err(dev->net,
                                           "can't clear rx halt, status %d\n",
                                           status);
@@ -3480,16 +3851,9 @@ fail_halt:
                int ret = 0;
 
                clear_bit(EVENT_LINK_RESET, &dev->flags);
-               status = usb_autopm_get_interface(dev->intf);
-               if (status < 0)
-                       goto skip_reset;
                if (lan78xx_link_reset(dev) < 0) {
-                       usb_autopm_put_interface(dev->intf);
-skip_reset:
                        netdev_info(dev->net, "link reset failed (%d)\n",
                                    ret);
-               } else {
-                       usb_autopm_put_interface(dev->intf);
                }
        }
 
@@ -3503,6 +3867,8 @@ skip_reset:
 
                dev->delta = min((dev->delta * 2), 50);
        }
+
+       usb_autopm_put_interface(dev->intf);
 }
 
 static void intr_complete(struct urb *urb)
@@ -3518,6 +3884,7 @@ static void intr_complete(struct urb *urb)
 
        /* software-driven interface shutdown */
        case -ENOENT:                   /* urb killed */
+       case -ENODEV:                   /* hardware gone */
        case -ESHUTDOWN:                /* hardware gone */
                netif_dbg(dev, ifdown, dev->net,
                          "intr shutdown, code %d\n", status);
@@ -3531,14 +3898,29 @@ static void intr_complete(struct urb *urb)
                break;
        }
 
-       if (!netif_running(dev->net))
+       if (!netif_device_present(dev->net) ||
+           !netif_running(dev->net)) {
+               netdev_warn(dev->net, "not submitting new status URB");
                return;
+       }
 
        memset(urb->transfer_buffer, 0, urb->transfer_buffer_length);
        status = usb_submit_urb(urb, GFP_ATOMIC);
-       if (status != 0)
+
+       switch (status) {
+       case  0:
+               break;
+       case -ENODEV:
+       case -ENOENT:
+               netif_dbg(dev, timer, dev->net,
+                         "intr resubmit %d (disconnect?)", status);
+               netif_device_detach(dev->net);
+               break;
+       default:
                netif_err(dev, timer, dev->net,
                          "intr resubmit --> %d\n", status);
+               break;
+       }
 }
 
 static void lan78xx_disconnect(struct usb_interface *intf)
@@ -3553,8 +3935,15 @@ static void lan78xx_disconnect(struct usb_interface *intf)
        if (!dev)
                return;
 
+       set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
+
        udev = interface_to_usbdev(intf);
        net = dev->net;
+
+       unregister_netdev(net);
+
+       cancel_delayed_work_sync(&dev->wq);
+
        phydev = net->phydev;
 
        phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
@@ -3565,12 +3954,11 @@ static void lan78xx_disconnect(struct usb_interface *intf)
        if (phy_is_pseudo_fixed_link(phydev))
                fixed_phy_unregister(phydev);
 
-       unregister_netdev(net);
-
-       cancel_delayed_work_sync(&dev->wq);
-
        usb_scuttle_anchored_urbs(&dev->deferred);
 
+       if (timer_pending(&dev->stat_monitor))
+               del_timer_sync(&dev->stat_monitor);
+
        lan78xx_unbind(dev, intf);
 
        usb_kill_urb(dev->urb_intr);
@@ -3609,7 +3997,7 @@ static const struct net_device_ops lan78xx_netdev_ops = {
        .ndo_change_mtu         = lan78xx_change_mtu,
        .ndo_set_mac_address    = lan78xx_set_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = phy_do_ioctl_running,
+       .ndo_eth_ioctl          = phy_do_ioctl_running,
        .ndo_set_rx_mode        = lan78xx_set_multicast,
        .ndo_set_features       = lan78xx_set_features,
        .ndo_vlan_rx_add_vid    = lan78xx_vlan_rx_add_vid,
@@ -3632,8 +4020,8 @@ static int lan78xx_probe(struct usb_interface *intf,
        struct net_device *netdev;
        struct usb_device *udev;
        int ret;
-       unsigned maxp;
-       unsigned period;
+       unsigned int maxp;
+       unsigned int period;
        u8 *buf = NULL;
 
        udev = interface_to_usbdev(intf);
@@ -3659,9 +4047,9 @@ static int lan78xx_probe(struct usb_interface *intf,
        skb_queue_head_init(&dev->rxq);
        skb_queue_head_init(&dev->txq);
        skb_queue_head_init(&dev->done);
-       skb_queue_head_init(&dev->rxq_pause);
        skb_queue_head_init(&dev->txq_pend);
        mutex_init(&dev->phy_mutex);
+       mutex_init(&dev->dev_mutex);
 
        tasklet_setup(&dev->bh, lan78xx_bh);
        INIT_DELAYED_WORK(&dev->wq, lan78xx_delayedwork);
@@ -3798,37 +4186,119 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len)
        return crc;
 }
 
-static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+static int lan78xx_set_auto_suspend(struct lan78xx_net *dev)
 {
        u32 buf;
-       int mask_index;
-       u16 crc;
-       u32 temp_wucsr;
-       u32 temp_pmt_ctl;
+       int ret;
+
+       ret = lan78xx_stop_tx_path(dev);
+       if (ret < 0)
+               return ret;
+
+       ret = lan78xx_stop_rx_path(dev);
+       if (ret < 0)
+               return ret;
+
+       /* auto suspend (selective suspend) */
+
+       ret = lan78xx_write_reg(dev, WUCSR, 0);
+       if (ret < 0)
+               return ret;
+       ret = lan78xx_write_reg(dev, WUCSR2, 0);
+       if (ret < 0)
+               return ret;
+       ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+       if (ret < 0)
+               return ret;
+
+       /* set goodframe wakeup */
+
+       ret = lan78xx_read_reg(dev, WUCSR, &buf);
+       if (ret < 0)
+               return ret;
+
+       buf |= WUCSR_RFE_WAKE_EN_;
+       buf |= WUCSR_STORE_WAKE_;
+
+       ret = lan78xx_write_reg(dev, WUCSR, buf);
+       if (ret < 0)
+               return ret;
+
+       ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+       if (ret < 0)
+               return ret;
+
+       buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
+       buf |= PMT_CTL_RES_CLR_WKP_STS_;
+       buf |= PMT_CTL_PHY_WAKE_EN_;
+       buf |= PMT_CTL_WOL_EN_;
+       buf &= ~PMT_CTL_SUS_MODE_MASK_;
+       buf |= PMT_CTL_SUS_MODE_3_;
+
+       ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+       if (ret < 0)
+               return ret;
+
+       ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+       if (ret < 0)
+               return ret;
+
+       buf |= PMT_CTL_WUPS_MASK_;
+
+       ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+       if (ret < 0)
+               return ret;
+
+       ret = lan78xx_start_rx_path(dev);
+
+       return ret;
+}
+
+static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+{
        const u8 ipv4_multicast[3] = { 0x01, 0x00, 0x5E };
        const u8 ipv6_multicast[3] = { 0x33, 0x33 };
        const u8 arp_type[2] = { 0x08, 0x06 };
+       u32 temp_pmt_ctl;
+       int mask_index;
+       u32 temp_wucsr;
+       u32 buf;
+       u16 crc;
+       int ret;
 
-       lan78xx_read_reg(dev, MAC_TX, &buf);
-       buf &= ~MAC_TX_TXEN_;
-       lan78xx_write_reg(dev, MAC_TX, buf);
-       lan78xx_read_reg(dev, MAC_RX, &buf);
-       buf &= ~MAC_RX_RXEN_;
-       lan78xx_write_reg(dev, MAC_RX, buf);
+       ret = lan78xx_stop_tx_path(dev);
+       if (ret < 0)
+               return ret;
+       ret = lan78xx_stop_rx_path(dev);
+       if (ret < 0)
+               return ret;
 
-       lan78xx_write_reg(dev, WUCSR, 0);
-       lan78xx_write_reg(dev, WUCSR2, 0);
-       lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+       ret = lan78xx_write_reg(dev, WUCSR, 0);
+       if (ret < 0)
+               return ret;
+       ret = lan78xx_write_reg(dev, WUCSR2, 0);
+       if (ret < 0)
+               return ret;
+       ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+       if (ret < 0)
+               return ret;
 
        temp_wucsr = 0;
 
        temp_pmt_ctl = 0;
-       lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+
+       ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+       if (ret < 0)
+               return ret;
+
        temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_;
        temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_;
 
-       for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++)
-               lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+       for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) {
+               ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+       }
 
        mask_index = 0;
        if (wol & WAKE_PHY) {
@@ -3857,30 +4327,52 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
 
                /* set WUF_CFG & WUF_MASK for IPv4 Multicast */
                crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3);
-               lan78xx_write_reg(dev, WUF_CFG(mask_index),
+               ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
                                        WUF_CFGX_EN_ |
                                        WUF_CFGX_TYPE_MCAST_ |
                                        (0 << WUF_CFGX_OFFSET_SHIFT_) |
                                        (crc & WUF_CFGX_CRC16_MASK_));
+               if (ret < 0)
+                       return ret;
+
+               ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               if (ret < 0)
+                       return ret;
 
-               lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
-               lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
                mask_index++;
 
                /* for IPv6 Multicast */
                crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2);
-               lan78xx_write_reg(dev, WUF_CFG(mask_index),
+               ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
                                        WUF_CFGX_EN_ |
                                        WUF_CFGX_TYPE_MCAST_ |
                                        (0 << WUF_CFGX_OFFSET_SHIFT_) |
                                        (crc & WUF_CFGX_CRC16_MASK_));
+               if (ret < 0)
+                       return ret;
+
+               ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               if (ret < 0)
+                       return ret;
 
-               lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
-               lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
                mask_index++;
 
                temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3901,16 +4393,27 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
                 * for packettype (offset 12,13) = ARP (0x0806)
                 */
                crc = lan78xx_wakeframe_crc16(arp_type, 2);
-               lan78xx_write_reg(dev, WUF_CFG(mask_index),
+               ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
                                        WUF_CFGX_EN_ |
                                        WUF_CFGX_TYPE_ALL_ |
                                        (0 << WUF_CFGX_OFFSET_SHIFT_) |
                                        (crc & WUF_CFGX_CRC16_MASK_));
+               if (ret < 0)
+                       return ret;
+
+               ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+               if (ret < 0)
+                       return ret;
+               ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               if (ret < 0)
+                       return ret;
 
-               lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
-               lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
-               lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
                mask_index++;
 
                temp_pmt_ctl |= PMT_CTL_WOL_EN_;
@@ -3918,7 +4421,9 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
                temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
        }
 
-       lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+       ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+       if (ret < 0)
+               return ret;
 
        /* when multiple WOL bits are set */
        if (hweight_long((unsigned long)wol) > 1) {
@@ -3926,33 +4431,45 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
                temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_;
                temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
        }
-       lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+       ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+       if (ret < 0)
+               return ret;
 
        /* clear WUPS */
-       lan78xx_read_reg(dev, PMT_CTL, &buf);
+       ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+       if (ret < 0)
+               return ret;
+
        buf |= PMT_CTL_WUPS_MASK_;
-       lan78xx_write_reg(dev, PMT_CTL, buf);
 
-       lan78xx_read_reg(dev, MAC_RX, &buf);
-       buf |= MAC_RX_RXEN_;
-       lan78xx_write_reg(dev, MAC_RX, buf);
+       ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+       if (ret < 0)
+               return ret;
 
-       return 0;
+       ret = lan78xx_start_rx_path(dev);
+
+       return ret;
 }
 
 static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
-       struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
-       u32 buf;
+       bool dev_open;
        int ret;
 
-       if (!dev->suspend_count++) {
+       mutex_lock(&dev->dev_mutex);
+
+       netif_dbg(dev, ifdown, dev->net,
+                 "suspending: pm event %#x", message.event);
+
+       dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags);
+
+       if (dev_open) {
                spin_lock_irq(&dev->txq.lock);
                /* don't autosuspend while transmitting */
                if ((skb_queue_len(&dev->txq) ||
                     skb_queue_len(&dev->txq_pend)) &&
-                       PMSG_IS_AUTO(message)) {
+                   PMSG_IS_AUTO(message)) {
                        spin_unlock_irq(&dev->txq.lock);
                        ret = -EBUSY;
                        goto out;
@@ -3961,129 +4478,207 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
                        spin_unlock_irq(&dev->txq.lock);
                }
 
-               /* stop TX & RX */
-               ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-               buf &= ~MAC_TX_TXEN_;
-               ret = lan78xx_write_reg(dev, MAC_TX, buf);
-               ret = lan78xx_read_reg(dev, MAC_RX, &buf);
-               buf &= ~MAC_RX_RXEN_;
-               ret = lan78xx_write_reg(dev, MAC_RX, buf);
+               /* stop RX */
+               ret = lan78xx_stop_rx_path(dev);
+               if (ret < 0)
+                       goto out;
 
-               /* empty out the rx and queues */
+               ret = lan78xx_flush_rx_fifo(dev);
+               if (ret < 0)
+                       goto out;
+
+               /* stop Tx */
+               ret = lan78xx_stop_tx_path(dev);
+               if (ret < 0)
+                       goto out;
+
+               /* empty out the Rx and Tx queues */
                netif_device_detach(dev->net);
                lan78xx_terminate_urbs(dev);
                usb_kill_urb(dev->urb_intr);
 
                /* reattach */
                netif_device_attach(dev->net);
-       }
 
-       if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) {
                del_timer(&dev->stat_monitor);
 
                if (PMSG_IS_AUTO(message)) {
-                       /* auto suspend (selective suspend) */
-                       ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-                       buf &= ~MAC_TX_TXEN_;
-                       ret = lan78xx_write_reg(dev, MAC_TX, buf);
-                       ret = lan78xx_read_reg(dev, MAC_RX, &buf);
-                       buf &= ~MAC_RX_RXEN_;
-                       ret = lan78xx_write_reg(dev, MAC_RX, buf);
+                       ret = lan78xx_set_auto_suspend(dev);
+                       if (ret < 0)
+                               goto out;
+               } else {
+                       struct lan78xx_priv *pdata;
+
+                       pdata = (struct lan78xx_priv *)(dev->data[0]);
+                       netif_carrier_off(dev->net);
+                       ret = lan78xx_set_suspend(dev, pdata->wol);
+                       if (ret < 0)
+                               goto out;
+               }
+       } else {
+               /* Interface is down; don't allow WOL and PHY
+                * events to wake up the host
+                */
+               u32 buf;
 
-                       ret = lan78xx_write_reg(dev, WUCSR, 0);
-                       ret = lan78xx_write_reg(dev, WUCSR2, 0);
-                       ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+               set_bit(EVENT_DEV_ASLEEP, &dev->flags);
 
-                       /* set goodframe wakeup */
-                       ret = lan78xx_read_reg(dev, WUCSR, &buf);
+               ret = lan78xx_write_reg(dev, WUCSR, 0);
+               if (ret < 0)
+                       goto out;
+               ret = lan78xx_write_reg(dev, WUCSR2, 0);
+               if (ret < 0)
+                       goto out;
+
+               ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+               if (ret < 0)
+                       goto out;
+
+               buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
+               buf |= PMT_CTL_RES_CLR_WKP_STS_;
+               buf &= ~PMT_CTL_SUS_MODE_MASK_;
+               buf |= PMT_CTL_SUS_MODE_3_;
+
+               ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+               if (ret < 0)
+                       goto out;
 
-                       buf |= WUCSR_RFE_WAKE_EN_;
-                       buf |= WUCSR_STORE_WAKE_;
+               ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+               if (ret < 0)
+                       goto out;
 
-                       ret = lan78xx_write_reg(dev, WUCSR, buf);
+               buf |= PMT_CTL_WUPS_MASK_;
 
-                       ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+               ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+               if (ret < 0)
+                       goto out;
+       }
 
-                       buf &= ~PMT_CTL_RES_CLR_WKP_EN_;
-                       buf |= PMT_CTL_RES_CLR_WKP_STS_;
+       ret = 0;
+out:
+       mutex_unlock(&dev->dev_mutex);
 
-                       buf |= PMT_CTL_PHY_WAKE_EN_;
-                       buf |= PMT_CTL_WOL_EN_;
-                       buf &= ~PMT_CTL_SUS_MODE_MASK_;
-                       buf |= PMT_CTL_SUS_MODE_3_;
+       return ret;
+}
 
-                       ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+static bool lan78xx_submit_deferred_urbs(struct lan78xx_net *dev)
+{
+       bool pipe_halted = false;
+       struct urb *urb;
 
-                       ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
+       while ((urb = usb_get_from_anchor(&dev->deferred))) {
+               struct sk_buff *skb = urb->context;
+               int ret;
 
-                       buf |= PMT_CTL_WUPS_MASK_;
+               if (!netif_device_present(dev->net) ||
+                   !netif_carrier_ok(dev->net) ||
+                   pipe_halted) {
+                       usb_free_urb(urb);
+                       dev_kfree_skb(skb);
+                       continue;
+               }
 
-                       ret = lan78xx_write_reg(dev, PMT_CTL, buf);
+               ret = usb_submit_urb(urb, GFP_ATOMIC);
 
-                       ret = lan78xx_read_reg(dev, MAC_RX, &buf);
-                       buf |= MAC_RX_RXEN_;
-                       ret = lan78xx_write_reg(dev, MAC_RX, buf);
+               if (ret == 0) {
+                       netif_trans_update(dev->net);
+                       lan78xx_queue_skb(&dev->txq, skb, tx_start);
                } else {
-                       lan78xx_set_suspend(dev, pdata->wol);
+                       usb_free_urb(urb);
+                       dev_kfree_skb(skb);
+
+                       if (ret == -EPIPE) {
+                               netif_stop_queue(dev->net);
+                               pipe_halted = true;
+                       } else if (ret == -ENODEV) {
+                               netif_device_detach(dev->net);
+                       }
                }
        }
 
-       ret = 0;
-out:
-       return ret;
+       return pipe_halted;
 }
 
 static int lan78xx_resume(struct usb_interface *intf)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
-       struct sk_buff *skb;
-       struct urb *res;
+       bool dev_open;
        int ret;
-       u32 buf;
 
-       if (!timer_pending(&dev->stat_monitor)) {
-               dev->delta = 1;
-               mod_timer(&dev->stat_monitor,
-                         jiffies + STAT_UPDATE_TIMER);
-       }
+       mutex_lock(&dev->dev_mutex);
 
-       if (!--dev->suspend_count) {
-               /* resume interrupt URBs */
-               if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags))
-                               usb_submit_urb(dev->urb_intr, GFP_NOIO);
+       netif_dbg(dev, ifup, dev->net, "resuming device");
+
+       dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags);
+
+       if (dev_open) {
+               bool pipe_halted = false;
+
+               ret = lan78xx_flush_tx_fifo(dev);
+               if (ret < 0)
+                       goto out;
+
+               if (dev->urb_intr) {
+                       int ret = usb_submit_urb(dev->urb_intr, GFP_KERNEL);
 
-               spin_lock_irq(&dev->txq.lock);
-               while ((res = usb_get_from_anchor(&dev->deferred))) {
-                       skb = (struct sk_buff *)res->context;
-                       ret = usb_submit_urb(res, GFP_ATOMIC);
                        if (ret < 0) {
-                               dev_kfree_skb_any(skb);
-                               usb_free_urb(res);
-                               usb_autopm_put_interface_async(dev->intf);
-                       } else {
-                               netif_trans_update(dev->net);
-                               lan78xx_queue_skb(&dev->txq, skb, tx_start);
+                               if (ret == -ENODEV)
+                                       netif_device_detach(dev->net);
+
+                       netdev_warn(dev->net, "Failed to submit intr URB");
                        }
                }
 
+               spin_lock_irq(&dev->txq.lock);
+
+               if (netif_device_present(dev->net)) {
+                       pipe_halted = lan78xx_submit_deferred_urbs(dev);
+
+                       if (pipe_halted)
+                               lan78xx_defer_kevent(dev, EVENT_TX_HALT);
+               }
+
                clear_bit(EVENT_DEV_ASLEEP, &dev->flags);
+
                spin_unlock_irq(&dev->txq.lock);
 
-               if (test_bit(EVENT_DEV_OPEN, &dev->flags)) {
-                       if (!(skb_queue_len(&dev->txq) >= dev->tx_qlen))
-                               netif_start_queue(dev->net);
-                       tasklet_schedule(&dev->bh);
+               if (!pipe_halted &&
+                   netif_device_present(dev->net) &&
+                   (skb_queue_len(&dev->txq) < dev->tx_qlen))
+                       netif_start_queue(dev->net);
+
+               ret = lan78xx_start_tx_path(dev);
+               if (ret < 0)
+                       goto out;
+
+               tasklet_schedule(&dev->bh);
+
+               if (!timer_pending(&dev->stat_monitor)) {
+                       dev->delta = 1;
+                       mod_timer(&dev->stat_monitor,
+                                 jiffies + STAT_UPDATE_TIMER);
                }
+
+       } else {
+               clear_bit(EVENT_DEV_ASLEEP, &dev->flags);
        }
 
        ret = lan78xx_write_reg(dev, WUCSR2, 0);
+       if (ret < 0)
+               goto out;
        ret = lan78xx_write_reg(dev, WUCSR, 0);
+       if (ret < 0)
+               goto out;
        ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+       if (ret < 0)
+               goto out;
 
        ret = lan78xx_write_reg(dev, WUCSR2, WUCSR2_NS_RCD_ |
                                             WUCSR2_ARP_RCD_ |
                                             WUCSR2_IPV6_TCPSYN_RCD_ |
                                             WUCSR2_IPV4_TCPSYN_RCD_);
+       if (ret < 0)
+               goto out;
 
        ret = lan78xx_write_reg(dev, WUCSR, WUCSR_EEE_TX_WAKE_ |
                                            WUCSR_EEE_RX_WAKE_ |
@@ -4092,23 +4687,32 @@ static int lan78xx_resume(struct usb_interface *intf)
                                            WUCSR_WUFR_ |
                                            WUCSR_MPR_ |
                                            WUCSR_BCST_FR_);
+       if (ret < 0)
+               goto out;
 
-       ret = lan78xx_read_reg(dev, MAC_TX, &buf);
-       buf |= MAC_TX_TXEN_;
-       ret = lan78xx_write_reg(dev, MAC_TX, buf);
+       ret = 0;
+out:
+       mutex_unlock(&dev->dev_mutex);
 
-       return 0;
+       return ret;
 }
 
 static int lan78xx_reset_resume(struct usb_interface *intf)
 {
        struct lan78xx_net *dev = usb_get_intfdata(intf);
+       int ret;
 
-       lan78xx_reset(dev);
+       netif_dbg(dev, ifup, dev->net, "(reset) resuming device");
+
+       ret = lan78xx_reset(dev);
+       if (ret < 0)
+               return ret;
 
        phy_start(dev->net->phydev);
 
-       return lan78xx_resume(intf);
+       ret = lan78xx_resume(intf);
+
+       return ret;
 }
 
 static const struct usb_device_id products[] = {
index 2469bdc..66866be 100644 (file)
@@ -464,7 +464,7 @@ static const struct net_device_ops mcs7830_netdev_ops = {
        .ndo_change_mtu         = usbnet_change_mtu,
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = mcs7830_ioctl,
+       .ndo_eth_ioctl          = mcs7830_ioctl,
        .ndo_set_rx_mode        = mcs7830_set_multicast,
        .ndo_set_mac_address    = mcs7830_set_mac_address,
 };
index 9f9dd0d..6a92a3f 100644 (file)
@@ -1001,7 +1001,8 @@ static const struct ethtool_ops ops = {
        .set_link_ksettings = pegasus_set_link_ksettings,
 };
 
-static int pegasus_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
+static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq,
+                                 void __user *udata, int cmd)
 {
        __u16 *data = (__u16 *) &rq->ifr_ifru;
        pegasus_t *pegasus = netdev_priv(net);
@@ -1269,7 +1270,7 @@ static int pegasus_resume(struct usb_interface *intf)
 static const struct net_device_ops pegasus_netdev_ops = {
        .ndo_open =                     pegasus_open,
        .ndo_stop =                     pegasus_close,
-       .ndo_do_ioctl =                 pegasus_ioctl,
+       .ndo_siocdevprivate =           pegasus_siocdevprivate,
        .ndo_start_xmit =               pegasus_start_xmit,
        .ndo_set_rx_mode =              pegasus_set_multicast,
        .ndo_tx_timeout =               pegasus_tx_timeout,
index 7983237..60ba9b7 100644 (file)
@@ -8848,7 +8848,9 @@ out:
 }
 
 static int rtl8152_get_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *coalesce)
+                               struct ethtool_coalesce *coalesce,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct r8152 *tp = netdev_priv(netdev);
 
@@ -8867,7 +8869,9 @@ static int rtl8152_get_coalesce(struct net_device *netdev,
 }
 
 static int rtl8152_set_coalesce(struct net_device *netdev,
-                               struct ethtool_coalesce *coalesce)
+                               struct ethtool_coalesce *coalesce,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct r8152 *tp = netdev_priv(netdev);
        int ret;
@@ -9190,7 +9194,7 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
 static const struct net_device_ops rtl8152_netdev_ops = {
        .ndo_open               = rtl8152_open,
        .ndo_stop               = rtl8152_close,
-       .ndo_do_ioctl           = rtl8152_ioctl,
+       .ndo_eth_ioctl          = rtl8152_ioctl,
        .ndo_start_xmit         = rtl8152_start_xmit,
        .ndo_tx_timeout         = rtl8152_tx_timeout,
        .ndo_set_features       = rtl8152_set_features,
index 7656f2a..4a1b0e0 100644 (file)
@@ -822,7 +822,8 @@ static const struct ethtool_ops ops = {
        .get_link_ksettings = rtl8150_get_link_ksettings,
 };
 
-static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
+static int rtl8150_siocdevprivate(struct net_device *netdev, struct ifreq *rq,
+                                 void __user *udata, int cmd)
 {
        rtl8150_t *dev = netdev_priv(netdev);
        u16 *data = (u16 *) & rq->ifr_ifru;
@@ -850,7 +851,7 @@ static int rtl8150_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
 static const struct net_device_ops rtl8150_netdev_ops = {
        .ndo_open               = rtl8150_open,
        .ndo_stop               = rtl8150_close,
-       .ndo_do_ioctl           = rtl8150_ioctl,
+       .ndo_siocdevprivate     = rtl8150_siocdevprivate,
        .ndo_start_xmit         = rtl8150_start_xmit,
        .ndo_tx_timeout         = rtl8150_tx_timeout,
        .ndo_set_rx_mode        = rtl8150_set_multicast,
index 13141db..76f7af1 100644 (file)
@@ -1439,7 +1439,7 @@ static const struct net_device_ops smsc75xx_netdev_ops = {
        .ndo_change_mtu         = smsc75xx_change_mtu,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = smsc75xx_ioctl,
+       .ndo_eth_ioctl          = smsc75xx_ioctl,
        .ndo_set_rx_mode        = smsc75xx_set_multicast,
        .ndo_set_features       = smsc75xx_set_features,
 };
index 4c8ee1c..7d95397 100644 (file)
@@ -1044,7 +1044,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = smsc95xx_ioctl,
+       .ndo_eth_ioctl          = smsc95xx_ioctl,
        .ndo_set_rx_mode        = smsc95xx_set_multicast,
        .ndo_set_features       = smsc95xx_set_features,
 };
index ce29261..6516a37 100644 (file)
@@ -310,7 +310,7 @@ static const struct net_device_ops sr9700_netdev_ops = {
        .ndo_change_mtu         = usbnet_change_mtu,
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = sr9700_ioctl,
+       .ndo_eth_ioctl          = sr9700_ioctl,
        .ndo_set_rx_mode        = sr9700_set_multicast,
        .ndo_set_mac_address    = sr9700_set_mac_address,
 };
index a822d81..576401c 100644 (file)
@@ -684,7 +684,7 @@ static const struct net_device_ops sr9800_netdev_ops = {
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_set_mac_address    = sr_set_mac_address,
        .ndo_validate_addr      = eth_validate_addr,
-       .ndo_do_ioctl           = sr_ioctl,
+       .ndo_eth_ioctl          = sr_ioctl,
        .ndo_set_rx_mode        = sr_set_multicast,
 };
 
index 470e1c1..840c1c2 100644 (file)
@@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        dev->interrupt_count = 0;
 
        dev->net = net;
-       strcpy (net->name, "usb%d");
+       strscpy(net->name, "usb%d", sizeof(net->name));
        memcpy (net->dev_addr, node_id, sizeof node_id);
 
        /* rx and tx sides can use different message sizes;
@@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
                if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
                    ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
                     (net->dev_addr [0] & 0x02) == 0))
-                       strcpy (net->name, "eth%d");
+                       strscpy(net->name, "eth%d", sizeof(net->name));
                /* WLAN devices should always be named "wlan%d" */
                if ((dev->driver_info->flags & FLAG_WLAN) != 0)
-                       strcpy(net->name, "wlan%d");
+                       strscpy(net->name, "wlan%d", sizeof(net->name));
                /* WWAN devices should always be named "wwan%d" */
                if ((dev->driver_info->flags & FLAG_WWAN) != 0)
-                       strcpy(net->name, "wwan%d");
+                       strscpy(net->name, "wwan%d", sizeof(net->name));
 
                /* devices that cannot do ARP */
                if ((dev->driver_info->flags & FLAG_NOARP) != 0)
index bdb7ce3..50eb43e 100644 (file)
@@ -224,12 +224,13 @@ static void veth_get_channels(struct net_device *dev,
 {
        channels->tx_count = dev->real_num_tx_queues;
        channels->rx_count = dev->real_num_rx_queues;
-       channels->max_tx = dev->real_num_tx_queues;
-       channels->max_rx = dev->real_num_rx_queues;
-       channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
-       channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+       channels->max_tx = dev->num_tx_queues;
+       channels->max_rx = dev->num_rx_queues;
 }
 
+static int veth_set_channels(struct net_device *dev,
+                            struct ethtool_channels *ch);
+
 static const struct ethtool_ops veth_ethtool_ops = {
        .get_drvinfo            = veth_get_drvinfo,
        .get_link               = ethtool_op_get_link,
@@ -239,6 +240,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
        .get_link_ksettings     = veth_get_link_ksettings,
        .get_ts_info            = ethtool_op_get_ts_info,
        .get_channels           = veth_get_channels,
+       .set_channels           = veth_set_channels,
 };
 
 /* general routines */
@@ -711,7 +713,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
        int mac_len, delta, off;
        struct xdp_buff xdp;
 
-       skb_orphan_partial(skb);
+       skb_prepare_for_gro(skb);
 
        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -928,12 +930,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
        return done;
 }
 
-static int __veth_napi_enable(struct net_device *dev)
+static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
        struct veth_priv *priv = netdev_priv(dev);
        int err, i;
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
@@ -941,7 +943,7 @@ static int __veth_napi_enable(struct net_device *dev)
                        goto err_xdp_ring;
        }
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                napi_enable(&rq->xdp_napi);
@@ -949,19 +951,25 @@ static int __veth_napi_enable(struct net_device *dev)
        }
 
        return 0;
+
 err_xdp_ring:
-       for (i--; i >= 0; i--)
+       for (i--; i >= start; i--)
                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
 
        return err;
 }
 
-static void veth_napi_del(struct net_device *dev)
+static int __veth_napi_enable(struct net_device *dev)
+{
+       return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_napi_del_range(struct net_device *dev, int start, int end)
 {
        struct veth_priv *priv = netdev_priv(dev);
        int i;
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                rcu_assign_pointer(priv->rq[i].napi, NULL);
@@ -970,7 +978,7 @@ static void veth_napi_del(struct net_device *dev)
        }
        synchronize_net();
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                rq->rx_notify_masked = false;
@@ -978,41 +986,90 @@ static void veth_napi_del(struct net_device *dev)
        }
 }
 
+static void veth_napi_del(struct net_device *dev)
+{
+       veth_napi_del_range(dev, 0, dev->real_num_rx_queues);
+}
+
 static bool veth_gro_requested(const struct net_device *dev)
 {
        return !!(dev->wanted_features & NETIF_F_GRO);
 }
 
-static int veth_enable_xdp(struct net_device *dev)
+static int veth_enable_xdp_range(struct net_device *dev, int start, int end,
+                                bool napi_already_on)
 {
-       bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
        struct veth_priv *priv = netdev_priv(dev);
        int err, i;
 
-       if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
-               for (i = 0; i < dev->real_num_rx_queues; i++) {
-                       struct veth_rq *rq = &priv->rq[i];
+       for (i = start; i < end; i++) {
+               struct veth_rq *rq = &priv->rq[i];
 
-                       if (!napi_already_on)
-                               netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
-                       err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
-                       if (err < 0)
-                               goto err_rxq_reg;
+               if (!napi_already_on)
+                       netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+               err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
+               if (err < 0)
+                       goto err_rxq_reg;
 
-                       err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-                                                        MEM_TYPE_PAGE_SHARED,
-                                                        NULL);
-                       if (err < 0)
-                               goto err_reg_mem;
+               err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+                                                MEM_TYPE_PAGE_SHARED,
+                                                NULL);
+               if (err < 0)
+                       goto err_reg_mem;
 
-                       /* Save original mem info as it can be overwritten */
-                       rq->xdp_mem = rq->xdp_rxq.mem;
-               }
+               /* Save original mem info as it can be overwritten */
+               rq->xdp_mem = rq->xdp_rxq.mem;
+       }
+       return 0;
+
+err_reg_mem:
+       xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
+err_rxq_reg:
+       for (i--; i >= start; i--) {
+               struct veth_rq *rq = &priv->rq[i];
+
+               xdp_rxq_info_unreg(&rq->xdp_rxq);
+               if (!napi_already_on)
+                       netif_napi_del(&rq->xdp_napi);
+       }
+
+       return err;
+}
+
+static void veth_disable_xdp_range(struct net_device *dev, int start, int end,
+                                  bool delete_napi)
+{
+       struct veth_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = start; i < end; i++) {
+               struct veth_rq *rq = &priv->rq[i];
+
+               rq->xdp_rxq.mem = rq->xdp_mem;
+               xdp_rxq_info_unreg(&rq->xdp_rxq);
+
+               if (delete_napi)
+                       netif_napi_del(&rq->xdp_napi);
+       }
+}
+
+static int veth_enable_xdp(struct net_device *dev)
+{
+       bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
+       struct veth_priv *priv = netdev_priv(dev);
+       int err, i;
+
+       if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
+               err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on);
+               if (err)
+                       return err;
 
                if (!napi_already_on) {
                        err = __veth_napi_enable(dev);
-                       if (err)
-                               goto err_rxq_reg;
+                       if (err) {
+                               veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true);
+                               return err;
+                       }
 
                        if (!veth_gro_requested(dev)) {
                                /* user-space did not require GRO, but adding XDP
@@ -1030,18 +1087,6 @@ static int veth_enable_xdp(struct net_device *dev)
        }
 
        return 0;
-err_reg_mem:
-       xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
-err_rxq_reg:
-       for (i--; i >= 0; i--) {
-               struct veth_rq *rq = &priv->rq[i];
-
-               xdp_rxq_info_unreg(&rq->xdp_rxq);
-               if (!napi_already_on)
-                       netif_napi_del(&rq->xdp_napi);
-       }
-
-       return err;
 }
 
 static void veth_disable_xdp(struct net_device *dev)
@@ -1064,28 +1109,23 @@ static void veth_disable_xdp(struct net_device *dev)
                }
        }
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
-               struct veth_rq *rq = &priv->rq[i];
-
-               rq->xdp_rxq.mem = rq->xdp_mem;
-               xdp_rxq_info_unreg(&rq->xdp_rxq);
-       }
+       veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false);
 }
 
-static int veth_napi_enable(struct net_device *dev)
+static int veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
        struct veth_priv *priv = netdev_priv(dev);
        int err, i;
 
-       for (i = 0; i < dev->real_num_rx_queues; i++) {
+       for (i = start; i < end; i++) {
                struct veth_rq *rq = &priv->rq[i];
 
                netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
        }
 
-       err = __veth_napi_enable(dev);
+       err = __veth_napi_enable_range(dev, start, end);
        if (err) {
-               for (i = 0; i < dev->real_num_rx_queues; i++) {
+               for (i = start; i < end; i++) {
                        struct veth_rq *rq = &priv->rq[i];
 
                        netif_napi_del(&rq->xdp_napi);
@@ -1095,6 +1135,128 @@ static int veth_napi_enable(struct net_device *dev)
        return err;
 }
 
+static int veth_napi_enable(struct net_device *dev)
+{
+       return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues);
+}
+
+static void veth_disable_range_safe(struct net_device *dev, int start, int end)
+{
+       struct veth_priv *priv = netdev_priv(dev);
+
+       if (start >= end)
+               return;
+
+       if (priv->_xdp_prog) {
+               veth_napi_del_range(dev, start, end);
+               veth_disable_xdp_range(dev, start, end, false);
+       } else if (veth_gro_requested(dev)) {
+               veth_napi_del_range(dev, start, end);
+       }
+}
+
+static int veth_enable_range_safe(struct net_device *dev, int start, int end)
+{
+       struct veth_priv *priv = netdev_priv(dev);
+       int err;
+
+       if (start >= end)
+               return 0;
+
+       if (priv->_xdp_prog) {
+               /* these channels are freshly initialized, napi is not on there even
+                * when GRO is requeste
+                */
+               err = veth_enable_xdp_range(dev, start, end, false);
+               if (err)
+                       return err;
+
+               err = __veth_napi_enable_range(dev, start, end);
+               if (err) {
+                       /* on error always delete the newly added napis */
+                       veth_disable_xdp_range(dev, start, end, true);
+                       return err;
+               }
+       } else if (veth_gro_requested(dev)) {
+               return veth_napi_enable_range(dev, start, end);
+       }
+       return 0;
+}
+
+static int veth_set_channels(struct net_device *dev,
+                            struct ethtool_channels *ch)
+{
+       struct veth_priv *priv = netdev_priv(dev);
+       unsigned int old_rx_count, new_rx_count;
+       struct veth_priv *peer_priv;
+       struct net_device *peer;
+       int err;
+
+       /* sanity check. Upper bounds are already enforced by the caller */
+       if (!ch->rx_count || !ch->tx_count)
+               return -EINVAL;
+
+       /* avoid braking XDP, if that is enabled */
+       peer = rtnl_dereference(priv->peer);
+       peer_priv = peer ? netdev_priv(peer) : NULL;
+       if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
+               return -EINVAL;
+
+       if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
+               return -EINVAL;
+
+       old_rx_count = dev->real_num_rx_queues;
+       new_rx_count = ch->rx_count;
+       if (netif_running(dev)) {
+               /* turn device off */
+               netif_carrier_off(dev);
+               if (peer)
+                       netif_carrier_off(peer);
+
+               /* try to allocate new resurces, as needed*/
+               err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
+               if (err)
+                       goto out;
+       }
+
+       err = netif_set_real_num_rx_queues(dev, ch->rx_count);
+       if (err)
+               goto revert;
+
+       err = netif_set_real_num_tx_queues(dev, ch->tx_count);
+       if (err) {
+               int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);
+
+               /* this error condition could happen only if rx and tx change
+                * in opposite directions (e.g. tx nr raises, rx nr decreases)
+                * and we can't do anything to fully restore the original
+                * status
+                */
+               if (err2)
+                       pr_warn("Can't restore rx queues config %d -> %d %d",
+                               new_rx_count, old_rx_count, err2);
+               else
+                       goto revert;
+       }
+
+out:
+       if (netif_running(dev)) {
+               /* note that we need to swap the arguments WRT the enable part
+                * to identify the range we have to disable
+                */
+               veth_disable_range_safe(dev, new_rx_count, old_rx_count);
+               netif_carrier_on(dev);
+               if (peer)
+                       netif_carrier_on(peer);
+       }
+       return err;
+
+revert:
+       new_rx_count = old_rx_count;
+       old_rx_count = ch->rx_count;
+       goto out;
+}
+
 static int veth_open(struct net_device *dev)
 {
        struct veth_priv *priv = netdev_priv(dev);
@@ -1447,6 +1609,23 @@ static void veth_disable_gro(struct net_device *dev)
        netdev_update_features(dev);
 }
 
+static int veth_init_queues(struct net_device *dev, struct nlattr *tb[])
+{
+       int err;
+
+       if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) {
+               err = netif_set_real_num_tx_queues(dev, 1);
+               if (err)
+                       return err;
+       }
+       if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) {
+               err = netif_set_real_num_rx_queues(dev, 1);
+               if (err)
+                       return err;
+       }
+       return 0;
+}
+
 static int veth_newlink(struct net *src_net, struct net_device *dev,
                        struct nlattr *tb[], struct nlattr *data[],
                        struct netlink_ext_ack *extack)
@@ -1556,13 +1735,21 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 
        priv = netdev_priv(dev);
        rcu_assign_pointer(priv->peer, peer);
+       err = veth_init_queues(dev, tb);
+       if (err)
+               goto err_queues;
 
        priv = netdev_priv(peer);
        rcu_assign_pointer(priv->peer, dev);
+       err = veth_init_queues(peer, tb);
+       if (err)
+               goto err_queues;
 
        veth_disable_gro(dev);
        return 0;
 
+err_queues:
+       unregister_netdevice(dev);
 err_register_dev:
        /* nothing to do */
 err_configure_peer:
@@ -1608,6 +1795,16 @@ static struct net *veth_get_link_net(const struct net_device *dev)
        return peer ? dev_net(peer) : dev_net(dev);
 }
 
+static unsigned int veth_get_num_queues(void)
+{
+       /* enforce the same queue limit as rtnl_create_link */
+       int queues = num_possible_cpus();
+
+       if (queues > 4096)
+               queues = 4096;
+       return queues;
+}
+
 static struct rtnl_link_ops veth_link_ops = {
        .kind           = DRV_NAME,
        .priv_size      = sizeof(struct veth_priv),
@@ -1618,6 +1815,8 @@ static struct rtnl_link_ops veth_link_ops = {
        .policy         = veth_policy,
        .maxtype        = VETH_INFO_MAX,
        .get_link_net   = veth_get_link_net,
+       .get_num_tx_queues      = veth_get_num_queues,
+       .get_num_rx_queues      = veth_get_num_queues,
 };
 
 /*
index eee4936..271d38c 100644 (file)
@@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct page *page, unsigned int offset,
                                   unsigned int len, unsigned int truesize,
                                   bool hdr_valid, unsigned int metasize,
-                                  bool whole_page)
+                                  unsigned int headroom)
 {
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-       /* If whole_page, there is an offset between the beginning of the
+       /* If headroom is not 0, there is an offset between the beginning of the
         * data and the allocated space, otherwise the data and the allocated
         * space are aligned.
         *
         * Buffers with headroom use PAGE_SIZE as alloc size, see
         * add_recvbuf_mergeable() + get_mergeable_buf_len()
         */
-       if (whole_page) {
-               /* Buffers with whole_page use PAGE_SIZE as alloc size,
-                * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-                */
-               truesize = PAGE_SIZE;
-
-               /* page maybe head page, so we should get the buf by p, not the
-                * page
-                */
-               tailroom = truesize - len - offset_in_page(p);
-               buf = (char *)((unsigned long)p & PAGE_MASK);
-       } else {
-               tailroom = truesize - len;
-               buf = p;
-       }
+       truesize = headroom ? PAGE_SIZE : truesize;
+       tailroom = truesize - len - headroom;
+       buf = p - headroom;
 
        len -= hdr_len;
        offset += hdr_padded_len;
@@ -540,19 +528,20 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
  * functions to perfectly solve these three problems at the same time.
  */
 #define virtnet_xdp_get_sq(vi) ({                                       \
+       int cpu = smp_processor_id();                                   \
        struct netdev_queue *txq;                                       \
        typeof(vi) v = (vi);                                            \
        unsigned int qp;                                                \
                                                                        \
        if (v->curr_queue_pairs > nr_cpu_ids) {                         \
                qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
-               qp += smp_processor_id();                               \
+               qp += cpu;                                              \
                txq = netdev_get_tx_queue(v->dev, qp);                  \
                __netif_tx_acquire(txq);                                \
        } else {                                                        \
-               qp = smp_processor_id() % v->curr_queue_pairs;          \
+               qp = cpu % v->curr_queue_pairs;                         \
                txq = netdev_get_tx_queue(v->dev, qp);                  \
-               __netif_tx_lock(txq, raw_smp_processor_id());           \
+               __netif_tx_lock(txq, cpu);                              \
        }                                                               \
        v->sq + qp;                                                     \
 })
@@ -978,7 +967,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
                                                       len, PAGE_SIZE, false,
-                                                      metasize, true);
+                                                      metasize,
+                                                      VIRTIO_XDP_HEADROOM);
                                return head_skb;
                        }
                        break;
@@ -1029,7 +1019,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        rcu_read_unlock();
 
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-                              metasize, !!headroom);
+                              metasize, headroom);
        curr_skb = head_skb;
 
        if (unlikely(!curr_skb))
@@ -2208,14 +2198,14 @@ static int virtnet_set_channels(struct net_device *dev,
        if (vi->rq[0].xdp_prog)
                return -EINVAL;
 
-       get_online_cpus();
+       cpus_read_lock();
        err = _virtnet_set_queues(vi, queue_pairs);
        if (err) {
-               put_online_cpus();
+               cpus_read_unlock();
                goto err;
        }
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        netif_set_real_num_tx_queues(dev, queue_pairs);
        netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -2331,7 +2321,9 @@ static int virtnet_get_link_ksettings(struct net_device *dev,
 }
 
 static int virtnet_set_coalesce(struct net_device *dev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct virtnet_info *vi = netdev_priv(dev);
        int i, napi_weight;
@@ -2352,7 +2344,9 @@ static int virtnet_set_coalesce(struct net_device *dev,
 }
 
 static int virtnet_get_coalesce(struct net_device *dev,
-                               struct ethtool_coalesce *ec)
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct ethtool_coalesce ec_default = {
                .cmd = ETHTOOL_GCOALESCE,
@@ -2970,9 +2964,9 @@ static int init_vqs(struct virtnet_info *vi)
        if (ret)
                goto err_free;
 
-       get_online_cpus();
+       cpus_read_lock();
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        return 0;
 
index c5a167a..7a38925 100644 (file)
@@ -2,7 +2,7 @@
 #
 # Linux driver for VMware's vmxnet3 ethernet NIC.
 #
-# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved.
+# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
index 8c014c9..f9f3a23 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
index a8d5ebd..74d4e8b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -98,6 +98,9 @@ enum {
        VMXNET3_CMD_GET_TXDATA_DESC_SIZE,
        VMXNET3_CMD_GET_COALESCE,
        VMXNET3_CMD_GET_RSS_FIELDS,
+       VMXNET3_CMD_GET_RESERVED2,
+       VMXNET3_CMD_GET_RESERVED3,
+       VMXNET3_CMD_GET_MAX_QUEUES_CONF,
 };
 
 /*
@@ -341,13 +344,15 @@ struct Vmxnet3_RxCompDescExt {
 #define VMXNET3_TXD_EOP_SIZE 1
 
 /* value of RxCompDesc.rssType */
-enum {
-       VMXNET3_RCD_RSS_TYPE_NONE     = 0,
-       VMXNET3_RCD_RSS_TYPE_IPV4     = 1,
-       VMXNET3_RCD_RSS_TYPE_TCPIPV4  = 2,
-       VMXNET3_RCD_RSS_TYPE_IPV6     = 3,
-       VMXNET3_RCD_RSS_TYPE_TCPIPV6  = 4,
-};
+#define VMXNET3_RCD_RSS_TYPE_NONE     0
+#define VMXNET3_RCD_RSS_TYPE_IPV4     1
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV4  2
+#define VMXNET3_RCD_RSS_TYPE_IPV6     3
+#define VMXNET3_RCD_RSS_TYPE_TCPIPV6  4
+#define VMXNET3_RCD_RSS_TYPE_UDPIPV4  5
+#define VMXNET3_RCD_RSS_TYPE_UDPIPV6  6
+#define VMXNET3_RCD_RSS_TYPE_ESPIPV4  7
+#define VMXNET3_RCD_RSS_TYPE_ESPIPV6  8
 
 
 /* a union for accessing all cmd/completion descriptors */
@@ -533,6 +538,13 @@ enum vmxnet3_intr_type {
 /* addition 1 for events */
 #define VMXNET3_MAX_INTRS      25
 
+/* Version 6 and later will use below macros */
+#define VMXNET3_EXT_MAX_TX_QUEUES  32
+#define VMXNET3_EXT_MAX_RX_QUEUES  32
+/* addition 1 for events */
+#define VMXNET3_EXT_MAX_INTRS      65
+#define VMXNET3_FIRST_SET_INTRS    64
+
 /* value of intrCtrl */
 #define VMXNET3_IC_DISABLE_ALL  0x1   /* bit 0 */
 
@@ -547,6 +559,19 @@ struct Vmxnet3_IntrConf {
        __le32          reserved[2];
 };
 
+struct Vmxnet3_IntrConfExt {
+       u8              autoMask;
+       u8              numIntrs;      /* # of interrupts */
+       u8              eventIntrIdx;
+       u8              reserved;
+       __le32          intrCtrl;
+       __le32          reserved1;
+       u8              modLevels[VMXNET3_EXT_MAX_INTRS]; /* moderation level for
+                                                          * each intr
+                                                          */
+       u8              reserved2[3];
+};
+
 /* one bit per VLAN ID, the size is in the units of u32        */
 #define VMXNET3_VFT_SIZE  (4096 / (sizeof(u32) * 8))
 
@@ -719,11 +744,16 @@ struct Vmxnet3_DSDevRead {
        struct Vmxnet3_VariableLenConfDesc      pluginConfDesc;
 };
 
+struct Vmxnet3_DSDevReadExt {
+       /* read-only region for device, read by dev in response to a SET cmd */
+       struct Vmxnet3_IntrConfExt              intrConfExt;
+};
+
 /* All structures in DriverShared are padded to multiples of 8 bytes */
 struct Vmxnet3_DriverShared {
        __le32                          magic;
        /* make devRead start at 64bit boundaries */
-       __le32                          pad;
+       __le32                          size; /* size of DriverShared */
        struct Vmxnet3_DSDevRead        devRead;
        __le32                          ecr;
        __le32                          reserved;
@@ -734,6 +764,7 @@ struct Vmxnet3_DriverShared {
                                                  * command
                                                  */
        } cu;
+       struct Vmxnet3_DSDevReadExt     devReadExt;
 };
 
 
@@ -764,6 +795,7 @@ struct Vmxnet3_DriverShared {
        ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0)
 
 #define VMXNET3_MAX_MTU     9000
+#define VMXNET3_V6_MAX_MTU  9190
 #define VMXNET3_MIN_MTU     60
 
 #define VMXNET3_LINK_UP         (10000 << 16 | 1)    /* 10 Gbps, up */
index 6e87f1f..142f706 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -314,10 +314,10 @@ vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi,
 {
        if (tbi->map_type == VMXNET3_MAP_SINGLE)
                dma_unmap_single(&pdev->dev, tbi->dma_addr, tbi->len,
-                                PCI_DMA_TODEVICE);
+                                DMA_TO_DEVICE);
        else if (tbi->map_type == VMXNET3_MAP_PAGE)
                dma_unmap_page(&pdev->dev, tbi->dma_addr, tbi->len,
-                              PCI_DMA_TODEVICE);
+                              DMA_TO_DEVICE);
        else
                BUG_ON(tbi->map_type != VMXNET3_MAP_NONE);
 
@@ -585,7 +585,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                rbi->dma_addr = dma_map_single(
                                                &adapter->pdev->dev,
                                                rbi->skb->data, rbi->len,
-                                               PCI_DMA_FROMDEVICE);
+                                               DMA_FROM_DEVICE);
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        dev_kfree_skb_any(rbi->skb);
@@ -609,7 +609,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
                                rbi->dma_addr = dma_map_page(
                                                &adapter->pdev->dev,
                                                rbi->page, 0, PAGE_SIZE,
-                                               PCI_DMA_FROMDEVICE);
+                                               DMA_FROM_DEVICE);
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      rbi->dma_addr)) {
                                        put_page(rbi->page);
@@ -723,7 +723,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
                tbi->map_type = VMXNET3_MAP_SINGLE;
                tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
                                skb->data + buf_offset, buf_size,
-                               PCI_DMA_TODEVICE);
+                               DMA_TO_DEVICE);
                if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
                        return -EFAULT;
 
@@ -1449,7 +1449,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                                new_dma_addr =
                                        dma_map_single(&adapter->pdev->dev,
                                                       new_skb->data, rbi->len,
-                                                      PCI_DMA_FROMDEVICE);
+                                                      DMA_FROM_DEVICE);
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      new_dma_addr)) {
                                        dev_kfree_skb(new_skb);
@@ -1467,7 +1467,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                                dma_unmap_single(&adapter->pdev->dev,
                                                 rbi->dma_addr,
                                                 rbi->len,
-                                                PCI_DMA_FROMDEVICE);
+                                                DMA_FROM_DEVICE);
 
                                /* Immediate refill */
                                rbi->skb = new_skb;
@@ -1478,10 +1478,28 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 
 #ifdef VMXNET3_RSS
                        if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE &&
-                           (adapter->netdev->features & NETIF_F_RXHASH))
+                           (adapter->netdev->features & NETIF_F_RXHASH)) {
+                               enum pkt_hash_types hash_type;
+
+                               switch (rcd->rssType) {
+                               case VMXNET3_RCD_RSS_TYPE_IPV4:
+                               case VMXNET3_RCD_RSS_TYPE_IPV6:
+                                       hash_type = PKT_HASH_TYPE_L3;
+                                       break;
+                               case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
+                               case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
+                               case VMXNET3_RCD_RSS_TYPE_UDPIPV4:
+                               case VMXNET3_RCD_RSS_TYPE_UDPIPV6:
+                                       hash_type = PKT_HASH_TYPE_L4;
+                                       break;
+                               default:
+                                       hash_type = PKT_HASH_TYPE_L3;
+                                       break;
+                               }
                                skb_set_hash(ctx->skb,
                                             le32_to_cpu(rcd->rssHash),
-                                            PKT_HASH_TYPE_L3);
+                                            hash_type);
+                       }
 #endif
                        skb_put(ctx->skb, rcd->len);
 
@@ -1528,7 +1546,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                                new_dma_addr = dma_map_page(&adapter->pdev->dev,
                                                            new_page,
                                                            0, PAGE_SIZE,
-                                                           PCI_DMA_FROMDEVICE);
+                                                           DMA_FROM_DEVICE);
                                if (dma_mapping_error(&adapter->pdev->dev,
                                                      new_dma_addr)) {
                                        put_page(new_page);
@@ -1541,7 +1559,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 
                                dma_unmap_page(&adapter->pdev->dev,
                                               rbi->dma_addr, rbi->len,
-                                              PCI_DMA_FROMDEVICE);
+                                              DMA_FROM_DEVICE);
 
                                vmxnet3_append_frag(ctx->skb, rcd, rbi);
 
@@ -1659,13 +1677,13 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
                        if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD &&
                                        rq->buf_info[ring_idx][i].skb) {
                                dma_unmap_single(&adapter->pdev->dev, rxd->addr,
-                                                rxd->len, PCI_DMA_FROMDEVICE);
+                                                rxd->len, DMA_FROM_DEVICE);
                                dev_kfree_skb(rq->buf_info[ring_idx][i].skb);
                                rq->buf_info[ring_idx][i].skb = NULL;
                        } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY &&
                                        rq->buf_info[ring_idx][i].page) {
                                dma_unmap_page(&adapter->pdev->dev, rxd->addr,
-                                              rxd->len, PCI_DMA_FROMDEVICE);
+                                              rxd->len, DMA_FROM_DEVICE);
                                put_page(rq->buf_info[ring_idx][i].page);
                                rq->buf_info[ring_idx][i].page = NULL;
                        }
@@ -2401,7 +2419,7 @@ vmxnet3_set_mc(struct net_device *netdev)
                                                        &adapter->pdev->dev,
                                                        new_table,
                                                        sz,
-                                                       PCI_DMA_TODEVICE);
+                                                       DMA_TO_DEVICE);
                                if (!dma_mapping_error(&adapter->pdev->dev,
                                                       new_table_pa)) {
                                        new_mode |= VMXNET3_RXM_MCAST;
@@ -2437,7 +2455,7 @@ vmxnet3_set_mc(struct net_device *netdev)
 
        if (new_table_pa_valid)
                dma_unmap_single(&adapter->pdev->dev, new_table_pa,
-                                rxConf->mfTableLen, PCI_DMA_TODEVICE);
+                                rxConf->mfTableLen, DMA_TO_DEVICE);
        kfree(new_table);
 }
 
@@ -2460,6 +2478,7 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 {
        struct Vmxnet3_DriverShared *shared = adapter->shared;
        struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
+       struct Vmxnet3_DSDevReadExt *devReadExt = &shared->devReadExt;
        struct Vmxnet3_TxQueueConf *tqc;
        struct Vmxnet3_RxQueueConf *rqc;
        int i;
@@ -2572,14 +2591,26 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 #endif /* VMXNET3_RSS */
 
        /* intr settings */
-       devRead->intrConf.autoMask = adapter->intr.mask_mode ==
-                                    VMXNET3_IMM_AUTO;
-       devRead->intrConf.numIntrs = adapter->intr.num_intrs;
-       for (i = 0; i < adapter->intr.num_intrs; i++)
-               devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
+       if (!VMXNET3_VERSION_GE_6(adapter) ||
+           !adapter->queuesExtEnabled) {
+               devRead->intrConf.autoMask = adapter->intr.mask_mode ==
+                                            VMXNET3_IMM_AUTO;
+               devRead->intrConf.numIntrs = adapter->intr.num_intrs;
+               for (i = 0; i < adapter->intr.num_intrs; i++)
+                       devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
+
+               devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
+               devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+       } else {
+               devReadExt->intrConfExt.autoMask = adapter->intr.mask_mode ==
+                                                  VMXNET3_IMM_AUTO;
+               devReadExt->intrConfExt.numIntrs = adapter->intr.num_intrs;
+               for (i = 0; i < adapter->intr.num_intrs; i++)
+                       devReadExt->intrConfExt.modLevels[i] = adapter->intr.mod_levels[i];
 
-       devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
-       devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+               devReadExt->intrConfExt.eventIntrIdx = adapter->intr.event_intr_idx;
+               devReadExt->intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
+       }
 
        /* rx filter settings */
        devRead->rxFilterConf.rxMode = 0;
@@ -2717,6 +2748,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
         * tx queue if the link is up.
         */
        vmxnet3_check_link(adapter, true);
+       netif_tx_wake_all_queues(adapter->netdev);
        for (i = 0; i < adapter->num_rx_queues; i++)
                napi_enable(&adapter->rx_queue[i].napi);
        vmxnet3_enable_all_intrs(adapter);
@@ -3372,6 +3404,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        int size;
        int num_tx_queues;
        int num_rx_queues;
+       int queues;
+       unsigned long flags;
 
        if (!pci_msi_enabled())
                enable_mq = 0;
@@ -3383,7 +3417,6 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        else
 #endif
                num_rx_queues = 1;
-       num_rx_queues = rounddown_pow_of_two(num_rx_queues);
 
        if (enable_mq)
                num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
@@ -3391,13 +3424,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        else
                num_tx_queues = 1;
 
-       num_tx_queues = rounddown_pow_of_two(num_tx_queues);
        netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
                                   max(num_tx_queues, num_rx_queues));
-       dev_info(&pdev->dev,
-                "# of Tx queues : %d, # of Rx queues : %d\n",
-                num_tx_queues, num_rx_queues);
-
        if (!netdev)
                return -ENOMEM;
 
@@ -3410,19 +3438,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE;
        adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE;
 
-       if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
-               if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_consistent_dma_mask failed\n");
-                       err = -EIO;
-                       goto err_set_mask;
-               }
+       if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) == 0) {
                dma64 = true;
        } else {
-               if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) {
-                       dev_err(&pdev->dev,
-                               "pci_set_dma_mask failed\n");
-                       err = -EIO;
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+               if (err) {
+                       dev_err(&pdev->dev, "dma_set_mask failed\n");
                        goto err_set_mask;
                }
                dma64 = false;
@@ -3431,7 +3452,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        spin_lock_init(&adapter->cmd_lock);
        adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
                                             sizeof(struct vmxnet3_adapter),
-                                            PCI_DMA_TODEVICE);
+                                            DMA_TO_DEVICE);
        if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
                dev_err(&pdev->dev, "Failed to map dma\n");
                err = -EFAULT;
@@ -3447,51 +3468,22 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                goto err_alloc_shared;
        }
 
-       adapter->num_rx_queues = num_rx_queues;
-       adapter->num_tx_queues = num_tx_queues;
-       adapter->rx_buf_per_pkt = 1;
-
-       size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
-       size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
-       adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
-                                               &adapter->queue_desc_pa,
-                                               GFP_KERNEL);
-
-       if (!adapter->tqd_start) {
-               dev_err(&pdev->dev, "Failed to allocate memory\n");
-               err = -ENOMEM;
-               goto err_alloc_queue_desc;
-       }
-       adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
-                                                           adapter->num_tx_queues);
-
-       adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
-                                             sizeof(struct Vmxnet3_PMConf),
-                                             &adapter->pm_conf_pa,
-                                             GFP_KERNEL);
-       if (adapter->pm_conf == NULL) {
-               err = -ENOMEM;
-               goto err_alloc_pm;
-       }
-
-#ifdef VMXNET3_RSS
-
-       adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
-                                              sizeof(struct UPT1_RSSConf),
-                                              &adapter->rss_conf_pa,
-                                              GFP_KERNEL);
-       if (adapter->rss_conf == NULL) {
-               err = -ENOMEM;
-               goto err_alloc_rss;
-       }
-#endif /* VMXNET3_RSS */
-
        err = vmxnet3_alloc_pci_resources(adapter);
        if (err < 0)
                goto err_alloc_pci;
 
        ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
-       if (ver & (1 << VMXNET3_REV_4)) {
+       if (ver & (1 << VMXNET3_REV_6)) {
+               VMXNET3_WRITE_BAR1_REG(adapter,
+                                      VMXNET3_REG_VRRS,
+                                      1 << VMXNET3_REV_6);
+               adapter->version = VMXNET3_REV_6 + 1;
+       } else if (ver & (1 << VMXNET3_REV_5)) {
+               VMXNET3_WRITE_BAR1_REG(adapter,
+                                      VMXNET3_REG_VRRS,
+                                      1 << VMXNET3_REV_5);
+               adapter->version = VMXNET3_REV_5 + 1;
+       } else if (ver & (1 << VMXNET3_REV_4)) {
                VMXNET3_WRITE_BAR1_REG(adapter,
                                       VMXNET3_REG_VRRS,
                                       1 << VMXNET3_REV_4);
@@ -3529,6 +3521,77 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                goto err_ver;
        }
 
+       if (VMXNET3_VERSION_GE_6(adapter)) {
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_GET_MAX_QUEUES_CONF);
+               queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+               if (queues > 0) {
+                       adapter->num_rx_queues = min(num_rx_queues, ((queues >> 8) & 0xff));
+                       adapter->num_tx_queues = min(num_tx_queues, (queues & 0xff));
+               } else {
+                       adapter->num_rx_queues = min(num_rx_queues,
+                                                    VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+                       adapter->num_tx_queues = min(num_tx_queues,
+                                                    VMXNET3_DEVICE_DEFAULT_TX_QUEUES);
+               }
+               if (adapter->num_rx_queues > VMXNET3_MAX_RX_QUEUES ||
+                   adapter->num_tx_queues > VMXNET3_MAX_TX_QUEUES) {
+                       adapter->queuesExtEnabled = true;
+               } else {
+                       adapter->queuesExtEnabled = false;
+               }
+       } else {
+               adapter->queuesExtEnabled = false;
+               num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+               num_tx_queues = rounddown_pow_of_two(num_tx_queues);
+               adapter->num_rx_queues = min(num_rx_queues,
+                                            VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+               adapter->num_tx_queues = min(num_tx_queues,
+                                            VMXNET3_DEVICE_DEFAULT_TX_QUEUES);
+       }
+       dev_info(&pdev->dev,
+                "# of Tx queues : %d, # of Rx queues : %d\n",
+                adapter->num_tx_queues, adapter->num_rx_queues);
+
+       adapter->rx_buf_per_pkt = 1;
+
+       size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+       size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
+       adapter->tqd_start = dma_alloc_coherent(&adapter->pdev->dev, size,
+                                               &adapter->queue_desc_pa,
+                                               GFP_KERNEL);
+
+       if (!adapter->tqd_start) {
+               dev_err(&pdev->dev, "Failed to allocate memory\n");
+               err = -ENOMEM;
+               goto err_ver;
+       }
+       adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
+                                                           adapter->num_tx_queues);
+
+       adapter->pm_conf = dma_alloc_coherent(&adapter->pdev->dev,
+                                             sizeof(struct Vmxnet3_PMConf),
+                                             &adapter->pm_conf_pa,
+                                             GFP_KERNEL);
+       if (adapter->pm_conf == NULL) {
+               err = -ENOMEM;
+               goto err_alloc_pm;
+       }
+
+#ifdef VMXNET3_RSS
+
+       adapter->rss_conf = dma_alloc_coherent(&adapter->pdev->dev,
+                                              sizeof(struct UPT1_RSSConf),
+                                              &adapter->rss_conf_pa,
+                                              GFP_KERNEL);
+       if (adapter->rss_conf == NULL) {
+               err = -ENOMEM;
+               goto err_alloc_rss;
+       }
+#endif /* VMXNET3_RSS */
+
        if (VMXNET3_VERSION_GE_3(adapter)) {
                adapter->coal_conf =
                        dma_alloc_coherent(&adapter->pdev->dev,
@@ -3538,7 +3601,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                                           GFP_KERNEL);
                if (!adapter->coal_conf) {
                        err = -ENOMEM;
-                       goto err_ver;
+                       goto err_coal_conf;
                }
                adapter->coal_conf->coalMode = VMXNET3_COALESCE_DISABLED;
                adapter->default_coal_mode = true;
@@ -3581,9 +3644,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        vmxnet3_set_ethtool_ops(netdev);
        netdev->watchdog_timeo = 5 * HZ;
 
-       /* MTU range: 60 - 9000 */
+       /* MTU range: 60 - 9190 */
        netdev->min_mtu = VMXNET3_MIN_MTU;
-       netdev->max_mtu = VMXNET3_MAX_MTU;
+       if (VMXNET3_VERSION_GE_6(adapter))
+               netdev->max_mtu = VMXNET3_V6_MAX_MTU;
+       else
+               netdev->max_mtu = VMXNET3_MAX_MTU;
 
        INIT_WORK(&adapter->work, vmxnet3_reset_work);
        set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
@@ -3621,9 +3687,7 @@ err_register:
                                  adapter->coal_conf, adapter->coal_conf_pa);
        }
        vmxnet3_free_intr_resources(adapter);
-err_ver:
-       vmxnet3_free_pci_resources(adapter);
-err_alloc_pci:
+err_coal_conf:
 #ifdef VMXNET3_RSS
        dma_free_coherent(&adapter->pdev->dev, sizeof(struct UPT1_RSSConf),
                          adapter->rss_conf, adapter->rss_conf_pa);
@@ -3634,13 +3698,15 @@ err_alloc_rss:
 err_alloc_pm:
        dma_free_coherent(&adapter->pdev->dev, size, adapter->tqd_start,
                          adapter->queue_desc_pa);
-err_alloc_queue_desc:
+err_ver:
+       vmxnet3_free_pci_resources(adapter);
+err_alloc_pci:
        dma_free_coherent(&adapter->pdev->dev,
                          sizeof(struct Vmxnet3_DriverShared),
                          adapter->shared, adapter->shared_pa);
 err_alloc_shared:
        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
-                        sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+                        sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE);
 err_set_mask:
        free_netdev(netdev);
        return err;
@@ -3653,7 +3719,8 @@ vmxnet3_remove_device(struct pci_dev *pdev)
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
        int size = 0;
-       int num_rx_queues;
+       int num_rx_queues, rx_queues;
+       unsigned long flags;
 
 #ifdef VMXNET3_RSS
        if (enable_mq)
@@ -3662,7 +3729,24 @@ vmxnet3_remove_device(struct pci_dev *pdev)
        else
 #endif
                num_rx_queues = 1;
-       num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+       if (!VMXNET3_VERSION_GE_6(adapter)) {
+               num_rx_queues = rounddown_pow_of_two(num_rx_queues);
+       }
+       if (VMXNET3_VERSION_GE_6(adapter)) {
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_GET_MAX_QUEUES_CONF);
+               rx_queues = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+               if (rx_queues > 0)
+                       rx_queues = (rx_queues >> 8) & 0xff;
+               else
+                       rx_queues = min(num_rx_queues, VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+               num_rx_queues = min(num_rx_queues, rx_queues);
+       } else {
+               num_rx_queues = min(num_rx_queues,
+                                   VMXNET3_DEVICE_DEFAULT_RX_QUEUES);
+       }
 
        cancel_work_sync(&adapter->work);
 
@@ -3690,7 +3774,7 @@ vmxnet3_remove_device(struct pci_dev *pdev)
                          sizeof(struct Vmxnet3_DriverShared),
                          adapter->shared, adapter->shared_pa);
        dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
-                        sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+                        sizeof(struct vmxnet3_adapter), DMA_TO_DEVICE);
        free_netdev(netdev);
 }
 
index 1b483cf..5dd8360 100644 (file)
@@ -787,6 +787,10 @@ vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter,
        case AH_ESP_V6_FLOW:
        case AH_V6_FLOW:
        case ESP_V6_FLOW:
+               if (VMXNET3_VERSION_GE_6(adapter) &&
+                   (rss_fields & VMXNET3_RSS_FIELDS_ESPIP6))
+                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               fallthrough;
        case SCTP_V6_FLOW:
        case IPV6_FLOW:
                info->data |= RXH_IP_SRC | RXH_IP_DST;
@@ -871,6 +875,22 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev,
        case ESP_V6_FLOW:
        case AH_V6_FLOW:
        case AH_ESP_V6_FLOW:
+               if (!VMXNET3_VERSION_GE_6(adapter))
+                       return -EOPNOTSUPP;
+               if (!(nfc->data & RXH_IP_SRC) ||
+                   !(nfc->data & RXH_IP_DST))
+                       return -EINVAL;
+               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               case 0:
+                       rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP6;
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       rss_fields |= VMXNET3_RSS_FIELDS_ESPIP6;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
        case SCTP_V4_FLOW:
        case SCTP_V6_FLOW:
                if (!(nfc->data & RXH_IP_SRC) ||
@@ -1033,8 +1053,10 @@ vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key,
 }
 #endif
 
-static int
-vmxnet3_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int vmxnet3_get_coalesce(struct net_device *netdev,
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 
@@ -1068,8 +1090,10 @@ vmxnet3_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
        return 0;
 }
 
-static int
-vmxnet3_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec)
+static int vmxnet3_set_coalesce(struct net_device *netdev,
+                               struct ethtool_coalesce *ec,
+                               struct kernel_ethtool_coalesce *kernel_coal,
+                               struct netlink_ext_ack *extack)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
        struct Vmxnet3_DriverShared *shared = adapter->shared;
index e910596..7027ff4 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.5.0.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.6.0.0-k"
 
 /* Each byte of this 32-bit integer encodes a version number in
  * VMXNET3_DRIVER_VERSION_STRING.
  */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01050000
+#define VMXNET3_DRIVER_VERSION_NUM      0x01060000
 
 #if defined(CONFIG_PCI_MSI)
        /* RSS only makes sense if MSI-X is supported. */
        #define VMXNET3_RSS
 #endif
 
+#define VMXNET3_REV_6          5       /* Vmxnet3 Rev. 6 */
+#define VMXNET3_REV_5          4       /* Vmxnet3 Rev. 5 */
 #define VMXNET3_REV_4          3       /* Vmxnet3 Rev. 4 */
 #define VMXNET3_REV_3          2       /* Vmxnet3 Rev. 3 */
 #define VMXNET3_REV_2          1       /* Vmxnet3 Rev. 2 */
@@ -301,15 +303,18 @@ struct vmxnet3_rx_queue {
        struct vmxnet3_rq_driver_stats  stats;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
-#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
-#define VMXNET3_DEVICE_MAX_RX_QUEUES 8   /* Keep this value as a power of 2 */
+#define VMXNET3_DEVICE_MAX_TX_QUEUES 32
+#define VMXNET3_DEVICE_MAX_RX_QUEUES 32   /* Keep this value as a power of 2 */
+
+#define VMXNET3_DEVICE_DEFAULT_TX_QUEUES 8
+#define VMXNET3_DEVICE_DEFAULT_RX_QUEUES 8   /* Keep this value as a power of 2 */
 
 /* Should be less than UPT1_RSS_MAX_IND_TABLE_SIZE */
 #define VMXNET3_RSS_IND_TABLE_SIZE (VMXNET3_DEVICE_MAX_RX_QUEUES * 4)
 
 #define VMXNET3_LINUX_MAX_MSIX_VECT     (VMXNET3_DEVICE_MAX_TX_QUEUES + \
                                         VMXNET3_DEVICE_MAX_RX_QUEUES + 1)
-#define VMXNET3_LINUX_MIN_MSIX_VECT     2 /* 1 for tx-rx pair and 1 for event */
+#define VMXNET3_LINUX_MIN_MSIX_VECT     3 /* 1 for tx, 1 for rx pair and 1 for event */
 
 
 struct vmxnet3_intr {
@@ -396,6 +401,7 @@ struct vmxnet3_adapter {
        dma_addr_t adapter_pa;
        dma_addr_t pm_conf_pa;
        dma_addr_t rss_conf_pa;
+       bool   queuesExtEnabled;
 };
 
 #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val)  \
@@ -421,6 +427,10 @@ struct vmxnet3_adapter {
        (adapter->version >= VMXNET3_REV_3 + 1)
 #define VMXNET3_VERSION_GE_4(adapter) \
        (adapter->version >= VMXNET3_REV_4 + 1)
+#define VMXNET3_VERSION_GE_5(adapter) \
+       (adapter->version >= VMXNET3_REV_5 + 1)
+#define VMXNET3_VERSION_GE_6(adapter) \
+       (adapter->version >= VMXNET3_REV_6 + 1)
 
 /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */
 #define VMXNET3_DEF_TX_RING_SIZE    512
index 8bbe2a7..bf2fac9 100644 (file)
@@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
        bool is_v6gw = false;
-       int ret = -EINVAL;
 
        nf_reset_ct(skb);
 
        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       ret = -ENOMEM;
-                       goto err;
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb) {
+                       dev->stats.tx_errors++;
+                       return -ENOMEM;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
 
        neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
+               int ret;
+
                sock_confirm_neigh(skb, neigh);
                /* if crossing protocols, can not use the cached header */
                ret = neigh_output(neigh, skb, is_v6gw);
@@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        }
 
        rcu_read_unlock_bh();
-err:
        vrf_tx_error(skb->dev, skb);
-       return ret;
+       return -EINVAL;
 }
 
 static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
index 473df25..592a838 100644 (file)
@@ -290,30 +290,6 @@ config SLIC_DS26522
          To compile this driver as a module, choose M here: the
          module will be called slic_ds26522.
 
-config DSCC4_PCISYNC
-       bool "Etinc PCISYNC features"
-       depends on DSCC4
-       help
-         Due to Etinc's design choice for its PCISYNC cards, some operations
-         are only allowed on specific ports of the DSCC4. This option is the
-         only way for the driver to know that it shouldn't return a success
-         code for these operations.
-
-         Please say Y if your card is an Etinc's PCISYNC.
-
-config DSCC4_PCI_RST
-       bool "Hard reset support"
-       depends on DSCC4
-       help
-         Various DSCC4 bugs forbid any reliable software reset of the ASIC.
-         As a replacement, some vendors provide a way to assert the PCI #RST
-         pin of DSCC4 through the GPIO port of the card. If you choose Y,
-         the driver will make use of this feature before module removal
-         (i.e. rmmod). The feature is known to be available on Commtech's
-         cards. Contact your manufacturer for details.
-
-         Say Y if your card supports this feature.
-
 config IXP4XX_HSS
        tristate "Intel IXP4xx HSS (synchronous serial port) support"
        depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
@@ -337,33 +313,6 @@ config LAPBETHER
          To compile this driver as a module, choose M here: the
          module will be called lapbether.
 
-         If unsure, say N.
-
-config SBNI
-       tristate "Granch SBNI12 Leased Line adapter support"
-       depends on X86
-       help
-         Driver for ISA SBNI12-xx cards which are low cost alternatives to
-         leased line modems.
-
-         You can find more information and last versions of drivers and
-         utilities at <http://www.granch.ru/>. If you have any question you
-         can send email to <sbni@granch.ru>.
-
-         To compile this driver as a module, choose M here: the
-         module will be called sbni.
-
-         If unsure, say N.
-
-config SBNI_MULTILINE
-       bool "Multiple line feature support"
-       depends on SBNI
-       help
-         Schedule traffic for some parallel lines, via SBNI12 adapters.
-
-         If you have two computers connected with two parallel lines it's
-         possible to increase transfer rate nearly twice. You should have
-         a program named 'sbniconfig' to configure adapters.
 
          If unsure, say N.
 
index 081666c..f6b92ef 100644 (file)
@@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC)         += farsync.o
 obj-$(CONFIG_LANMEDIA)         += lmc/
 
 obj-$(CONFIG_LAPBETHER)                += lapbether.o
-obj-$(CONFIG_SBNI)             += sbni.o
 obj-$(CONFIG_N2)               += n2.o
 obj-$(CONFIG_C101)             += c101.o
 obj-$(CONFIG_WANXL)            += wanxl.o
index 059c2f7..8dd14d9 100644 (file)
@@ -208,14 +208,12 @@ static int c101_close(struct net_device *dev)
        return 0;
 }
 
-static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int c101_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                              void __user *data, int cmd)
 {
-       const size_t size = sizeof(sync_serial_settings);
-       sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+#ifdef DEBUG_RINGS
        port_t *port = dev_to_port(dev);
 
-#ifdef DEBUG_RINGS
        if (cmd == SIOCDEVPRIVATE) {
                sca_dump_rings(dev);
                printk(KERN_DEBUG "MSCI1: ST: %02x %02x %02x %02x\n",
@@ -226,14 +224,22 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
        }
 #endif
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
 
-       switch (ifr->ifr_settings.type) {
+       return -EOPNOTSUPP;
+}
+
+static int c101_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+       const size_t size = sizeof(sync_serial_settings);
+       sync_serial_settings new_line;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+       port_t *port = dev_to_port(dev);
+
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_SYNC_SERIAL;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(line, &port->settings, size))
@@ -261,7 +267,7 @@ static int c101_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -286,7 +292,8 @@ static const struct net_device_ops c101_ops = {
        .ndo_open       = c101_open,
        .ndo_stop       = c101_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = c101_ioctl,
+       .ndo_siocwandev = c101_ioctl,
+       .ndo_siocdevprivate = c101_siocdevprivate,
 };
 
 static int __init c101_run(unsigned long irq, unsigned long winbase)
index 43caab0..23d2954 100644 (file)
@@ -267,7 +267,6 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d);
 static char *cosa_net_setup_rx(struct channel_data *channel, int size);
 static int cosa_net_rx_done(struct channel_data *channel);
 static int cosa_net_tx_done(struct channel_data *channel, int size);
-static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 
 /* Character device */
 static char *chrdev_setup_rx(struct channel_data *channel, int size);
@@ -415,7 +414,7 @@ static const struct net_device_ops cosa_ops = {
        .ndo_open       = cosa_net_open,
        .ndo_stop       = cosa_net_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = cosa_net_ioctl,
+       .ndo_siocwandev = hdlc_ioctl,
        .ndo_tx_timeout = cosa_net_timeout,
 };
 
@@ -1169,18 +1168,6 @@ static int cosa_ioctl_common(struct cosa_data *cosa,
        return -ENOIOCTLCMD;
 }
 
-static int cosa_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-       int rv;
-       struct channel_data *chan = dev_to_chan(dev);
-
-       rv = cosa_ioctl_common(chan->cosa, chan, cmd,
-                              (unsigned long)ifr->ifr_data);
-       if (rv != -ENOIOCTLCMD)
-               return rv;
-       return hdlc_ioctl(dev, ifr, cmd);
-}
-
 static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
                               unsigned long arg)
 {
index b3466e0..6a212c0 100644 (file)
@@ -1784,16 +1784,15 @@ gather_conf_info(struct fst_card_info *card, struct fst_port_info *port,
 
 static int
 fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
-             struct ifreq *ifr)
+             struct if_settings *ifs)
 {
        sync_serial_settings sync;
        int i;
 
-       if (ifr->ifr_settings.size != sizeof(sync))
+       if (ifs->size != sizeof(sync))
                return -ENOMEM;
 
-       if (copy_from_user
-           (&sync, ifr->ifr_settings.ifs_ifsu.sync, sizeof(sync)))
+       if (copy_from_user(&sync, ifs->ifs_ifsu.sync, sizeof(sync)))
                return -EFAULT;
 
        if (sync.loopback)
@@ -1801,7 +1800,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
 
        i = port->index;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_IFACE_V35:
                FST_WRW(card, portConfig[i].lineInterface, V35);
                port->hwif = V35;
@@ -1857,7 +1856,7 @@ fst_set_iface(struct fst_card_info *card, struct fst_port_info *port,
 
 static int
 fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
-             struct ifreq *ifr)
+             struct if_settings *ifs)
 {
        sync_serial_settings sync;
        int i;
@@ -1868,29 +1867,29 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
         */
        switch (port->hwif) {
        case E1:
-               ifr->ifr_settings.type = IF_IFACE_E1;
+               ifs->type = IF_IFACE_E1;
                break;
        case T1:
-               ifr->ifr_settings.type = IF_IFACE_T1;
+               ifs->type = IF_IFACE_T1;
                break;
        case V35:
-               ifr->ifr_settings.type = IF_IFACE_V35;
+               ifs->type = IF_IFACE_V35;
                break;
        case V24:
-               ifr->ifr_settings.type = IF_IFACE_V24;
+               ifs->type = IF_IFACE_V24;
                break;
        case X21D:
-               ifr->ifr_settings.type = IF_IFACE_X21D;
+               ifs->type = IF_IFACE_X21D;
                break;
        case X21:
        default:
-               ifr->ifr_settings.type = IF_IFACE_X21;
+               ifs->type = IF_IFACE_X21;
                break;
        }
-       if (ifr->ifr_settings.size == 0)
+       if (!ifs->size)
                return 0;       /* only type requested */
 
-       if (ifr->ifr_settings.size < sizeof(sync))
+       if (ifs->size < sizeof(sync))
                return -ENOMEM;
 
        i = port->index;
@@ -1901,15 +1900,15 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port,
            INTCLK ? CLOCK_INT : CLOCK_EXT;
        sync.loopback = 0;
 
-       if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &sync, sizeof(sync)))
+       if (copy_to_user(ifs->ifs_ifsu.sync, &sync, sizeof(sync)))
                return -EFAULT;
 
-       ifr->ifr_settings.size = sizeof(sync);
+       ifs->size = sizeof(sync);
        return 0;
 }
 
 static int
-fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+fst_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
 {
        struct fst_card_info *card;
        struct fst_port_info *port;
@@ -1918,7 +1917,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        unsigned long flags;
        void *buf;
 
-       dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, ifr->ifr_data);
+       dbg(DBG_IOCTL, "ioctl: %x, %p\n", cmd, data);
 
        port = dev_to_port(dev);
        card = port->card;
@@ -1942,11 +1941,10 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                /* First copy in the header with the length and offset of data
                 * to write
                 */
-               if (!ifr->ifr_data)
+               if (!data)
                        return -EINVAL;
 
-               if (copy_from_user(&wrthdr, ifr->ifr_data,
-                                  sizeof(struct fstioc_write)))
+               if (copy_from_user(&wrthdr, data, sizeof(struct fstioc_write)))
                        return -EFAULT;
 
                /* Sanity check the parameters. We don't support partial writes
@@ -1958,7 +1956,7 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
                /* Now copy the data to the card. */
 
-               buf = memdup_user(ifr->ifr_data + sizeof(struct fstioc_write),
+               buf = memdup_user(data + sizeof(struct fstioc_write),
                                  wrthdr.size);
                if (IS_ERR(buf))
                        return PTR_ERR(buf);
@@ -1991,12 +1989,12 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        }
                }
 
-               if (!ifr->ifr_data)
+               if (!data)
                        return -EINVAL;
 
                gather_conf_info(card, port, &info);
 
-               if (copy_to_user(ifr->ifr_data, &info, sizeof(info)))
+               if (copy_to_user(data, &info, sizeof(info)))
                        return -EFAULT;
 
                return 0;
@@ -2011,46 +2009,58 @@ fst_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                               card->card_no, card->state);
                        return -EIO;
                }
-               if (copy_from_user(&info, ifr->ifr_data, sizeof(info)))
+               if (copy_from_user(&info, data, sizeof(info)))
                        return -EFAULT;
 
                return set_conf_from_info(card, port, &info);
+       default:
+               return -EINVAL;
+       }
+}
 
-       case SIOCWANDEV:
-               switch (ifr->ifr_settings.type) {
-               case IF_GET_IFACE:
-                       return fst_get_iface(card, port, ifr);
-
-               case IF_IFACE_SYNC_SERIAL:
-               case IF_IFACE_V35:
-               case IF_IFACE_V24:
-               case IF_IFACE_X21:
-               case IF_IFACE_X21D:
-               case IF_IFACE_T1:
-               case IF_IFACE_E1:
-                       return fst_set_iface(card, port, ifr);
-
-               case IF_PROTO_RAW:
-                       port->mode = FST_RAW;
-                       return 0;
+static int
+fst_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+       struct fst_card_info *card;
+       struct fst_port_info *port;
 
-               case IF_GET_PROTO:
-                       if (port->mode == FST_RAW) {
-                               ifr->ifr_settings.type = IF_PROTO_RAW;
-                               return 0;
-                       }
-                       return hdlc_ioctl(dev, ifr, cmd);
+       dbg(DBG_IOCTL, "SIOCDEVPRIVATE, %x\n", ifs->type);
 
-               default:
-                       port->mode = FST_GEN_HDLC;
-                       dbg(DBG_IOCTL, "Passing this type to hdlc %x\n",
-                           ifr->ifr_settings.type);
-                       return hdlc_ioctl(dev, ifr, cmd);
+       port = dev_to_port(dev);
+       card = port->card;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       switch (ifs->type) {
+       case IF_GET_IFACE:
+               return fst_get_iface(card, port, ifs);
+
+       case IF_IFACE_SYNC_SERIAL:
+       case IF_IFACE_V35:
+       case IF_IFACE_V24:
+       case IF_IFACE_X21:
+       case IF_IFACE_X21D:
+       case IF_IFACE_T1:
+       case IF_IFACE_E1:
+               return fst_set_iface(card, port, ifs);
+
+       case IF_PROTO_RAW:
+               port->mode = FST_RAW;
+               return 0;
+
+       case IF_GET_PROTO:
+               if (port->mode == FST_RAW) {
+                       ifs->type = IF_PROTO_RAW;
+                       return 0;
                }
+               return hdlc_ioctl(dev, ifs);
 
        default:
-               /* Not one of ours. Pass through to HDLC package */
-               return hdlc_ioctl(dev, ifr, cmd);
+               port->mode = FST_GEN_HDLC;
+               dbg(DBG_IOCTL, "Passing this type to hdlc %x\n",
+                   ifs->type);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -2310,7 +2320,8 @@ static const struct net_device_ops fst_ops = {
        .ndo_open       = fst_open,
        .ndo_stop       = fst_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = fst_ioctl,
+       .ndo_siocwandev = fst_ioctl,
+       .ndo_siocdevprivate = fst_siocdevprivate,
        .ndo_tx_timeout = fst_tx_timeout,
 };
 
index 39f05fa..cda1b4c 100644 (file)
@@ -674,31 +674,28 @@ static irqreturn_t ucc_hdlc_irq_handler(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static int uhdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int uhdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        const size_t size = sizeof(te1_settings);
        te1_settings line;
        struct ucc_hdlc_private *priv = netdev_priv(dev);
 
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
-
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_E1;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_E1;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                memset(&line, 0, sizeof(line));
                line.clock_type = priv->clocking;
 
-               if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size))
+               if (copy_to_user(ifs->ifs_ifsu.sync, &line, size))
                        return -EFAULT;
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -1053,7 +1050,7 @@ static const struct net_device_ops uhdlc_ops = {
        .ndo_open       = uhdlc_open,
        .ndo_stop       = uhdlc_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = uhdlc_ioctl,
+       .ndo_siocwandev = uhdlc_ioctl,
        .ndo_tx_timeout = uhdlc_tx_timeout,
 };
 
index dd6312b..cbed10b 100644 (file)
@@ -196,16 +196,13 @@ void hdlc_close(struct net_device *dev)
 }
 EXPORT_SYMBOL(hdlc_close);
 
-int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        struct hdlc_proto *proto = first_proto;
        int result;
 
-       if (cmd != SIOCWANDEV)
-               return -EINVAL;
-
        if (dev_to_hdlc(dev)->proto) {
-               result = dev_to_hdlc(dev)->proto->ioctl(dev, ifr);
+               result = dev_to_hdlc(dev)->proto->ioctl(dev, ifs);
                if (result != -EINVAL)
                        return result;
        }
@@ -213,7 +210,7 @@ int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        /* Not handled by currently attached protocol (if any) */
 
        while (proto) {
-               result = proto->ioctl(dev, ifr);
+               result = proto->ioctl(dev, ifs);
                if (result != -EINVAL)
                        return result;
                proto = proto->next;
index c54fdae..cdebe65 100644 (file)
@@ -56,7 +56,7 @@ struct cisco_state {
        u32 rxseq; /* RX sequence number */
 };
 
-static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static inline struct cisco_state *state(hdlc_device *hdlc)
 {
@@ -306,21 +306,21 @@ static const struct header_ops cisco_header_ops = {
        .create = cisco_hard_header,
 };
 
-static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int cisco_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
-       cisco_proto __user *cisco_s = ifr->ifr_settings.ifs_ifsu.cisco;
+       cisco_proto __user *cisco_s = ifs->ifs_ifsu.cisco;
        const size_t size = sizeof(cisco_proto);
        cisco_proto new_settings;
        hdlc_device *hdlc = dev_to_hdlc(dev);
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto)
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_CISCO;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_PROTO_CISCO;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(cisco_s, &state(hdlc)->settings, size))
index 25e3564..7637edc 100644 (file)
@@ -146,7 +146,7 @@ struct frad_state {
        u8 rxseq; /* RX sequence number */
 };
 
-static int fr_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int fr_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static inline u16 q922_to_dlci(u8 *hdr)
 {
@@ -357,26 +357,26 @@ static int pvc_close(struct net_device *dev)
        return 0;
 }
 
-static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pvc_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        struct pvc_device *pvc = dev->ml_priv;
        fr_proto_pvc_info info;
 
-       if (ifr->ifr_settings.type == IF_GET_PROTO) {
+       if (ifs->type == IF_GET_PROTO) {
                if (dev->type == ARPHRD_ETHER)
-                       ifr->ifr_settings.type = IF_PROTO_FR_ETH_PVC;
+                       ifs->type = IF_PROTO_FR_ETH_PVC;
                else
-                       ifr->ifr_settings.type = IF_PROTO_FR_PVC;
+                       ifs->type = IF_PROTO_FR_PVC;
 
-               if (ifr->ifr_settings.size < sizeof(info)) {
+               if (ifs->size < sizeof(info)) {
                        /* data size wanted */
-                       ifr->ifr_settings.size = sizeof(info);
+                       ifs->size = sizeof(info);
                        return -ENOBUFS;
                }
 
                info.dlci = pvc->dlci;
                memcpy(info.master, pvc->frad->name, IFNAMSIZ);
-               if (copy_to_user(ifr->ifr_settings.ifs_ifsu.fr_pvc_info,
+               if (copy_to_user(ifs->ifs_ifsu.fr_pvc_info,
                                 &info, sizeof(info)))
                        return -EFAULT;
                return 0;
@@ -1056,7 +1056,7 @@ static const struct net_device_ops pvc_ops = {
        .ndo_open       = pvc_open,
        .ndo_stop       = pvc_close,
        .ndo_start_xmit = pvc_xmit,
-       .ndo_do_ioctl   = pvc_ioctl,
+       .ndo_siocwandev = pvc_ioctl,
 };
 
 static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
@@ -1179,22 +1179,22 @@ static struct hdlc_proto proto = {
        .module         = THIS_MODULE,
 };
 
-static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int fr_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
-       fr_proto __user *fr_s = ifr->ifr_settings.ifs_ifsu.fr;
+       fr_proto __user *fr_s = ifs->ifs_ifsu.fr;
        const size_t size = sizeof(fr_proto);
        fr_proto new_settings;
        hdlc_device *hdlc = dev_to_hdlc(dev);
        fr_proto_pvc pvc;
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto) /* Different proto */
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_FR;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_PROTO_FR;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(fr_s, &state(hdlc)->settings, size))
@@ -1256,21 +1256,21 @@ static int fr_ioctl(struct net_device *dev, struct ifreq *ifr)
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
 
-               if (copy_from_user(&pvc, ifr->ifr_settings.ifs_ifsu.fr_pvc,
+               if (copy_from_user(&pvc, ifs->ifs_ifsu.fr_pvc,
                                   sizeof(fr_proto_pvc)))
                        return -EFAULT;
 
                if (pvc.dlci <= 0 || pvc.dlci >= 1024)
                        return -EINVAL; /* Only 10 bits, DLCI 0 reserved */
 
-               if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC ||
-                   ifr->ifr_settings.type == IF_PROTO_FR_DEL_ETH_PVC)
+               if (ifs->type == IF_PROTO_FR_ADD_ETH_PVC ||
+                   ifs->type == IF_PROTO_FR_DEL_ETH_PVC)
                        result = ARPHRD_ETHER; /* bridged Ethernet device */
                else
                        result = ARPHRD_DLCI;
 
-               if (ifr->ifr_settings.type == IF_PROTO_FR_ADD_PVC ||
-                   ifr->ifr_settings.type == IF_PROTO_FR_ADD_ETH_PVC)
+               if (ifs->type == IF_PROTO_FR_ADD_PVC ||
+                   ifs->type == IF_PROTO_FR_ADD_ETH_PVC)
                        return fr_add_pvc(dev, pvc.dlci, result);
                else
                        return fr_del_pvc(hdlc, pvc.dlci, result);
index b81ecf4..37a3c98 100644 (file)
@@ -100,7 +100,7 @@ static const char *const event_names[EVENTS] = {
 
 static struct sk_buff_head tx_queue; /* used when holding the spin lock */
 
-static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static inline struct ppp *get_ppp(struct net_device *dev)
 {
@@ -655,17 +655,17 @@ static const struct header_ops ppp_header_ops = {
        .create = ppp_hard_header,
 };
 
-static int ppp_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int ppp_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        hdlc_device *hdlc = dev_to_hdlc(dev);
        struct ppp *ppp;
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto)
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_PPP;
+               ifs->type = IF_PROTO_PPP;
                return 0; /* return protocol only, no settable parameters */
 
        case IF_PROTO_PPP:
index 54d2849..4a2f068 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/skbuff.h>
 
 
-static int raw_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int raw_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static __be16 raw_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
@@ -33,21 +33,21 @@ static struct hdlc_proto proto = {
 };
 
 
-static int raw_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int raw_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
-       raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
+       raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc;
        const size_t size = sizeof(raw_hdlc_proto);
        raw_hdlc_proto new_settings;
        hdlc_device *hdlc = dev_to_hdlc(dev);
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto)
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_HDLC;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_PROTO_HDLC;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(raw_s, hdlc->state, size))
index 9275962..0a66b73 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 
-static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int raw_eth_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static netdev_tx_t eth_tx(struct sk_buff *skb, struct net_device *dev)
 {
@@ -48,22 +48,22 @@ static struct hdlc_proto proto = {
 };
 
 
-static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int raw_eth_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
-       raw_hdlc_proto __user *raw_s = ifr->ifr_settings.ifs_ifsu.raw_hdlc;
+       raw_hdlc_proto __user *raw_s = ifs->ifs_ifsu.raw_hdlc;
        const size_t size = sizeof(raw_hdlc_proto);
        raw_hdlc_proto new_settings;
        hdlc_device *hdlc = dev_to_hdlc(dev);
        unsigned int old_qlen;
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto)
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_HDLC_ETH;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_PROTO_HDLC_ETH;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(raw_s, hdlc->state, size))
index 9b7ebf8..f72c92c 100644 (file)
@@ -29,7 +29,7 @@ struct x25_state {
        struct tasklet_struct rx_tasklet;
 };
 
-static int x25_ioctl(struct net_device *dev, struct ifreq *ifr);
+static int x25_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 static struct x25_state *state(hdlc_device *hdlc)
 {
@@ -274,21 +274,21 @@ static struct hdlc_proto proto = {
        .module         = THIS_MODULE,
 };
 
-static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
+static int x25_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
-       x25_hdlc_proto __user *x25_s = ifr->ifr_settings.ifs_ifsu.x25;
+       x25_hdlc_proto __user *x25_s = ifs->ifs_ifsu.x25;
        const size_t size = sizeof(x25_hdlc_proto);
        hdlc_device *hdlc = dev_to_hdlc(dev);
        x25_hdlc_proto new_settings;
        int result;
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_PROTO:
                if (dev_to_hdlc(dev)->proto != &proto)
                        return -EINVAL;
-               ifr->ifr_settings.type = IF_PROTO_X25;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_PROTO_X25;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(x25_s, &state(hdlc)->settings, size))
@@ -303,7 +303,7 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
                        return -EBUSY;
 
                /* backward compatibility */
-               if (ifr->ifr_settings.size == 0) {
+               if (ifs->size == 0) {
                        new_settings.dce = 0;
                        new_settings.modulo = 8;
                        new_settings.window = 7;
index fd61a7c..e985e54 100644 (file)
@@ -142,11 +142,6 @@ static int hostess_close(struct net_device *d)
        return 0;
 }
 
-static int hostess_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
-{
-       return hdlc_ioctl(d, ifr, cmd);
-}
-
 /*     Passed network frames, fire them downwind.
  */
 
@@ -171,7 +166,7 @@ static const struct net_device_ops hostess_ops = {
        .ndo_open       = hostess_open,
        .ndo_stop       = hostess_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = hostess_ioctl,
+       .ndo_siocwandev = hdlc_ioctl,
 };
 
 static struct z8530_dev *sv11_init(int iobase, int irq)
@@ -324,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11");
 
 static struct z8530_dev *sv11_unit;
 
-int init_module(void)
+static int sv11_module_init(void)
 {
        sv11_unit = sv11_init(io, irq);
        if (!sv11_unit)
                return -ENODEV;
        return 0;
 }
+module_init(sv11_module_init);
 
-void cleanup_module(void)
+static void sv11_module_cleanup(void)
 {
        if (sv11_unit)
                sv11_shutdown(sv11_unit);
 }
+module_exit(sv11_module_cleanup);
index 3c51ab2..88a36a0 100644 (file)
@@ -975,11 +975,10 @@ static int init_hdlc_queues(struct port *port)
                        return -ENOMEM;
        }
 
-       port->desc_tab = dma_pool_alloc(dma_pool, GFP_KERNEL,
+       port->desc_tab = dma_pool_zalloc(dma_pool, GFP_KERNEL,
                                        &port->desc_tab_phys);
        if (!port->desc_tab)
                return -ENOMEM;
-       memset(port->desc_tab, 0, POOL_ALLOC_SIZE);
        memset(port->rx_buff_tab, 0, sizeof(port->rx_buff_tab)); /* tables */
        memset(port->tx_buff_tab, 0, sizeof(port->tx_buff_tab));
 
@@ -1255,23 +1254,20 @@ static void find_best_clock(u32 timer_freq, u32 rate, u32 *best, u32 *reg)
        }
 }
 
-static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hss_hdlc_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        const size_t size = sizeof(sync_serial_settings);
        sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
        struct port *port = dev_to_port(dev);
        unsigned long flags;
        int clk;
 
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
-
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_V35;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_V35;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                memset(&new_line, 0, sizeof(new_line));
@@ -1324,7 +1320,7 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -1336,7 +1332,7 @@ static const struct net_device_ops hss_hdlc_ops = {
        .ndo_open       = hss_hdlc_open,
        .ndo_stop       = hss_hdlc_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = hss_hdlc_ioctl,
+       .ndo_siocwandev = hss_hdlc_ioctl,
 };
 
 static int hss_init_one(struct platform_device *pdev)
index 3bd541c..d7d59b4 100644 (file)
@@ -19,7 +19,7 @@ void lmc_mii_writereg(lmc_softc_t * const, unsigned, unsigned, unsigned);
 void lmc_gpio_mkinput(lmc_softc_t * const sc, u32 bits);
 void lmc_gpio_mkoutput(lmc_softc_t * const sc, u32 bits);
 
-int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int lmc_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 extern lmc_media_t lmc_ds3_media;
 extern lmc_media_t lmc_ssi_media;
index 6c163db..ed687bf 100644 (file)
@@ -105,7 +105,8 @@ static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue);
  * linux reserves 16 device specific IOCTLs.  We call them
  * LMCIOC* to control various bits of our world.
  */
-int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
+static int lmc_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, int cmd) /*fold00*/
 {
     lmc_softc_t *sc = dev_to_sc(dev);
     lmc_ctl_t ctl;
@@ -124,7 +125,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
          * To date internally, just copy this out to the user.
          */
     case LMCIOCGINFO: /*fold01*/
-       if (copy_to_user(ifr->ifr_data, &sc->ictl, sizeof(lmc_ctl_t)))
+       if (copy_to_user(data, &sc->ictl, sizeof(lmc_ctl_t)))
                ret = -EFAULT;
        else
                ret = 0;
@@ -141,7 +142,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
             break;
         }
 
-       if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) {
+       if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
                ret = -EFAULT;
                break;
        }
@@ -171,7 +172,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
                break;
            }
 
-           if (copy_from_user(&new_type, ifr->ifr_data, sizeof(u16))) {
+           if (copy_from_user(&new_type, data, sizeof(u16))) {
                ret = -EFAULT;
                break;
            }
@@ -211,8 +212,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
 
         sc->lmc_xinfo.Magic1 = 0xDEADBEEF;
 
-        if (copy_to_user(ifr->ifr_data, &sc->lmc_xinfo,
-                        sizeof(struct lmc_xinfo)))
+       if (copy_to_user(data, &sc->lmc_xinfo, sizeof(struct lmc_xinfo)))
                ret = -EFAULT;
        else
                ret = 0;
@@ -245,9 +245,9 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
                            regVal & T1FRAMER_SEF_MASK;
            }
            spin_unlock_irqrestore(&sc->lmc_lock, flags);
-           if (copy_to_user(ifr->ifr_data, &sc->lmc_device->stats,
+           if (copy_to_user(data, &sc->lmc_device->stats,
                             sizeof(sc->lmc_device->stats)) ||
-               copy_to_user(ifr->ifr_data + sizeof(sc->lmc_device->stats),
+               copy_to_user(data + sizeof(sc->lmc_device->stats),
                             &sc->extra_stats, sizeof(sc->extra_stats)))
                    ret = -EFAULT;
            else
@@ -282,7 +282,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
             break;
         }
 
-       if (copy_from_user(&ctl, ifr->ifr_data, sizeof(lmc_ctl_t))) {
+       if (copy_from_user(&ctl, data, sizeof(lmc_ctl_t))) {
                ret = -EFAULT;
                break;
        }
@@ -314,11 +314,11 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
 
 #ifdef DEBUG
     case LMCIOCDUMPEVENTLOG:
-       if (copy_to_user(ifr->ifr_data, &lmcEventLogIndex, sizeof(u32))) {
+       if (copy_to_user(data, &lmcEventLogIndex, sizeof(u32))) {
                ret = -EFAULT;
                break;
        }
-       if (copy_to_user(ifr->ifr_data + sizeof(u32), lmcEventLogBuf,
+       if (copy_to_user(data + sizeof(u32), lmcEventLogBuf,
                         sizeof(lmcEventLogBuf)))
                ret = -EFAULT;
        else
@@ -346,7 +346,7 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
              */
             netif_stop_queue(dev);
 
-           if (copy_from_user(&xc, ifr->ifr_data, sizeof(struct lmc_xilinx_control))) {
+           if (copy_from_user(&xc, data, sizeof(struct lmc_xilinx_control))) {
                ret = -EFAULT;
                break;
            }
@@ -609,10 +609,8 @@ int lmc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) /*fold00*/
 
         }
         break;
-    default: /*fold01*/
-        /* If we don't know what to do, give the protocol a shot. */
-        ret = lmc_proto_ioctl (sc, ifr, cmd);
-        break;
+    default:
+       break;
     }
 
     return ret;
@@ -788,7 +786,8 @@ static const struct net_device_ops lmc_ops = {
        .ndo_open       = lmc_open,
        .ndo_stop       = lmc_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = lmc_ioctl,
+       .ndo_siocwandev = hdlc_ioctl,
+       .ndo_siocdevprivate = lmc_siocdevprivate,
        .ndo_tx_timeout = lmc_driver_timeout,
        .ndo_get_stats  = lmc_get_stats,
 };
index 4e9cc83..e548761 100644 (file)
@@ -58,13 +58,6 @@ void lmc_proto_attach(lmc_softc_t *sc) /*FOLD00*/
         }
 }
 
-int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd)
-{
-       if (sc->if_type == LMC_PPP)
-               return hdlc_ioctl(sc->lmc_device, ifr, cmd);
-       return -EOPNOTSUPP;
-}
-
 int lmc_proto_open(lmc_softc_t *sc)
 {
        int ret = 0;
index bb098e4..e56e707 100644 (file)
@@ -5,7 +5,6 @@
 #include <linux/hdlc.h>
 
 void lmc_proto_attach(lmc_softc_t *sc);
-int lmc_proto_ioctl(lmc_softc_t *sc, struct ifreq *ifr, int cmd);
 int lmc_proto_open(lmc_softc_t *sc);
 void lmc_proto_close(lmc_softc_t *sc);
 __be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb);
index bdb6dc2..f3e8072 100644 (file)
@@ -227,27 +227,30 @@ static int n2_close(struct net_device *dev)
        return 0;
 }
 
-static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int n2_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                            void __user *data, int cmd)
 {
-       const size_t size = sizeof(sync_serial_settings);
-       sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
-       port_t *port = dev_to_port(dev);
-
 #ifdef DEBUG_RINGS
        if (cmd == SIOCDEVPRIVATE) {
                sca_dump_rings(dev);
                return 0;
        }
 #endif
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
+       return -EOPNOTSUPP;
+}
+
+static int n2_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+       const size_t size = sizeof(sync_serial_settings);
+       sync_serial_settings new_line;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+       port_t *port = dev_to_port(dev);
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_SYNC_SERIAL;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(line, &port->settings, size))
@@ -275,7 +278,7 @@ static int n2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -311,7 +314,8 @@ static const struct net_device_ops n2_ops = {
        .ndo_open       = n2_open,
        .ndo_stop       = n2_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = n2_ioctl,
+       .ndo_siocwandev = n2_ioctl,
+       .ndo_siocdevprivate = n2_siocdevprivate,
 };
 
 static int __init n2_run(unsigned long io, unsigned long irq,
index 7b123a7..4766446 100644 (file)
@@ -174,27 +174,30 @@ static int pc300_close(struct net_device *dev)
        return 0;
 }
 
-static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pc300_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                               void __user *data, int cmd)
 {
-       const size_t size = sizeof(sync_serial_settings);
-       sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
-       int new_type;
-       port_t *port = dev_to_port(dev);
-
 #ifdef DEBUG_RINGS
        if (cmd == SIOCDEVPRIVATE) {
                sca_dump_rings(dev);
                return 0;
        }
 #endif
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
+       return -EOPNOTSUPP;
+}
+
+static int pc300_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+       const size_t size = sizeof(sync_serial_settings);
+       sync_serial_settings new_line;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+       int new_type;
+       port_t *port = dev_to_port(dev);
 
-       if (ifr->ifr_settings.type == IF_GET_IFACE) {
-               ifr->ifr_settings.type = port->iface;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+       if (ifs->type == IF_GET_IFACE) {
+               ifs->type = port->iface;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(line, &port->settings, size))
@@ -203,21 +206,21 @@ static int pc300_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        }
 
        if (port->card->type == PC300_X21 &&
-           (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL ||
-            ifr->ifr_settings.type == IF_IFACE_X21))
+           (ifs->type == IF_IFACE_SYNC_SERIAL ||
+            ifs->type == IF_IFACE_X21))
                new_type = IF_IFACE_X21;
 
        else if (port->card->type == PC300_RSV &&
-                (ifr->ifr_settings.type == IF_IFACE_SYNC_SERIAL ||
-                 ifr->ifr_settings.type == IF_IFACE_V35))
+                (ifs->type == IF_IFACE_SYNC_SERIAL ||
+                 ifs->type == IF_IFACE_V35))
                new_type = IF_IFACE_V35;
 
        else if (port->card->type == PC300_RSV &&
-                ifr->ifr_settings.type == IF_IFACE_V24)
+                ifs->type == IF_IFACE_V24)
                new_type = IF_IFACE_V24;
 
        else
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
 
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
@@ -272,7 +275,8 @@ static const struct net_device_ops pc300_ops = {
        .ndo_open       = pc300_open,
        .ndo_stop       = pc300_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = pc300_ioctl,
+       .ndo_siocwandev = pc300_ioctl,
+       .ndo_siocdevprivate = pc300_siocdevprivate,
 };
 
 static int pc300_pci_init_one(struct pci_dev *pdev,
index dee9c4e..ea86c70 100644 (file)
@@ -167,27 +167,30 @@ static int pci200_close(struct net_device *dev)
        return 0;
 }
 
-static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int pci200_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                void __user *data, int cmd)
 {
-       const size_t size = sizeof(sync_serial_settings);
-       sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
-       port_t *port = dev_to_port(dev);
-
 #ifdef DEBUG_RINGS
        if (cmd == SIOCDEVPRIVATE) {
                sca_dump_rings(dev);
                return 0;
        }
 #endif
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
+       return -EOPNOTSUPP;
+}
+
+static int pci200_ioctl(struct net_device *dev, struct if_settings *ifs)
+{
+       const size_t size = sizeof(sync_serial_settings);
+       sync_serial_settings new_line;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
+       port_t *port = dev_to_port(dev);
 
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_V35;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_V35;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                if (copy_to_user(line, &port->settings, size))
@@ -217,7 +220,7 @@ static int pci200_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -253,7 +256,8 @@ static const struct net_device_ops pci200_ops = {
        .ndo_open       = pci200_open,
        .ndo_stop       = pci200_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = pci200_ioctl,
+       .ndo_siocwandev = pci200_ioctl,
+       .ndo_siocdevprivate = pci200_siocdevprivate,
 };
 
 static int pci200_pci_init_one(struct pci_dev *pdev,
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
deleted file mode 100644 (file)
index 3092a09..0000000
+++ /dev/null
@@ -1,1638 +0,0 @@
-/* sbni.c:  Granch SBNI12 leased line adapters driver for linux
- *
- *     Written 2001 by Denis I.Timofeev (timofeev@granch.ru)
- *
- *     Previous versions were written by Yaroslav Polyakov,
- *     Alexey Zverev and Max Khon.
- *
- *     Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and
- *     double-channel, PCI and ISA modifications.
- *     More info and useful utilities to work with SBNI12 cards you can find
- *     at http://www.granch.com (English) or http://www.granch.ru (Russian)
- *
- *     This software may be used and distributed according to the terms
- *     of the GNU General Public License.
- *
- *
- *  5.0.1      Jun 22 2001
- *       - Fixed bug in probe
- *  5.0.0      Jun 06 2001
- *       - Driver was completely redesigned by Denis I.Timofeev,
- *       - now PCI/Dual, ISA/Dual (with single interrupt line) models are
- *       - supported
- *  3.3.0      Thu Feb 24 21:30:28 NOVT 2000 
- *        - PCI cards support
- *  3.2.0      Mon Dec 13 22:26:53 NOVT 1999
- *       - Completely rebuilt all the packet storage system
- *       -    to work in Ethernet-like style.
- *  3.1.1      just fixed some bugs (5 aug 1999)
- *  3.1.0      added balancing feature (26 apr 1999)
- *  3.0.1      just fixed some bugs (14 apr 1999).
- *  3.0.0      Initial Revision, Yaroslav Polyakov (24 Feb 1999)
- *        - added pre-calculation for CRC, fixed bug with "len-2" frames, 
- *        - removed outbound fragmentation (MTU=1000), written CRC-calculation 
- *        - on asm, added work with hard_headers and now we have our own cache 
- *        - for them, optionally supported word-interchange on some chipsets,
- * 
- *     Known problem: this driver wasn't tested on multiprocessor machine.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/fcntl.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-#include <net/net_namespace.h>
-#include <net/arp.h>
-#include <net/Space.h>
-
-#include <asm/io.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-#include "sbni.h"
-
-/* device private data */
-
-struct net_local {
-       struct timer_list       watchdog;
-       struct net_device       *watchdog_dev;
-
-       spinlock_t      lock;
-       struct sk_buff  *rx_buf_p;              /* receive buffer ptr */
-       struct sk_buff  *tx_buf_p;              /* transmit buffer ptr */
-       
-       unsigned int    framelen;               /* current frame length */
-       unsigned int    maxframe;               /* maximum valid frame length */
-       unsigned int    state;
-       unsigned int    inppos, outpos;         /* positions in rx/tx buffers */
-
-       /* transmitting frame number - from frames qty to 1 */
-       unsigned int    tx_frameno;
-
-       /* expected number of next receiving frame */
-       unsigned int    wait_frameno;
-
-       /* count of failed attempts to frame send - 32 attempts do before
-          error - while receiver tunes on opposite side of wire */
-       unsigned int    trans_errors;
-
-       /* idle time; send pong when limit exceeded */
-       unsigned int    timer_ticks;
-
-       /* fields used for receive level autoselection */
-       int     delta_rxl;
-       unsigned int    cur_rxl_index, timeout_rxl;
-       unsigned long   cur_rxl_rcvd, prev_rxl_rcvd;
-
-       struct sbni_csr1        csr1;           /* current value of CSR1 */
-       struct sbni_in_stats    in_stats;       /* internal statistics */ 
-
-       struct net_device               *second;        /* for ISA/dual cards */
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device               *master;
-       struct net_device               *link;
-#endif
-};
-
-
-static int  sbni_card_probe( unsigned long );
-static int  sbni_pci_probe( struct net_device  * );
-static struct net_device  *sbni_probe1(struct net_device *, unsigned long, int);
-static int  sbni_open( struct net_device * );
-static int  sbni_close( struct net_device * );
-static netdev_tx_t sbni_start_xmit(struct sk_buff *,
-                                        struct net_device * );
-static int  sbni_ioctl( struct net_device *, struct ifreq *, int );
-static void  set_multicast_list( struct net_device * );
-
-static irqreturn_t sbni_interrupt( int, void * );
-static void  handle_channel( struct net_device * );
-static int   recv_frame( struct net_device * );
-static void  send_frame( struct net_device * );
-static int   upload_data( struct net_device *,
-                         unsigned, unsigned, unsigned, u32 );
-static void  download_data( struct net_device *, u32 * );
-static void  sbni_watchdog(struct timer_list *);
-static void  interpret_ack( struct net_device *, unsigned );
-static int   append_frame_to_pkt( struct net_device *, unsigned, u32 );
-static void  indicate_pkt( struct net_device * );
-static void  card_start( struct net_device * );
-static void  prepare_to_send( struct sk_buff *, struct net_device * );
-static void  drop_xmit_queue( struct net_device * );
-static void  send_frame_header( struct net_device *, u32 * );
-static int   skip_tail( unsigned int, unsigned int, u32 );
-static int   check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * );
-static void  change_level( struct net_device * );
-static void  timeout_change_level( struct net_device * );
-static u32   calc_crc32( u32, u8 *, u32 );
-static struct sk_buff *  get_rx_buf( struct net_device * );
-static int  sbni_init( struct net_device * );
-
-#ifdef CONFIG_SBNI_MULTILINE
-static int  enslave( struct net_device *, struct net_device * );
-static int  emancipate( struct net_device * );
-#endif
-
-static const char  version[] =
-       "Granch SBNI12 driver ver 5.0.1  Jun 22 2001  Denis I.Timofeev.\n";
-
-static bool skip_pci_probe     __initdata = false;
-static int  scandone   __initdata = 0;
-static int  num                __initdata = 0;
-
-static unsigned char  rxl_tab[];
-static u32  crc32tab[];
-
-/* A list of all installed devices, for removing the driver module. */
-static struct net_device  *sbni_cards[ SBNI_MAX_NUM_CARDS ];
-
-/* Lists of device's parameters */
-static u32     io[   SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     irq[  SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     baud[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     rxl[  SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     mac[  SBNI_MAX_NUM_CARDS ] __initdata;
-
-#ifndef MODULE
-typedef u32  iarr[];
-static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac };
-#endif
-
-/* A zero-terminated list of I/O addresses to be probed on ISA bus */
-static unsigned int  netcard_portlist[ ] __initdata = { 
-       0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254,
-       0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4,
-       0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4,
-       0 };
-
-#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock)
-
-/*
- * Look for SBNI card which addr stored in dev->base_addr, if nonzero.
- * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA.
- */
-
-static inline int __init
-sbni_isa_probe( struct net_device  *dev )
-{
-       if( dev->base_addr > 0x1ff &&
-           request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) &&
-           sbni_probe1( dev, dev->base_addr, dev->irq ) )
-
-               return  0;
-       else {
-               pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n",
-                      dev->base_addr);
-               return  -ENODEV;
-       }
-}
-
-static const struct net_device_ops sbni_netdev_ops = {
-       .ndo_open               = sbni_open,
-       .ndo_stop               = sbni_close,
-       .ndo_start_xmit         = sbni_start_xmit,
-       .ndo_set_rx_mode        = set_multicast_list,
-       .ndo_do_ioctl           = sbni_ioctl,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void __init sbni_devsetup(struct net_device *dev)
-{
-       ether_setup( dev );
-       dev->netdev_ops = &sbni_netdev_ops;
-}
-
-int __init sbni_probe(int unit)
-{
-       struct net_device *dev;
-       int err;
-
-       dev = alloc_netdev(sizeof(struct net_local), "sbni",
-                          NET_NAME_UNKNOWN, sbni_devsetup);
-       if (!dev)
-               return -ENOMEM;
-
-       dev->netdev_ops = &sbni_netdev_ops;
-
-       sprintf(dev->name, "sbni%d", unit);
-       netdev_boot_setup_check(dev);
-
-       err = sbni_init(dev);
-       if (err) {
-               free_netdev(dev);
-               return err;
-       }
-
-       err = register_netdev(dev);
-       if (err) {
-               release_region( dev->base_addr, SBNI_IO_EXTENT );
-               free_netdev(dev);
-               return err;
-       }
-       pr_info_once("%s", version);
-       return 0;
-}
-
-static int __init sbni_init(struct net_device *dev)
-{
-       int  i;
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-       /* otherwise we have to perform search our adapter */
-
-       if( io[ num ] != -1 ) {
-               dev->base_addr  = io[ num ];
-               dev->irq        = irq[ num ];
-       } else if( scandone  ||  io[ 0 ] != -1 ) {
-               return  -ENODEV;
-       }
-
-       /* if io[ num ] contains non-zero address, then that is on ISA bus */
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-
-       /* ...otherwise - scan PCI first */
-       if( !skip_pci_probe  &&  !sbni_pci_probe( dev ) )
-               return  0;
-
-       if( io[ num ] == -1 ) {
-               /* Auto-scan will be stopped when first ISA card were found */
-               scandone = 1;
-               if( num > 0 )
-                       return  -ENODEV;
-       }
-
-       for( i = 0;  netcard_portlist[ i ];  ++i ) {
-               int  ioaddr = netcard_portlist[ i ];
-               if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) &&
-                   sbni_probe1( dev, ioaddr, 0 ))
-                       return 0;
-       }
-
-       return  -ENODEV;
-}
-
-
-static int __init
-sbni_pci_probe( struct net_device  *dev )
-{
-       struct pci_dev  *pdev = NULL;
-
-       while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev ))
-              != NULL ) {
-               int  pci_irq_line;
-               unsigned long  pci_ioaddr;
-
-               if( pdev->vendor != SBNI_PCI_VENDOR &&
-                   pdev->device != SBNI_PCI_DEVICE )
-                       continue;
-
-               pci_ioaddr = pci_resource_start( pdev, 0 );
-               pci_irq_line = pdev->irq;
-
-               /* Avoid already found cards from previous calls */
-               if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) {
-                       if (pdev->subsystem_device != 2)
-                               continue;
-
-                       /* Dual adapter is present */
-                       if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT,
-                                                       dev->name ) )
-                               continue;
-               }
-
-               if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
-                       pr_warn(
-"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n"
-"You should use the PCI BIOS setup to assign a valid IRQ line.\n",
-                               pci_irq_line );
-
-               /* avoiding re-enable dual adapters */
-               if( (pci_ioaddr & 7) == 0  &&  pci_enable_device( pdev ) ) {
-                       release_region( pci_ioaddr, SBNI_IO_EXTENT );
-                       pci_dev_put( pdev );
-                       return  -EIO;
-               }
-               if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) {
-                       SET_NETDEV_DEV(dev, &pdev->dev);
-                       /* not the best thing to do, but this is all messed up 
-                          for hotplug systems anyway... */
-                       pci_dev_put( pdev );
-                       return  0;
-               }
-       }
-       return  -ENODEV;
-}
-
-
-static struct net_device * __init
-sbni_probe1( struct net_device  *dev,  unsigned long  ioaddr,  int  irq )
-{
-       struct net_local  *nl;
-
-       if( sbni_card_probe( ioaddr ) ) {
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       outb( 0, ioaddr + CSR0 );
-
-       if( irq < 2 ) {
-               unsigned long irq_mask;
-
-               irq_mask = probe_irq_on();
-               outb( EN_INT | TR_REQ, ioaddr + CSR0 );
-               outb( PR_RES, ioaddr + CSR1 );
-               mdelay(50);
-               irq = probe_irq_off(irq_mask);
-               outb( 0, ioaddr + CSR0 );
-
-               if( !irq ) {
-                       pr_err("%s: can't detect device irq!\n", dev->name);
-                       release_region( ioaddr, SBNI_IO_EXTENT );
-                       return NULL;
-               }
-       } else if( irq == 2 )
-               irq = 9;
-
-       dev->irq = irq;
-       dev->base_addr = ioaddr;
-
-       /* Fill in sbni-specific dev fields. */
-       nl = netdev_priv(dev);
-       if( !nl ) {
-               pr_err("%s: unable to get memory!\n", dev->name);
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       memset( nl, 0, sizeof(struct net_local) );
-       spin_lock_init( &nl->lock );
-
-       /* store MAC address (generate if that isn't known) */
-       *(__be16 *)dev->dev_addr = htons( 0x00ff );
-       *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 |
-               ((mac[num] ?
-               mac[num] :
-               (u32)((long)netdev_priv(dev))) & 0x00ffffff));
-
-       /* store link settings (speed, receive level ) */
-       nl->maxframe  = DEFAULT_FRAME_LEN;
-       nl->csr1.rate = baud[ num ];
-
-       if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
-               /* autotune rxl */
-               nl->cur_rxl_index = DEF_RXL;
-               nl->delta_rxl = DEF_RXL_DELTA;
-       } else {
-               nl->delta_rxl = 0;
-       }
-       nl->csr1.rxl  = rxl_tab[ nl->cur_rxl_index ];
-       if( inb( ioaddr + CSR0 ) & 0x01 )
-               nl->state |= FL_SLOW_MODE;
-
-       pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n",
-                 dev->name, dev->base_addr, dev->irq,
-                 ((u8 *)dev->dev_addr)[3],
-                 ((u8 *)dev->dev_addr)[4],
-                 ((u8 *)dev->dev_addr)[5]);
-
-       pr_notice("%s: speed %d",
-                 dev->name,
-                 ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000)
-                 / (1 << nl->csr1.rate));
-
-       if( nl->delta_rxl == 0 )
-               pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index);
-       else
-               pr_cont(", receive level (auto)\n");
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master = dev;
-       nl->link   = NULL;
-#endif
-   
-       sbni_cards[ num++ ] = dev;
-       return  dev;
-}
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_device  *p;
-
-       netif_stop_queue( dev );
-
-       /* Looking for idle device in the list */
-       for( p = dev;  p; ) {
-               struct net_local  *nl = netdev_priv(p);
-               spin_lock( &nl->lock );
-               if( nl->tx_buf_p  ||  (nl->state & FL_LINE_DOWN) ) {
-                       p = nl->link;
-                       spin_unlock( &nl->lock );
-               } else {
-                       /* Idle dev is found */
-                       prepare_to_send( skb, p );
-                       spin_unlock( &nl->lock );
-                       netif_start_queue( dev );
-                       return NETDEV_TX_OK;
-               }
-       }
-
-       return NETDEV_TX_BUSY;
-}
-
-#else  /* CONFIG_SBNI_MULTILINE */
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       netif_stop_queue( dev );
-       spin_lock( &nl->lock );
-
-       prepare_to_send( skb, dev );
-
-       spin_unlock( &nl->lock );
-       return NETDEV_TX_OK;
-}
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-/* -------------------------------------------------------------------------- */
-
-/* interrupt handler */
-
-/*
- *     SBNI12D-10, -11/ISA boards within "common interrupt" mode could not
- * be looked as two independent single-channel devices. Every channel seems
- * as Ethernet interface but interrupt handler must be common. Really, first
- * channel ("master") driver only registers the handler. In its struct net_local
- * it has got pointer to "slave" channel's struct net_local and handles that's
- * interrupts too.
- *     dev of successfully attached ISA SBNI boards is linked to list.
- * While next board driver is initialized, it scans this list. If one
- * has found dev with same irq and ioaddr different by 4 then it assumes
- * this board to be "master".
- */ 
-
-static irqreturn_t
-sbni_interrupt( int  irq,  void  *dev_id )
-{
-       struct net_device         *dev = dev_id;
-       struct net_local  *nl  = netdev_priv(dev);
-       int     repeat;
-
-       spin_lock( &nl->lock );
-       if( nl->second )
-               spin_lock(&NET_LOCAL_LOCK(nl->second));
-
-       do {
-               repeat = 0;
-               if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
-                       handle_channel( dev );
-                       repeat = 1;
-               }
-               if( nl->second  &&      /* second channel present */
-                   (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
-                       handle_channel( nl->second );
-                       repeat = 1;
-               }
-       } while( repeat );
-
-       if( nl->second )
-               spin_unlock(&NET_LOCAL_LOCK(nl->second));
-       spin_unlock( &nl->lock );
-       return IRQ_HANDLED;
-}
-
-
-static void
-handle_channel( struct net_device  *dev )
-{
-       struct net_local        *nl    = netdev_priv(dev);
-       unsigned long           ioaddr = dev->base_addr;
-
-       int  req_ans;
-       unsigned char  csr0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       /* Lock the master device because we going to change its local data */
-       if( nl->state & FL_SLAVE )
-               spin_lock(&NET_LOCAL_LOCK(nl->master));
-#endif
-
-       outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 );
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       for(;;) {
-               csr0 = inb( ioaddr + CSR0 );
-               if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 )
-                       break;
-
-               req_ans = !(nl->state & FL_PREV_OK);
-
-               if( csr0 & RC_RDY )
-                       req_ans = recv_frame( dev );
-
-               /*
-                * TR_RDY always equals 1 here because we have owned the marker,
-                * and we set TR_REQ when disabled interrupts
-                */
-               csr0 = inb( ioaddr + CSR0 );
-               if( !(csr0 & TR_RDY)  ||  (csr0 & RC_RDY) )
-                       netdev_err(dev, "internal error!\n");
-
-               /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
-               if( req_ans  ||  nl->tx_frameno != 0 )
-                       send_frame( dev );
-               else
-                       /* send marker without any data */
-                       outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 );
-       }
-
-       outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               spin_unlock(&NET_LOCAL_LOCK(nl->master));
-#endif
-}
-
-
-/*
- * Routine returns 1 if it needs to acknowledge received frame.
- * Empty frame received without errors won't be acknowledged.
- */
-
-static int
-recv_frame( struct net_device  *dev )
-{
-       struct net_local  *nl   = netdev_priv(dev);
-       unsigned long  ioaddr   = dev->base_addr;
-
-       u32  crc = CRC32_INITIAL;
-
-       unsigned  framelen = 0, frameno, ack;
-       unsigned  is_first, frame_ok = 0;
-
-       if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) {
-               frame_ok = framelen > 4
-                       ?  upload_data( dev, framelen, frameno, is_first, crc )
-                       :  skip_tail( ioaddr, framelen, crc );
-               if( frame_ok )
-                       interpret_ack( dev, ack );
-       }
-
-       outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 );
-       if( frame_ok ) {
-               nl->state |= FL_PREV_OK;
-               if( framelen > 4 )
-                       nl->in_stats.all_rx_number++;
-       } else {
-               nl->state &= ~FL_PREV_OK;
-               change_level( dev );
-               nl->in_stats.all_rx_number++;
-               nl->in_stats.bad_rx_number++;
-       }
-
-       return  !frame_ok  ||  framelen > 4;
-}
-
-
-static void
-send_frame( struct net_device  *dev )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-
-       u32  crc = CRC32_INITIAL;
-
-       if( nl->state & FL_NEED_RESEND ) {
-
-               /* if frame was sended but not ACK'ed - resend it */
-               if( nl->trans_errors ) {
-                       --nl->trans_errors;
-                       if( nl->framelen != 0 )
-                               nl->in_stats.resend_tx_number++;
-               } else {
-                       /* cannot xmit with many attempts */
-#ifdef CONFIG_SBNI_MULTILINE
-                       if( (nl->state & FL_SLAVE)  ||  nl->link )
-#endif
-                       nl->state |= FL_LINE_DOWN;
-                       drop_xmit_queue( dev );
-                       goto  do_send;
-               }
-       } else
-               nl->trans_errors = TR_ERROR_COUNT;
-
-       send_frame_header( dev, &crc );
-       nl->state |= FL_NEED_RESEND;
-       /*
-        * FL_NEED_RESEND will be cleared after ACK, but if empty
-        * frame sended then in prepare_to_send next frame
-        */
-
-
-       if( nl->framelen ) {
-               download_data( dev, &crc );
-               nl->in_stats.all_tx_number++;
-               nl->state |= FL_WAIT_ACK;
-       }
-
-       outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc );
-
-do_send:
-       outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 );
-
-       if( nl->tx_frameno )
-               /* next frame exists - we request card to send it */
-               outb( inb( dev->base_addr + CSR0 ) | TR_REQ,
-                     dev->base_addr + CSR0 );
-}
-
-
-/*
- * Write the frame data into adapter's buffer memory, and calculate CRC.
- * Do padding if necessary.
- */
-
-static void
-download_data( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-       struct sk_buff    *skb   = nl->tx_buf_p;
-
-       unsigned  len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen);
-
-       outsb( dev->base_addr + DAT, skb->data + nl->outpos, len );
-       *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
-
-       /* if packet too short we should write some more bytes to pad */
-       for( len = nl->framelen - len;  len--; ) {
-               outb( 0, dev->base_addr + DAT );
-               *crc_p = CRC32( 0, *crc_p );
-       }
-}
-
-
-static int
-upload_data( struct net_device  *dev,  unsigned  framelen,  unsigned  frameno,
-            unsigned  is_first,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       int  frame_ok;
-
-       if( is_first ) {
-               nl->wait_frameno = frameno;
-               nl->inppos = 0;
-       }
-
-       if( nl->wait_frameno == frameno ) {
-
-               if( nl->inppos + framelen  <=  ETHER_MAX_LEN )
-                       frame_ok = append_frame_to_pkt( dev, framelen, crc );
-
-               /*
-                * if CRC is right but framelen incorrect then transmitter
-                * error was occurred... drop entire packet
-                */
-               else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
-                        != 0 ) {
-                       nl->wait_frameno = 0;
-                       nl->inppos = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-                       nl->master->stats.rx_errors++;
-                       nl->master->stats.rx_missed_errors++;
-#else
-                       dev->stats.rx_errors++;
-                       dev->stats.rx_missed_errors++;
-#endif
-               }
-                       /* now skip all frames until is_first != 0 */
-       } else
-               frame_ok = skip_tail( dev->base_addr, framelen, crc );
-
-       if( is_first  &&  !frame_ok ) {
-               /*
-                * Frame has been broken, but we had already stored
-                * is_first... Drop entire packet.
-                */
-               nl->wait_frameno = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.rx_errors++;
-               nl->master->stats.rx_crc_errors++;
-#else
-               dev->stats.rx_errors++;
-               dev->stats.rx_crc_errors++;
-#endif
-       }
-
-       return  frame_ok;
-}
-
-
-static inline void
-send_complete( struct net_device *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master->stats.tx_packets++;
-       nl->master->stats.tx_bytes += nl->tx_buf_p->len;
-#else
-       dev->stats.tx_packets++;
-       dev->stats.tx_bytes += nl->tx_buf_p->len;
-#endif
-       dev_consume_skb_irq(nl->tx_buf_p);
-
-       nl->tx_buf_p = NULL;
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->framelen   = 0;
-}
-
-
-static void
-interpret_ack( struct net_device  *dev,  unsigned  ack )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( ack == FRAME_SENT_OK ) {
-               nl->state &= ~FL_NEED_RESEND;
-
-               if( nl->state & FL_WAIT_ACK ) {
-                       nl->outpos += nl->framelen;
-
-                       if( --nl->tx_frameno ) {
-                               nl->framelen = min_t(unsigned int,
-                                                  nl->maxframe,
-                                                  nl->tx_buf_p->len - nl->outpos);
-                       } else {
-                               send_complete( dev );
-#ifdef CONFIG_SBNI_MULTILINE
-                               netif_wake_queue( nl->master );
-#else
-                               netif_wake_queue( dev );
-#endif
-                       }
-               }
-       }
-
-       nl->state &= ~FL_WAIT_ACK;
-}
-
-
-/*
- * Glue received frame with previous fragments of packet.
- * Indicate packet when last frame would be accepted.
- */
-
-static int
-append_frame_to_pkt( struct net_device  *dev,  unsigned  framelen,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       u8  *p;
-
-       if( nl->inppos + framelen  >  ETHER_MAX_LEN )
-               return  0;
-
-       if( !nl->rx_buf_p  &&  !(nl->rx_buf_p = get_rx_buf( dev )) )
-               return  0;
-
-       p = nl->rx_buf_p->data + nl->inppos;
-       insb( dev->base_addr + DAT, p, framelen );
-       if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER )
-               return  0;
-
-       nl->inppos += framelen - 4;
-       if( --nl->wait_frameno == 0 )           /* last frame received */
-               indicate_pkt( dev );
-
-       return  1;
-}
-
-
-/*
- * Prepare to start output on adapter.
- * Transmitter will be actually activated when marker is accepted.
- */
-
-static void
-prepare_to_send( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       unsigned int  len;
-
-       /* nl->tx_buf_p == NULL here! */
-       if( nl->tx_buf_p )
-               netdev_err(dev, "memory leak!\n");
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-
-       len = skb->len;
-       if( len < SBNI_MIN_LEN )
-               len = SBNI_MIN_LEN;
-
-       nl->tx_buf_p    = skb;
-       nl->tx_frameno  = DIV_ROUND_UP(len, nl->maxframe);
-       nl->framelen    = len < nl->maxframe  ?  len  :  nl->maxframe;
-
-       outb( inb( dev->base_addr + CSR0 ) | TR_REQ,  dev->base_addr + CSR0 );
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_trans_update(nl->master);
-#else
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-drop_xmit_queue( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->tx_buf_p ) {
-               dev_kfree_skb_any( nl->tx_buf_p );
-               nl->tx_buf_p = NULL;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.tx_errors++;
-               nl->master->stats.tx_carrier_errors++;
-#else
-               dev->stats.tx_errors++;
-               dev->stats.tx_carrier_errors++;
-#endif
-       }
-
-       nl->tx_frameno  = 0;
-       nl->framelen    = 0;
-       nl->outpos      = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_start_queue( nl->master );
-       netif_trans_update(nl->master);
-#else
-       netif_start_queue( dev );
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-send_frame_header( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       u32  crc = *crc_p;
-       u32  len_field = nl->framelen + 6;      /* CRC + frameno + reserved */
-       u8   value;
-
-       if( nl->state & FL_NEED_RESEND )
-               len_field |= FRAME_RETRY;       /* non-first attempt... */
-
-       if( nl->outpos == 0 )
-               len_field |= FRAME_FIRST;
-
-       len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
-       outb( SBNI_SIG, dev->base_addr + DAT );
-
-       value = (u8) len_field;
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-       value = (u8) (len_field >> 8);
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-
-       outb( nl->tx_frameno, dev->base_addr + DAT );
-       crc = CRC32( nl->tx_frameno, crc );
-       outb( 0, dev->base_addr + DAT );
-       crc = CRC32( 0, crc );
-       *crc_p = crc;
-}
-
-
-/*
- * if frame tail not needed (incorrect number or received twice),
- * it won't store, but CRC will be calculated
- */
-
-static int
-skip_tail( unsigned int  ioaddr,  unsigned int  tail_len,  u32 crc )
-{
-       while( tail_len-- )
-               crc = CRC32( inb( ioaddr + DAT ), crc );
-
-       return  crc == CRC32_REMAINDER;
-}
-
-
-/*
- * Preliminary checks if frame header is correct, calculates its CRC
- * and split it to simple fields
- */
-
-static int
-check_fhdr( u32  ioaddr,  u32  *framelen,  u32  *frameno,  u32  *ack,
-           u32  *is_first,  u32  *crc_p )
-{
-       u32  crc = *crc_p;
-       u8   value;
-
-       if( inb( ioaddr + DAT ) != SBNI_SIG )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *framelen = (u32)value;
-       crc = CRC32( value, crc );
-       value = inb( ioaddr + DAT );
-       *framelen |= ((u32)value) << 8;
-       crc = CRC32( value, crc );
-
-       *ack = *framelen & FRAME_ACK_MASK;
-       *is_first = (*framelen & FRAME_FIRST) != 0;
-
-       if( (*framelen &= FRAME_LEN_MASK) < 6 ||
-           *framelen > SBNI_MAX_FRAME - 3 )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *frameno = (u32)value;
-       crc = CRC32( value, crc );
-
-       crc = CRC32( inb( ioaddr + DAT ), crc );        /* reserved byte */
-       *framelen -= 2;
-
-       *crc_p = crc;
-       return  1;
-}
-
-
-static struct sk_buff *
-get_rx_buf( struct net_device  *dev )
-{
-       /* +2 is to compensate for the alignment fixup below */
-       struct sk_buff  *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 );
-       if( !skb )
-               return  NULL;
-
-       skb_reserve( skb, 2 );          /* Align IP on longword boundaries */
-       return  skb;
-}
-
-
-static void
-indicate_pkt( struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct sk_buff    *skb = nl->rx_buf_p;
-
-       skb_put( skb, nl->inppos );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       skb->protocol = eth_type_trans( skb, nl->master );
-       netif_rx( skb );
-       ++nl->master->stats.rx_packets;
-       nl->master->stats.rx_bytes += nl->inppos;
-#else
-       skb->protocol = eth_type_trans( skb, dev );
-       netif_rx( skb );
-       ++dev->stats.rx_packets;
-       dev->stats.rx_bytes += nl->inppos;
-#endif
-       nl->rx_buf_p = NULL;    /* protocol driver will clear this sk_buff */
-}
-
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Routine checks periodically wire activity and regenerates marker if
- * connect was inactive for a long time.
- */
-
-static void
-sbni_watchdog(struct timer_list *t)
-{
-       struct net_local   *nl  = from_timer(nl, t, watchdog);
-       struct net_device  *dev = nl->watchdog_dev;
-       unsigned long      flags;
-       unsigned char      csr0;
-
-       spin_lock_irqsave( &nl->lock, flags );
-
-       csr0 = inb( dev->base_addr + CSR0 );
-       if( csr0 & RC_CHK ) {
-
-               if( nl->timer_ticks ) {
-                       if( csr0 & (RC_RDY | BU_EMP) )
-                               /* receiving not active */
-                               nl->timer_ticks--;
-               } else {
-                       nl->in_stats.timeout_number++;
-                       if( nl->delta_rxl )
-                               timeout_change_level( dev );
-
-                       outb( *(u_char *)&nl->csr1 | PR_RES,
-                             dev->base_addr + CSR1 );
-                       csr0 = inb( dev->base_addr + CSR0 );
-               }
-       } else
-               nl->state &= ~FL_LINE_DOWN;
-
-       outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); 
-
-       mod_timer(t, jiffies + SBNI_TIMEOUT);
-
-       spin_unlock_irqrestore( &nl->lock, flags );
-}
-
-
-static unsigned char  rxl_tab[] = {
-       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
-       0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
-};
-
-#define SIZE_OF_TIMEOUT_RXL_TAB 4
-static unsigned char  timeout_rxl_tab[] = {
-       0x03, 0x05, 0x08, 0x0b
-};
-
-/* -------------------------------------------------------------------------- */
-
-static void
-card_start( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->state |= FL_PREV_OK;
-
-       nl->inppos = nl->outpos = 0;
-       nl->wait_frameno = 0;
-       nl->tx_frameno   = 0;
-       nl->framelen     = 0;
-
-       outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-       outb( EN_INT, dev->base_addr + CSR0 );
-}
-
-/* -------------------------------------------------------------------------- */
-
-/* Receive level auto-selection */
-
-static void
-change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->delta_rxl == 0 )        /* do not auto-negotiate RxL */
-               return;
-
-       if( nl->cur_rxl_index == 0 )
-               nl->delta_rxl = 1;
-       else if( nl->cur_rxl_index == 15 )
-               nl->delta_rxl = -1;
-       else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd )
-               nl->delta_rxl = -nl->delta_rxl;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ];
-       inb( dev->base_addr + CSR0 );   /* needs for PCI cards */
-       outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-
-static void
-timeout_change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ];
-       if( ++nl->timeout_rxl >= 4 )
-               nl->timeout_rxl = 0;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-       inb( dev->base_addr + CSR0 );
-       outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-/* -------------------------------------------------------------------------- */
-
-/*
- *     Open/initialize the board. 
- */
-
-static int
-sbni_open( struct net_device  *dev )
-{
-       struct net_local        *nl = netdev_priv(dev);
-       struct timer_list       *w  = &nl->watchdog;
-
-       /*
-        * For double ISA adapters within "common irq" mode, we have to
-        * determine whether primary or secondary channel is initialized,
-        * and set the irq handler only in first case.
-        */
-       if( dev->base_addr < 0x400 ) {          /* ISA only */
-               struct net_device  **p = sbni_cards;
-               for( ;  *p  &&  p < sbni_cards + SBNI_MAX_NUM_CARDS;  ++p )
-                       if( (*p)->irq == dev->irq &&
-                           ((*p)->base_addr == dev->base_addr + 4 ||
-                            (*p)->base_addr == dev->base_addr - 4) &&
-                           (*p)->flags & IFF_UP ) {
-
-                               ((struct net_local *) (netdev_priv(*p)))
-                                       ->second = dev;
-                               netdev_notice(dev, "using shared irq with %s\n",
-                                             (*p)->name);
-                               nl->state |= FL_SECONDARY;
-                               goto  handler_attached;
-                       }
-       }
-
-       if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) {
-               netdev_err(dev, "unable to get IRQ %d\n", dev->irq);
-               return  -EAGAIN;
-       }
-
-handler_attached:
-
-       spin_lock( &nl->lock );
-       memset( &dev->stats, 0, sizeof(struct net_device_stats) );
-       memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-
-       card_start( dev );
-
-       netif_start_queue( dev );
-
-       /* set timer watchdog */
-       nl->watchdog_dev = dev;
-       timer_setup(w, sbni_watchdog, 0);
-       w->expires      = jiffies + SBNI_TIMEOUT;
-       add_timer( w );
-   
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-static int
-sbni_close( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->second  &&  nl->second->flags & IFF_UP ) {
-               netdev_notice(dev, "Secondary channel (%s) is active!\n",
-                             nl->second->name);
-               return  -EBUSY;
-       }
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               emancipate( dev );
-       else
-               while( nl->link )       /* it's master device! */
-                       emancipate( nl->link );
-#endif
-
-       spin_lock( &nl->lock );
-
-       nl->second = NULL;
-       drop_xmit_queue( dev ); 
-       netif_stop_queue( dev );
-   
-       del_timer( &nl->watchdog );
-
-       outb( 0, dev->base_addr + CSR0 );
-
-       if( !(nl->state & FL_SECONDARY) )
-               free_irq( dev->irq, dev );
-       nl->state &= FL_SECONDARY;
-
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-/*
-       Valid combinations in CSR0 (for probing):
-
-       VALID_DECODER   0000,0011,1011,1010
-
-                                       ; 0   ; -
-                               TR_REQ  ; 1   ; +
-                       TR_RDY          ; 2   ; -
-                       TR_RDY  TR_REQ  ; 3   ; +
-               BU_EMP                  ; 4   ; +
-               BU_EMP          TR_REQ  ; 5   ; +
-               BU_EMP  TR_RDY          ; 6   ; -
-               BU_EMP  TR_RDY  TR_REQ  ; 7   ; +
-       RC_RDY                          ; 8   ; +
-       RC_RDY                  TR_REQ  ; 9   ; +
-       RC_RDY          TR_RDY          ; 10  ; -
-       RC_RDY          TR_RDY  TR_REQ  ; 11  ; -
-       RC_RDY  BU_EMP                  ; 12  ; -
-       RC_RDY  BU_EMP          TR_REQ  ; 13  ; -
-       RC_RDY  BU_EMP  TR_RDY          ; 14  ; -
-       RC_RDY  BU_EMP  TR_RDY  TR_REQ  ; 15  ; -
-*/
-
-#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
-
-
-static int
-sbni_card_probe( unsigned long  ioaddr )
-{
-       unsigned char  csr0;
-
-       csr0 = inb( ioaddr + CSR0 );
-       if( csr0 != 0xff  &&  csr0 != 0x00 ) {
-               csr0 &= ~EN_INT;
-               if( csr0 & BU_EMP )
-                       csr0 |= EN_INT;
-      
-               if( VALID_DECODER & (1 << (csr0 >> 4)) )
-                       return  0;
-       }
-   
-       return  -ENODEV;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static int
-sbni_ioctl( struct net_device  *dev,  struct ifreq  *ifr,  int  cmd )
-{
-       struct net_local  *nl = netdev_priv(dev);
-       struct sbni_flags  flags;
-       int  error = 0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device  *slave_dev;
-       char  slave_name[ 8 ];
-#endif
-  
-       switch( cmd ) {
-       case  SIOCDEVGETINSTATS :
-               if (copy_to_user( ifr->ifr_data, &nl->in_stats,
-                                       sizeof(struct sbni_in_stats) ))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVRESINSTATS :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-               memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-               break;
-
-       case  SIOCDEVGHWSTATE :
-               flags.mac_addr  = *(u32 *)(dev->dev_addr + 3);
-               flags.rate      = nl->csr1.rate;
-               flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0;
-               flags.rxl       = nl->cur_rxl_index;
-               flags.fixed_rxl = nl->delta_rxl == 0;
-
-               if (copy_to_user( ifr->ifr_data, &flags, sizeof flags ))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVSHWSTATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               spin_lock( &nl->lock );
-               flags = *(struct sbni_flags*) &ifr->ifr_ifru;
-               if( flags.fixed_rxl ) {
-                       nl->delta_rxl = 0;
-                       nl->cur_rxl_index = flags.rxl;
-               } else {
-                       nl->delta_rxl = DEF_RXL_DELTA;
-                       nl->cur_rxl_index = DEF_RXL;
-               }
-
-               nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-               nl->csr1.rate = flags.rate;
-               outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-               spin_unlock( &nl->lock );
-               break;
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-       case  SIOCDEVENSLAVE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name ))
-                       return -EFAULT;
-               slave_dev = dev_get_by_name(&init_net, slave_name );
-               if( !slave_dev  ||  !(slave_dev->flags & IFF_UP) ) {
-                       netdev_err(dev, "trying to enslave non-active device %s\n",
-                                  slave_name);
-                       if (slave_dev)
-                               dev_put(slave_dev);
-                       return  -EPERM;
-               }
-
-               return  enslave( dev, slave_dev );
-
-       case  SIOCDEVEMANSIPATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               return  emancipate( dev );
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-       default :
-               return  -EOPNOTSUPP;
-       }
-
-       return  error;
-}
-
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static int
-enslave( struct net_device  *dev,  struct net_device  *slave_dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct net_local  *snl = netdev_priv(slave_dev);
-
-       if( nl->state & FL_SLAVE )      /* This isn't master or free device */
-               return  -EBUSY;
-
-       if( snl->state & FL_SLAVE )     /* That was already enslaved */
-               return  -EBUSY;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-
-       /* append to list */
-       snl->link = nl->link;
-       nl->link  = slave_dev;
-       snl->master = dev;
-       snl->state |= FL_SLAVE;
-
-       /* Summary statistics of MultiLine operation will be stored
-          in master's counters */
-       memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) );
-       netif_stop_queue( slave_dev );
-       netif_wake_queue( dev );        /* Now we are able to transmit */
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-       netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name);
-       return  0;
-}
-
-
-static int
-emancipate( struct net_device  *dev )
-{
-       struct net_local   *snl = netdev_priv(dev);
-       struct net_device  *p   = snl->master;
-       struct net_local   *nl  = netdev_priv(p);
-
-       if( !(snl->state & FL_SLAVE) )
-               return  -EINVAL;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-       drop_xmit_queue( dev );
-
-       /* exclude from list */
-       for(;;) {       /* must be in list */
-               struct net_local  *t = netdev_priv(p);
-               if( t->link == dev ) {
-                       t->link = snl->link;
-                       break;
-               }
-               p = t->link;
-       }
-
-       snl->link = NULL;
-       snl->master = dev;
-       snl->state &= ~FL_SLAVE;
-
-       netif_start_queue( dev );
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-
-       dev_put( dev );
-       return  0;
-}
-
-#endif
-
-static void
-set_multicast_list( struct net_device  *dev )
-{
-       return;         /* sbni always operate in promiscuos mode */
-}
-
-
-#ifdef MODULE
-module_param_hw_array(io, int, ioport, NULL, 0);
-module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_array(baud, int, NULL, 0);
-module_param_array(rxl, int, NULL, 0);
-module_param_array(mac, int, NULL, 0);
-module_param(skip_pci_probe, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-
-int __init init_module( void )
-{
-       struct net_device  *dev;
-       int err;
-
-       while( num < SBNI_MAX_NUM_CARDS ) {
-               dev = alloc_netdev(sizeof(struct net_local), "sbni%d",
-                                  NET_NAME_UNKNOWN, sbni_devsetup);
-               if( !dev)
-                       break;
-
-               sprintf( dev->name, "sbni%d", num );
-
-               err = sbni_init(dev);
-               if (err) {
-                       free_netdev(dev);
-                       break;
-               }
-
-               if( register_netdev( dev ) ) {
-                       release_region( dev->base_addr, SBNI_IO_EXTENT );
-                       free_netdev( dev );
-                       break;
-               }
-       }
-
-       return  *sbni_cards  ?  0  :  -ENODEV;
-}
-
-void
-cleanup_module(void)
-{
-       int i;
-
-       for (i = 0;  i < SBNI_MAX_NUM_CARDS;  ++i) {
-               struct net_device *dev = sbni_cards[i];
-               if (dev != NULL) {
-                       unregister_netdev(dev);
-                       release_region(dev->base_addr, SBNI_IO_EXTENT);
-                       free_netdev(dev);
-               }
-       }
-}
-
-#else  /* MODULE */
-
-static int __init
-sbni_setup( char  *p )
-{
-       int  n, parm;
-
-       if( *p++ != '(' )
-               goto  bad_param;
-
-       for( n = 0, parm = 0;  *p  &&  n < 8; ) {
-               (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 );
-               if( !*p  ||  *p == ')' )
-                       return 1;
-               if( *p == ';' ) {
-                       ++p;
-                       ++n;
-                       parm = 0;
-               } else if( *p++ != ',' ) {
-                       break;
-               } else {
-                       if( ++parm >= 5 )
-                               break;
-               }
-       }
-bad_param:
-       pr_err("Error in sbni kernel parameter!\n");
-       return 0;
-}
-
-__setup( "sbni=", sbni_setup );
-
-#endif /* MODULE */
-
-/* -------------------------------------------------------------------------- */
-
-static u32
-calc_crc32( u32  crc,  u8  *p,  u32  len )
-{
-       while( len-- )
-               crc = CRC32( *p++, crc );
-
-       return  crc;
-}
-
-static u32  crc32tab[] __attribute__ ((aligned(8))) = {
-       0xD202EF8D,  0xA505DF1B,  0x3C0C8EA1,  0x4B0BBE37,
-       0xD56F2B94,  0xA2681B02,  0x3B614AB8,  0x4C667A2E,
-       0xDCD967BF,  0xABDE5729,  0x32D70693,  0x45D03605,
-       0xDBB4A3A6,  0xACB39330,  0x35BAC28A,  0x42BDF21C,
-       0xCFB5FFE9,  0xB8B2CF7F,  0x21BB9EC5,  0x56BCAE53,
-       0xC8D83BF0,  0xBFDF0B66,  0x26D65ADC,  0x51D16A4A,
-       0xC16E77DB,  0xB669474D,  0x2F6016F7,  0x58672661,
-       0xC603B3C2,  0xB1048354,  0x280DD2EE,  0x5F0AE278,
-       0xE96CCF45,  0x9E6BFFD3,  0x0762AE69,  0x70659EFF,
-       0xEE010B5C,  0x99063BCA,  0x000F6A70,  0x77085AE6,
-       0xE7B74777,  0x90B077E1,  0x09B9265B,  0x7EBE16CD,
-       0xE0DA836E,  0x97DDB3F8,  0x0ED4E242,  0x79D3D2D4,
-       0xF4DBDF21,  0x83DCEFB7,  0x1AD5BE0D,  0x6DD28E9B,
-       0xF3B61B38,  0x84B12BAE,  0x1DB87A14,  0x6ABF4A82,
-       0xFA005713,  0x8D076785,  0x140E363F,  0x630906A9,
-       0xFD6D930A,  0x8A6AA39C,  0x1363F226,  0x6464C2B0,
-       0xA4DEAE1D,  0xD3D99E8B,  0x4AD0CF31,  0x3DD7FFA7,
-       0xA3B36A04,  0xD4B45A92,  0x4DBD0B28,  0x3ABA3BBE,
-       0xAA05262F,  0xDD0216B9,  0x440B4703,  0x330C7795,
-       0xAD68E236,  0xDA6FD2A0,  0x4366831A,  0x3461B38C,
-       0xB969BE79,  0xCE6E8EEF,  0x5767DF55,  0x2060EFC3,
-       0xBE047A60,  0xC9034AF6,  0x500A1B4C,  0x270D2BDA,
-       0xB7B2364B,  0xC0B506DD,  0x59BC5767,  0x2EBB67F1,
-       0xB0DFF252,  0xC7D8C2C4,  0x5ED1937E,  0x29D6A3E8,
-       0x9FB08ED5,  0xE8B7BE43,  0x71BEEFF9,  0x06B9DF6F,
-       0x98DD4ACC,  0xEFDA7A5A,  0x76D32BE0,  0x01D41B76,
-       0x916B06E7,  0xE66C3671,  0x7F6567CB,  0x0862575D,
-       0x9606C2FE,  0xE101F268,  0x7808A3D2,  0x0F0F9344,
-       0x82079EB1,  0xF500AE27,  0x6C09FF9D,  0x1B0ECF0B,
-       0x856A5AA8,  0xF26D6A3E,  0x6B643B84,  0x1C630B12,
-       0x8CDC1683,  0xFBDB2615,  0x62D277AF,  0x15D54739,
-       0x8BB1D29A,  0xFCB6E20C,  0x65BFB3B6,  0x12B88320,
-       0x3FBA6CAD,  0x48BD5C3B,  0xD1B40D81,  0xA6B33D17,
-       0x38D7A8B4,  0x4FD09822,  0xD6D9C998,  0xA1DEF90E,
-       0x3161E49F,  0x4666D409,  0xDF6F85B3,  0xA868B525,
-       0x360C2086,  0x410B1010,  0xD80241AA,  0xAF05713C,
-       0x220D7CC9,  0x550A4C5F,  0xCC031DE5,  0xBB042D73,
-       0x2560B8D0,  0x52678846,  0xCB6ED9FC,  0xBC69E96A,
-       0x2CD6F4FB,  0x5BD1C46D,  0xC2D895D7,  0xB5DFA541,
-       0x2BBB30E2,  0x5CBC0074,  0xC5B551CE,  0xB2B26158,
-       0x04D44C65,  0x73D37CF3,  0xEADA2D49,  0x9DDD1DDF,
-       0x03B9887C,  0x74BEB8EA,  0xEDB7E950,  0x9AB0D9C6,
-       0x0A0FC457,  0x7D08F4C1,  0xE401A57B,  0x930695ED,
-       0x0D62004E,  0x7A6530D8,  0xE36C6162,  0x946B51F4,
-       0x19635C01,  0x6E646C97,  0xF76D3D2D,  0x806A0DBB,
-       0x1E0E9818,  0x6909A88E,  0xF000F934,  0x8707C9A2,
-       0x17B8D433,  0x60BFE4A5,  0xF9B6B51F,  0x8EB18589,
-       0x10D5102A,  0x67D220BC,  0xFEDB7106,  0x89DC4190,
-       0x49662D3D,  0x3E611DAB,  0xA7684C11,  0xD06F7C87,
-       0x4E0BE924,  0x390CD9B2,  0xA0058808,  0xD702B89E,
-       0x47BDA50F,  0x30BA9599,  0xA9B3C423,  0xDEB4F4B5,
-       0x40D06116,  0x37D75180,  0xAEDE003A,  0xD9D930AC,
-       0x54D13D59,  0x23D60DCF,  0xBADF5C75,  0xCDD86CE3,
-       0x53BCF940,  0x24BBC9D6,  0xBDB2986C,  0xCAB5A8FA,
-       0x5A0AB56B,  0x2D0D85FD,  0xB404D447,  0xC303E4D1,
-       0x5D677172,  0x2A6041E4,  0xB369105E,  0xC46E20C8,
-       0x72080DF5,  0x050F3D63,  0x9C066CD9,  0xEB015C4F,
-       0x7565C9EC,  0x0262F97A,  0x9B6BA8C0,  0xEC6C9856,
-       0x7CD385C7,  0x0BD4B551,  0x92DDE4EB,  0xE5DAD47D,
-       0x7BBE41DE,  0x0CB97148,  0x95B020F2,  0xE2B71064,
-       0x6FBF1D91,  0x18B82D07,  0x81B17CBD,  0xF6B64C2B,
-       0x68D2D988,  0x1FD5E91E,  0x86DCB8A4,  0xF1DB8832,
-       0x616495A3,  0x1663A535,  0x8F6AF48F,  0xF86DC419,
-       0x660951BA,  0x110E612C,  0x88073096,  0xFF000000
-};
-
diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h
deleted file mode 100644 (file)
index 8426451..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* sbni.h:  definitions for a Granch SBNI12 driver, version 5.0.0
- * Written 2001 Denis I.Timofeev (timofeev@granch.ru)
- * This file is distributed under the GNU GPL
- */
-
-#ifndef SBNI_H
-#define SBNI_H
-
-#ifdef SBNI_DEBUG
-#define DP( A ) A
-#else
-#define DP( A )
-#endif
-
-
-/* We don't have official vendor id yet... */
-#define SBNI_PCI_VENDOR        0x55 
-#define SBNI_PCI_DEVICE        0x9f
-
-#define ISA_MODE 0x00
-#define PCI_MODE 0x01
-
-#define        SBNI_IO_EXTENT  4
-
-enum sbni_reg {
-       CSR0 = 0,
-       CSR1 = 1,
-       DAT  = 2
-};
-
-/* CSR0 mapping */
-enum {
-       BU_EMP = 0x02,
-       RC_CHK = 0x04,
-       CT_ZER = 0x08,
-       TR_REQ = 0x10,
-       TR_RDY = 0x20,
-       EN_INT = 0x40,
-       RC_RDY = 0x80
-};
-
-
-/* CSR1 mapping */
-#define PR_RES 0x80
-
-struct sbni_csr1 {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-       u8 rxl  : 5;
-       u8 rate : 2;
-       u8      : 1;
-#else
-       u8      : 1;
-       u8 rate : 2;
-       u8 rxl  : 5;
-#endif
-};
-
-/* fields in frame header */
-#define FRAME_ACK_MASK  (unsigned short)0x7000
-#define FRAME_LEN_MASK  (unsigned short)0x03FF
-#define FRAME_FIRST     (unsigned short)0x8000
-#define FRAME_RETRY     (unsigned short)0x0800
-
-#define FRAME_SENT_BAD  (unsigned short)0x4000
-#define FRAME_SENT_OK   (unsigned short)0x3000
-
-
-/* state flags */
-enum {
-       FL_WAIT_ACK    = 0x01,
-       FL_NEED_RESEND = 0x02,
-       FL_PREV_OK     = 0x04,
-       FL_SLOW_MODE   = 0x08,
-       FL_SECONDARY   = 0x10,
-#ifdef CONFIG_SBNI_MULTILINE
-       FL_SLAVE       = 0x20,
-#endif
-       FL_LINE_DOWN   = 0x40
-};
-
-
-enum {
-       DEFAULT_IOBASEADDR = 0x210,
-       DEFAULT_INTERRUPTNUMBER = 5,
-       DEFAULT_RATE = 0,
-       DEFAULT_FRAME_LEN = 1012
-};
-
-#define DEF_RXL_DELTA  -1
-#define DEF_RXL                0xf
-
-#define SBNI_SIG 0x5a
-
-#define        SBNI_MIN_LEN    60      /* Shortest Ethernet frame without FCS */
-#define SBNI_MAX_FRAME 1023
-#define ETHER_MAX_LEN  1518
-
-#define SBNI_TIMEOUT   (HZ/10)
-
-#define TR_ERROR_COUNT 32
-#define CHANGE_LEVEL_START_TICKS 4
-
-#define SBNI_MAX_NUM_CARDS     16
-
-/* internal SBNI-specific statistics */
-struct sbni_in_stats {
-       u32     all_rx_number;
-       u32     bad_rx_number;
-       u32     timeout_number;
-       u32     all_tx_number;
-       u32     resend_tx_number;
-};
-
-/* SBNI ioctl params */
-#define SIOCDEVGETINSTATS      SIOCDEVPRIVATE
-#define SIOCDEVRESINSTATS      SIOCDEVPRIVATE+1
-#define SIOCDEVGHWSTATE        SIOCDEVPRIVATE+2
-#define SIOCDEVSHWSTATE        SIOCDEVPRIVATE+3
-#define SIOCDEVENSLAVE         SIOCDEVPRIVATE+4
-#define SIOCDEVEMANSIPATE      SIOCDEVPRIVATE+5
-
-
-/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */
-struct sbni_flags {
-       u32     rxl             : 4;
-       u32     rate            : 2;
-       u32     fixed_rxl       : 1;
-       u32     slow_mode       : 1;
-       u32     mac_addr        : 24;
-};
-
-/*
- * CRC-32 stuff
- */
-#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF))
-      /* CRC generator 0xEDB88320 */
-      /* CRC remainder 0x2144DF1C */
-      /* CRC initial value 0x00000000 */
-#define CRC32_REMAINDER 0x2144DF1C
-#define CRC32_INITIAL 0x00000000
-
-#ifndef __initdata
-#define __initdata
-#endif
-
-#endif
-
index 4403e21..eddd20a 100644 (file)
@@ -124,14 +124,6 @@ static int sealevel_close(struct net_device *d)
        return 0;
 }
 
-static int sealevel_ioctl(struct net_device *d, struct ifreq *ifr, int cmd)
-{
-       /* struct slvl_device *slvl=dev_to_chan(d);
-        * z8530_ioctl(d,&slvl->sync.chanA,ifr,cmd)
-        */
-       return hdlc_ioctl(d, ifr, cmd);
-}
-
 /*     Passed network frames, fire them downwind. */
 
 static netdev_tx_t sealevel_queue_xmit(struct sk_buff *skb,
@@ -152,7 +144,7 @@ static const struct net_device_ops sealevel_ops = {
        .ndo_open       = sealevel_open,
        .ndo_stop       = sealevel_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = sealevel_ioctl,
+       .ndo_siocwandev = hdlc_ioctl,
 };
 
 static int slvl_setup(struct slvl_device *sv, int iobase, int irq)
index f22e484..5a9e262 100644 (file)
@@ -343,20 +343,17 @@ static int wanxl_attach(struct net_device *dev, unsigned short encoding,
        return 0;
 }
 
-static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int wanxl_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        const size_t size = sizeof(sync_serial_settings);
        sync_serial_settings line;
        struct port *port = dev_to_port(dev);
 
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
-
-       switch (ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE:
-               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_SYNC_SERIAL;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
                memset(&line, 0, sizeof(line));
@@ -364,7 +361,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                line.clock_rate = 0;
                line.loopback = 0;
 
-               if (copy_to_user(ifr->ifr_settings.ifs_ifsu.sync, &line, size))
+               if (copy_to_user(ifs->ifs_ifsu.sync, &line, size))
                        return -EFAULT;
                return 0;
 
@@ -374,7 +371,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (dev->flags & IFF_UP)
                        return -EBUSY;
 
-               if (copy_from_user(&line, ifr->ifr_settings.ifs_ifsu.sync,
+               if (copy_from_user(&line, ifs->ifs_ifsu.sync,
                                   size))
                        return -EFAULT;
 
@@ -389,7 +386,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -545,7 +542,7 @@ static const struct net_device_ops wanxl_ops = {
        .ndo_open       = wanxl_open,
        .ndo_stop       = wanxl_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = wanxl_ioctl,
+       .ndo_siocwandev = wanxl_ioctl,
        .ndo_get_stats  = wanxl_get_stats,
 };
 
index 71878ab..4d4e2f9 100644 (file)
@@ -3393,19 +3393,12 @@ static int ath10k_pci_claim(struct ath10k *ar)
        }
 
        /* Target expects 32 bit DMA. Enforce it. */
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (ret) {
                ath10k_err(ar, "failed to set dma mask to 32-bit: %d\n", ret);
                goto err_region;
        }
 
-       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (ret) {
-               ath10k_err(ar, "failed to set consistent dma mask to 32-bit: %d\n",
-                          ret);
-               goto err_region;
-       }
-
        pci_set_master(pdev);
 
        /* Arrange for access to Target SoC registers. */
index 603d2f9..9a22481 100644 (file)
@@ -1406,11 +1406,6 @@ ath11k_update_per_peer_tx_stats(struct ath11k *ar,
         * Firmware rate's control to be skipped for this?
         */
 
-       if (flags == WMI_RATE_PREAMBLE_HE && mcs > 11) {
-               ath11k_warn(ab, "Invalid HE mcs %d peer stats",  mcs);
-               return;
-       }
-
        if (flags == WMI_RATE_PREAMBLE_HE && mcs > ATH11K_HE_MCS_MAX) {
                ath11k_warn(ab, "Invalid HE mcs %d peer stats",  mcs);
                return;
index 646ad79..5abb38c 100644 (file)
@@ -933,20 +933,14 @@ static int ath11k_pci_claim(struct ath11k_pci *ab_pci, struct pci_dev *pdev)
                goto disable_device;
        }
 
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
+       ret = dma_set_mask_and_coherent(&pdev->dev,
+                                       DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
        if (ret) {
                ath11k_err(ab, "failed to set pci dma mask to %d: %d\n",
                           ATH11K_PCI_DMA_MASK, ret);
                goto release_region;
        }
 
-       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(ATH11K_PCI_DMA_MASK));
-       if (ret) {
-               ath11k_err(ab, "failed to set pci consistent dma mask to %d: %d\n",
-                          ATH11K_PCI_DMA_MASK, ret);
-               goto release_region;
-       }
-
        pci_set_master(pdev);
 
        ab->mem_len = pci_resource_len(pdev, ATH11K_PCI_BAR_NUM);
index 43b4ae8..86b8cb9 100644 (file)
@@ -191,7 +191,7 @@ ath5k_pci_probe(struct pci_dev *pdev,
        }
 
        /* XXX 32-bit addressing only */
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
        if (ret) {
                dev_err(&pdev->dev, "32-bit DMA not available\n");
                goto err_dis;
index b137e7f..bd1ef63 100644 (file)
@@ -2504,8 +2504,10 @@ static int ath6kl_wmi_sync_point(struct wmi *wmi, u8 if_idx)
                goto free_data_skb;
 
        for (index = 0; index < num_pri_streams; index++) {
-               if (WARN_ON(!data_sync_bufs[index].skb))
+               if (WARN_ON(!data_sync_bufs[index].skb)) {
+                       ret = -ENOMEM;
                        goto free_data_skb;
+               }
 
                ep_id = ath6kl_ac2_endpoint_id(wmi->parent_dev,
                                               data_sync_bufs[index].
index b4885a7..b0a4ca3 100644 (file)
@@ -3351,7 +3351,8 @@ found:
                        "Found block at %x: code=%d ref=%d length=%d major=%d minor=%d\n",
                        cptr, code, reference, length, major, minor);
                if ((!AR_SREV_9485(ah) && length >= 1024) ||
-                   (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485)) {
+                   (AR_SREV_9485(ah) && length > EEPROM_DATA_LEN_9485) ||
+                   (length > cptr)) {
                        ath_dbg(common, EEPROM, "Skipping bad header\n");
                        cptr -= COMP_HDR_LEN;
                        continue;
index 2ca3b86..172081f 100644 (file)
@@ -1621,7 +1621,6 @@ static void ath9k_hw_apply_gpio_override(struct ath_hw *ah)
                ath9k_hw_gpio_request_out(ah, i, NULL,
                                          AR_GPIO_OUTPUT_MUX_AS_OUTPUT);
                ath9k_hw_set_gpio(ah, i, !!(ah->gpio_val & BIT(i)));
-               ath9k_hw_gpio_free(ah, i);
        }
 }
 
@@ -2728,14 +2727,17 @@ static void ath9k_hw_gpio_cfg_output_mux(struct ath_hw *ah, u32 gpio, u32 type)
 static void ath9k_hw_gpio_cfg_soc(struct ath_hw *ah, u32 gpio, bool out,
                                  const char *label)
 {
+       int err;
+
        if (ah->caps.gpio_requested & BIT(gpio))
                return;
 
-       /* may be requested by BSP, free anyway */
-       gpio_free(gpio);
-
-       if (gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label))
+       err = gpio_request_one(gpio, out ? GPIOF_OUT_INIT_LOW : GPIOF_IN, label);
+       if (err) {
+               ath_err(ath9k_hw_common(ah), "request GPIO%d failed:%d\n",
+                       gpio, err);
                return;
+       }
 
        ah->caps.gpio_requested |= BIT(gpio);
 }
index cff9af3..a074e23 100644 (file)
@@ -896,18 +896,12 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (pcim_enable_device(pdev))
                return -EIO;
 
-       ret =  pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (ret) {
                pr_err("32-bit DMA not available\n");
                return ret;
        }
 
-       ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (ret) {
-               pr_err("32-bit DMA consistent DMA enable failed\n");
-               return ret;
-       }
-
        /*
         * Cache line size is used to size and align various
         * structures used to communicate with the hardware.
index d202f21..ec913ec 100644 (file)
@@ -408,13 +408,14 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
                wcn36xx_dbg(WCN36XX_DBG_MAC, "wcn36xx_config channel switch=%d\n",
                            ch);
 
-               if (wcn->sw_scan_opchannel == ch) {
+               if (wcn->sw_scan_opchannel == ch && wcn->sw_scan_channel) {
                        /* If channel is the initial operating channel, we may
                         * want to receive/transmit regular data packets, then
                         * simply stop the scan session and exit PS mode.
                         */
                        wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN,
                                                wcn->sw_scan_vif);
+                       wcn->sw_scan_channel = 0;
                } else if (wcn->sw_scan) {
                        /* A scan is ongoing, do not change the operating
                         * channel, but start a scan session on the channel.
@@ -422,6 +423,7 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
                        wcn36xx_smd_init_scan(wcn, HAL_SYS_MODE_SCAN,
                                              wcn->sw_scan_vif);
                        wcn36xx_smd_start_scan(wcn, ch);
+                       wcn->sw_scan_channel = ch;
                } else {
                        wcn36xx_change_opchannel(wcn, ch);
                }
@@ -702,6 +704,7 @@ static void wcn36xx_sw_scan_start(struct ieee80211_hw *hw,
 
        wcn->sw_scan = true;
        wcn->sw_scan_vif = vif;
+       wcn->sw_scan_channel = 0;
        if (vif_priv->sta_assoc)
                wcn->sw_scan_opchannel = WCN36XX_HW_CHANNEL(wcn);
        else
@@ -1500,6 +1503,13 @@ static int wcn36xx_probe(struct platform_device *pdev)
                goto out_wq;
        }
 
+       wcn->nv_file = WLAN_NV_FILE;
+       ret = of_property_read_string(wcn->dev->parent->of_node, "firmware-name", &wcn->nv_file);
+       if (ret < 0 && ret != -EINVAL) {
+               wcn36xx_err("failed to read \"firmware-name\" property: %d\n", ret);
+               goto out_wq;
+       }
+
        wcn->smd_channel = qcom_wcnss_open_channel(wcnss, "WLAN_CTRL", wcn36xx_smd_rsp_process, hw);
        if (IS_ERR(wcn->smd_channel)) {
                wcn36xx_err("failed to open WLAN_CTRL channel\n");
index 0e3be17..57fa857 100644 (file)
@@ -504,10 +504,10 @@ int wcn36xx_smd_load_nv(struct wcn36xx *wcn)
        u16 fm_offset = 0;
 
        if (!wcn->nv) {
-               ret = request_firmware(&wcn->nv, WLAN_NV_FILE, wcn->dev);
+               ret = request_firmware(&wcn->nv, wcn->nv_file, wcn->dev);
                if (ret) {
                        wcn36xx_err("Failed to load nv file %s: %d\n",
-                                     WLAN_NV_FILE, ret);
+                                   wcn->nv_file, ret);
                        goto out;
                }
        }
index 1b83115..cab196b 100644 (file)
@@ -287,6 +287,10 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb)
                status.rate_idx = 0;
        }
 
+       if (ieee80211_is_beacon(hdr->frame_control) ||
+           ieee80211_is_probe_resp(hdr->frame_control))
+               status.boottime_ns = ktime_get_boottime_ns();
+
        memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
 
        if (ieee80211_is_beacon(hdr->frame_control)) {
index 6121d8a..add6e52 100644 (file)
@@ -199,6 +199,7 @@ struct wcn36xx {
        struct device           *dev;
        struct list_head        vif_list;
 
+       const char              *nv_file;
        const struct firmware   *nv;
 
        u8                      fw_revision;
@@ -246,6 +247,7 @@ struct wcn36xx {
        struct cfg80211_scan_request *scan_req;
        bool                    sw_scan;
        u8                      sw_scan_opchannel;
+       u8                      sw_scan_channel;
        struct ieee80211_vif    *sw_scan_vif;
        struct mutex            scan_lock;
        bool                    scan_aborted;
index e481674..29a9f17 100644 (file)
 
 #include "wil6210.h"
 
-static int wil_ethtoolops_get_coalesce(struct net_device *ndev,
-                                      struct ethtool_coalesce *cp)
+static int
+wil_ethtoolops_get_coalesce(struct net_device *ndev,
+                           struct ethtool_coalesce *cp,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct wil6210_priv *wil = ndev_to_wil(ndev);
        u32 tx_itr_en, tx_itr_val = 0;
@@ -45,8 +48,11 @@ out:
        return ret;
 }
 
-static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
-                                      struct ethtool_coalesce *cp)
+static int
+wil_ethtoolops_set_coalesce(struct net_device *ndev,
+                           struct ethtool_coalesce *cp,
+                           struct kernel_ethtool_coalesce *kernel_coal,
+                           struct netlink_ext_ack *extack)
 {
        struct wil6210_priv *wil = ndev_to_wil(ndev);
        struct wireless_dev *wdev = ndev->ieee80211_ptr;
index 9b15bc3..13c1350 100644 (file)
@@ -23,7 +23,8 @@ brcmfmac-objs += \
                feature.o \
                btcoex.o \
                vendor.o \
-               pno.o
+               pno.o \
+               xtlv.o
 brcmfmac-$(CONFIG_BRCMFMAC_PROTO_BCDC) += \
                bcdc.o \
                fwsignal.o
index 633d0ab..ac02244 100644 (file)
@@ -128,7 +128,8 @@ int brcmf_sdiod_intr_register(struct brcmf_sdio_dev *sdiodev)
 
                if (sdiodev->bus_if->chip == BRCM_CC_43362_CHIP_ID) {
                        /* assign GPIO to SDIO core */
-                       addr = CORE_CC_REG(SI_ENUM_BASE, gpiocontrol);
+                       addr = brcmf_chip_enum_base(sdiodev->func1->device);
+                       addr = CORE_CC_REG(addr, gpiocontrol);
                        gpiocontrol = brcmf_sdiod_readl(sdiodev, addr, &ret);
                        gpiocontrol |= 0x2;
                        brcmf_sdiod_writel(sdiodev, addr, gpiocontrol, &ret);
@@ -990,6 +991,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4359),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_43012),
+       BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752),
        BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_CYPRESS_89359),
        { /* end: all zeroes */ }
 };
index cedba56..f7b96cd 100644 (file)
@@ -1829,6 +1829,14 @@ brcmf_set_key_mgmt(struct net_device *ndev, struct cfg80211_connect_params *sme)
                                profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
                        }
                        break;
+               case WLAN_AKM_SUITE_FT_OVER_SAE:
+                       val = WPA3_AUTH_SAE_PSK | WPA2_AUTH_FT;
+                       profile->is_ft = true;
+                       if (sme->crypto.sae_pwd) {
+                               brcmf_dbg(INFO, "using SAE offload\n");
+                               profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
+                       }
+                       break;
                default:
                        bphy_err(drvr, "invalid cipher group (%d)\n",
                                 sme->crypto.cipher_group);
index 45037de..1ee49f9 100644 (file)
@@ -139,6 +139,8 @@ struct sbconfig {
        u32 sbidhigh;   /* identification */
 };
 
+#define INVALID_RAMBASE                        ((u32)(~0))
+
 /* bankidx and bankinfo reg defines corerev >= 8 */
 #define SOCRAM_BANKINFO_RETNTRAM_MASK  0x00010000
 #define SOCRAM_BANKINFO_SZMASK         0x0000007f
@@ -527,7 +529,7 @@ static int brcmf_chip_cores_check(struct brcmf_chip_priv *ci)
        int idx = 1;
 
        list_for_each_entry(core, &ci->cores, list) {
-               brcmf_dbg(INFO, " [%-2d] core 0x%x:%-2d base 0x%08x wrap 0x%08x\n",
+               brcmf_dbg(INFO, " [%-2d] core 0x%x:%-3d base 0x%08x wrap 0x%08x\n",
                          idx++, core->pub.id, core->pub.rev, core->pub.base,
                          core->wrapbase);
 
@@ -727,11 +729,13 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci)
        case BRCM_CC_4364_CHIP_ID:
        case CY_CC_4373_CHIP_ID:
                return 0x160000;
+       case CY_CC_43752_CHIP_ID:
+               return 0x170000;
        default:
                brcmf_err("unknown chip: %s\n", ci->pub.name);
                break;
        }
-       return 0;
+       return INVALID_RAMBASE;
 }
 
 int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
@@ -746,7 +750,7 @@ int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
                mem_core = container_of(mem, struct brcmf_core_priv, pub);
                ci->pub.ramsize = brcmf_chip_tcm_ramsize(mem_core);
                ci->pub.rambase = brcmf_chip_tcm_rambase(ci);
-               if (!ci->pub.rambase) {
+               if (ci->pub.rambase == INVALID_RAMBASE) {
                        brcmf_err("RAM base not provided with ARM CR4 core\n");
                        return -EINVAL;
                }
@@ -757,7 +761,7 @@ int brcmf_chip_get_raminfo(struct brcmf_chip *pub)
                                                pub);
                        ci->pub.ramsize = brcmf_chip_sysmem_ramsize(mem_core);
                        ci->pub.rambase = brcmf_chip_tcm_rambase(ci);
-                       if (!ci->pub.rambase) {
+                       if (ci->pub.rambase == INVALID_RAMBASE) {
                                brcmf_err("RAM base not provided with ARM CA7 core\n");
                                return -EINVAL;
                        }
@@ -894,7 +898,8 @@ int brcmf_chip_dmp_erom_scan(struct brcmf_chip_priv *ci)
        u32 base, wrap;
        int err;
 
-       eromaddr = ci->ops->read32(ci->ctx, CORE_CC_REG(SI_ENUM_BASE, eromptr));
+       eromaddr = ci->ops->read32(ci->ctx,
+                                  CORE_CC_REG(ci->pub.enum_base, eromptr));
 
        while (desc_type != DMP_DESC_EOT) {
                val = brcmf_chip_dmp_get_desc(ci, &eromaddr, &desc_type);
@@ -942,6 +947,11 @@ int brcmf_chip_dmp_erom_scan(struct brcmf_chip_priv *ci)
        return 0;
 }
 
+u32 brcmf_chip_enum_base(u16 devid)
+{
+       return SI_ENUM_BASE_DEFAULT;
+}
+
 static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
 {
        struct brcmf_core *core;
@@ -954,7 +964,8 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
         * For different chiptypes or old sdio hosts w/o chipcommon,
         * other ways of recognition should be added here.
         */
-       regdata = ci->ops->read32(ci->ctx, CORE_CC_REG(SI_ENUM_BASE, chipid));
+       regdata = ci->ops->read32(ci->ctx,
+                                 CORE_CC_REG(ci->pub.enum_base, chipid));
        ci->pub.chip = regdata & CID_ID_MASK;
        ci->pub.chiprev = (regdata & CID_REV_MASK) >> CID_REV_SHIFT;
        socitype = (regdata & CID_TYPE_MASK) >> CID_TYPE_SHIFT;
@@ -974,7 +985,7 @@ static int brcmf_chip_recognition(struct brcmf_chip_priv *ci)
                ci->resetcore = brcmf_chip_sb_resetcore;
 
                core = brcmf_chip_add_core(ci, BCMA_CORE_CHIPCOMMON,
-                                          SI_ENUM_BASE, 0);
+                                          SI_ENUM_BASE_DEFAULT, 0);
                brcmf_chip_sb_corerev(ci, core);
                core = brcmf_chip_add_core(ci, BCMA_CORE_SDIO_DEV,
                                           BCM4329_CORE_BUS_BASE, 0);
@@ -1088,7 +1099,7 @@ static int brcmf_chip_setup(struct brcmf_chip_priv *chip)
        return ret;
 }
 
-struct brcmf_chip *brcmf_chip_attach(void *ctx,
+struct brcmf_chip *brcmf_chip_attach(void *ctx, u16 devid,
                                     const struct brcmf_buscore_ops *ops)
 {
        struct brcmf_chip_priv *chip;
@@ -1113,6 +1124,7 @@ struct brcmf_chip *brcmf_chip_attach(void *ctx,
        chip->num_cores = 0;
        chip->ops = ops;
        chip->ctx = ctx;
+       chip->pub.enum_base = brcmf_chip_enum_base(devid);
 
        err = ops->prepare(ctx);
        if (err < 0)
@@ -1411,6 +1423,7 @@ bool brcmf_chip_sr_capable(struct brcmf_chip *pub)
                reg = chip->ops->read32(chip->ctx, addr);
                return (reg & CC_SR_CTL0_ENABLE_MASK) != 0;
        case BRCM_CC_4359_CHIP_ID:
+       case CY_CC_43752_CHIP_ID:
        case CY_CC_43012_CHIP_ID:
                addr = CORE_CC_REG(pmu->base, retention_ctl);
                reg = chip->ops->read32(chip->ctx, addr);
index 8fa3865..d69f101 100644 (file)
@@ -15,6 +15,7 @@
  *
  * @chip: chip identifier.
  * @chiprev: chip revision.
+ * @enum_base: base address of core enumeration space.
  * @cc_caps: chipcommon core capabilities.
  * @cc_caps_ext: chipcommon core extended capabilities.
  * @pmucaps: PMU capabilities.
@@ -27,6 +28,7 @@
 struct brcmf_chip {
        u32 chip;
        u32 chiprev;
+       u32 enum_base;
        u32 cc_caps;
        u32 cc_caps_ext;
        u32 pmucaps;
@@ -70,7 +72,7 @@ struct brcmf_buscore_ops {
 };
 
 int brcmf_chip_get_raminfo(struct brcmf_chip *pub);
-struct brcmf_chip *brcmf_chip_attach(void *ctx,
+struct brcmf_chip *brcmf_chip_attach(void *ctx, u16 devid,
                                     const struct brcmf_buscore_ops *ops);
 void brcmf_chip_detach(struct brcmf_chip *chip);
 struct brcmf_core *brcmf_chip_get_core(struct brcmf_chip *chip, u16 coreid);
@@ -85,5 +87,6 @@ void brcmf_chip_set_passive(struct brcmf_chip *ci);
 bool brcmf_chip_set_active(struct brcmf_chip *ci, u32 rstvec);
 bool brcmf_chip_sr_capable(struct brcmf_chip *pub);
 char *brcmf_chip_name(u32 chipid, u32 chiprev, char *buf, uint len);
+u32 brcmf_chip_enum_base(u16 devid);
 
 #endif /* BRCMF_AXIDMP_H */
index d40104b..0eb13e5 100644 (file)
@@ -431,8 +431,6 @@ struct brcmf_fw {
        void (*done)(struct device *dev, int err, struct brcmf_fw_request *req);
 };
 
-static void brcmf_fw_request_done(const struct firmware *fw, void *ctx);
-
 #ifdef CONFIG_EFI
 /* In some cases the EFI-var stored nvram contains "ccode=ALL" or "ccode=XV"
  * to specify "worldwide" compatible settings, but these 2 ccode-s do not work
@@ -594,28 +592,47 @@ static int brcmf_fw_complete_request(const struct firmware *fw,
        return (cur->flags & BRCMF_FW_REQF_OPTIONAL) ? 0 : ret;
 }
 
+static char *brcm_alt_fw_path(const char *path, const char *board_type)
+{
+       char alt_path[BRCMF_FW_NAME_LEN];
+       char suffix[5];
+
+       strscpy(alt_path, path, BRCMF_FW_NAME_LEN);
+       /* At least one character + suffix */
+       if (strlen(alt_path) < 5)
+               return NULL;
+
+       /* strip .txt or .bin at the end */
+       strscpy(suffix, alt_path + strlen(alt_path) - 4, 5);
+       alt_path[strlen(alt_path) - 4] = 0;
+       strlcat(alt_path, ".", BRCMF_FW_NAME_LEN);
+       strlcat(alt_path, board_type, BRCMF_FW_NAME_LEN);
+       strlcat(alt_path, suffix, BRCMF_FW_NAME_LEN);
+
+       return kstrdup(alt_path, GFP_KERNEL);
+}
+
 static int brcmf_fw_request_firmware(const struct firmware **fw,
                                     struct brcmf_fw *fwctx)
 {
        struct brcmf_fw_item *cur = &fwctx->req->items[fwctx->curpos];
        int ret;
 
-       /* nvram files are board-specific, first try a board-specific path */
+       /* Files can be board-specific, first try a board-specific path */
        if (cur->type == BRCMF_FW_TYPE_NVRAM && fwctx->req->board_type) {
-               char alt_path[BRCMF_FW_NAME_LEN];
+               char *alt_path;
 
-               strlcpy(alt_path, cur->path, BRCMF_FW_NAME_LEN);
-               /* strip .txt at the end */
-               alt_path[strlen(alt_path) - 4] = 0;
-               strlcat(alt_path, ".", BRCMF_FW_NAME_LEN);
-               strlcat(alt_path, fwctx->req->board_type, BRCMF_FW_NAME_LEN);
-               strlcat(alt_path, ".txt", BRCMF_FW_NAME_LEN);
+               alt_path = brcm_alt_fw_path(cur->path, fwctx->req->board_type);
+               if (!alt_path)
+                       goto fallback;
 
                ret = request_firmware(fw, alt_path, fwctx->dev);
+               kfree(alt_path);
                if (ret == 0)
                        return ret;
        }
 
+fallback:
        return request_firmware(fw, cur->path, fwctx->dev);
 }
 
@@ -639,6 +656,22 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx)
        kfree(fwctx);
 }
 
+static void brcmf_fw_request_done_alt_path(const struct firmware *fw, void *ctx)
+{
+       struct brcmf_fw *fwctx = ctx;
+       struct brcmf_fw_item *first = &fwctx->req->items[0];
+       int ret = 0;
+
+       /* Fall back to canonical path if board firmware not found */
+       if (!fw)
+               ret = request_firmware_nowait(THIS_MODULE, true, first->path,
+                                             fwctx->dev, GFP_KERNEL, fwctx,
+                                             brcmf_fw_request_done);
+
+       if (fw || ret < 0)
+               brcmf_fw_request_done(fw, ctx);
+}
+
 static bool brcmf_fw_request_is_valid(struct brcmf_fw_request *req)
 {
        struct brcmf_fw_item *item;
@@ -660,6 +693,7 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
 {
        struct brcmf_fw_item *first = &req->items[0];
        struct brcmf_fw *fwctx;
+       char *alt_path;
        int ret;
 
        brcmf_dbg(TRACE, "enter: dev=%s\n", dev_name(dev));
@@ -677,9 +711,18 @@ int brcmf_fw_get_firmwares(struct device *dev, struct brcmf_fw_request *req,
        fwctx->req = req;
        fwctx->done = fw_cb;
 
-       ret = request_firmware_nowait(THIS_MODULE, true, first->path,
-                                     fwctx->dev, GFP_KERNEL, fwctx,
-                                     brcmf_fw_request_done);
+       /* First try alternative board-specific path if any */
+       alt_path = brcm_alt_fw_path(first->path, fwctx->req->board_type);
+       if (alt_path) {
+               ret = request_firmware_nowait(THIS_MODULE, true, alt_path,
+                                             fwctx->dev, GFP_KERNEL, fwctx,
+                                             brcmf_fw_request_done_alt_path);
+               kfree(alt_path);
+       } else {
+               ret = request_firmware_nowait(THIS_MODULE, true, first->path,
+                                             fwctx->dev, GFP_KERNEL, fwctx,
+                                             brcmf_fw_request_done);
+       }
        if (ret < 0)
                brcmf_fw_request_done(NULL, fwctx);
 
index 9ed8542..d5578ca 100644 (file)
@@ -15,6 +15,7 @@
 #include "bus.h"
 #include "debug.h"
 #include "tracepoint.h"
+#include "xtlv.h"
 #include "fwil.h"
 #include "proto.h"
 
@@ -150,7 +151,8 @@ brcmf_fil_cmd_data_get(struct brcmf_if *ifp, u32 cmd, void *data, u32 len)
        mutex_lock(&ifp->drvr->proto_block);
        err = brcmf_fil_cmd_data(ifp, cmd, data, len, false);
 
-       brcmf_dbg(FIL, "ifidx=%d, cmd=%d, len=%d\n", ifp->ifidx, cmd, len);
+       brcmf_dbg(FIL, "ifidx=%d, cmd=%d, len=%d, err=%d\n", ifp->ifidx, cmd,
+                 len, err);
        brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
                           min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
 
@@ -260,7 +262,8 @@ brcmf_fil_iovar_data_get(struct brcmf_if *ifp, char *name, void *data,
                bphy_err(drvr, "Creating iovar failed\n");
        }
 
-       brcmf_dbg(FIL, "ifidx=%d, name=%s, len=%d\n", ifp->ifidx, name, len);
+       brcmf_dbg(FIL, "ifidx=%d, name=%s, len=%d, err=%d\n", ifp->ifidx, name,
+                 len, err);
        brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
                           min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
 
@@ -383,14 +386,13 @@ brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, char *name,
                err = -EPERM;
                bphy_err(drvr, "Creating bsscfg failed\n");
        }
-       brcmf_dbg(FIL, "ifidx=%d, bsscfgidx=%d, name=%s, len=%d\n", ifp->ifidx,
-                 ifp->bsscfgidx, name, len);
+       brcmf_dbg(FIL, "ifidx=%d, bsscfgidx=%d, name=%s, len=%d, err=%d\n",
+                 ifp->ifidx, ifp->bsscfgidx, name, len, err);
        brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
                           min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
 
        mutex_unlock(&drvr->proto_block);
        return err;
-
 }
 
 s32
@@ -414,3 +416,117 @@ brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, char *name, u32 *data)
                *data = le32_to_cpu(data_le);
        return err;
 }
+
+static u32 brcmf_create_xtlv(char *name, u16 id, char *data, u32 len,
+                            char *buf, u32 buflen)
+{
+       u32 iolen;
+       u32 nmlen;
+
+       nmlen = strlen(name) + 1;
+       iolen = nmlen + brcmf_xtlv_data_size(len, BRCMF_XTLV_OPTION_ALIGN32);
+
+       if (iolen > buflen) {
+               brcmf_err("buffer is too short\n");
+               return 0;
+       }
+
+       memcpy(buf, name, nmlen);
+       brcmf_xtlv_pack_header((void *)(buf + nmlen), id, len, data,
+                              BRCMF_XTLV_OPTION_ALIGN32);
+
+       return iolen;
+}
+
+s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, char *name, u16 id,
+                           void *data, u32 len)
+{
+       struct brcmf_pub *drvr = ifp->drvr;
+       s32 err;
+       u32 buflen;
+
+       mutex_lock(&drvr->proto_block);
+
+       brcmf_dbg(FIL, "ifidx=%d, name=%s, id=%u, len=%u\n", ifp->ifidx, name,
+                 id, len);
+       brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
+                          min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
+
+       buflen = brcmf_create_xtlv(name, id, data, len,
+                                  drvr->proto_buf, sizeof(drvr->proto_buf));
+       if (buflen) {
+               err = brcmf_fil_cmd_data(ifp, BRCMF_C_SET_VAR, drvr->proto_buf,
+                                        buflen, true);
+       } else {
+               err = -EPERM;
+               bphy_err(drvr, "Creating xtlv failed\n");
+       }
+
+       mutex_unlock(&drvr->proto_block);
+       return err;
+}
+
+s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, char *name, u16 id,
+                           void *data, u32 len)
+{
+       struct brcmf_pub *drvr = ifp->drvr;
+       s32 err;
+       u32 buflen;
+
+       mutex_lock(&drvr->proto_block);
+
+       buflen = brcmf_create_xtlv(name, id, data, len,
+                                  drvr->proto_buf, sizeof(drvr->proto_buf));
+       if (buflen) {
+               err = brcmf_fil_cmd_data(ifp, BRCMF_C_GET_VAR, drvr->proto_buf,
+                                        buflen, false);
+               if (err == 0)
+                       memcpy(data, drvr->proto_buf, len);
+       } else {
+               err = -EPERM;
+               bphy_err(drvr, "Creating bsscfg failed\n");
+       }
+       brcmf_dbg(FIL, "ifidx=%d, name=%s, id=%u, len=%u, err=%d\n",
+                 ifp->ifidx, name, id, len, err);
+       brcmf_dbg_hex_dump(BRCMF_FIL_ON(), data,
+                          min_t(uint, len, MAX_HEX_DUMP_LEN), "data\n");
+
+       mutex_unlock(&drvr->proto_block);
+       return err;
+}
+
+s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, char *name, u16 id, u32 data)
+{
+       __le32 data_le = cpu_to_le32(data);
+
+       return brcmf_fil_xtlv_data_set(ifp, name, id, &data_le,
+                                        sizeof(data_le));
+}
+
+s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, char *name, u16 id, u32 *data)
+{
+       __le32 data_le = cpu_to_le32(*data);
+       s32 err;
+
+       err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+       if (err == 0)
+               *data = le32_to_cpu(data_le);
+       return err;
+}
+
+s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, char *name, u16 id, u8 *data)
+{
+       return brcmf_fil_xtlv_data_get(ifp, name, id, data, sizeof(*data));
+}
+
+s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, char *name, u16 id, u16 *data)
+{
+       __le16 data_le = cpu_to_le16(*data);
+       s32 err;
+
+       err = brcmf_fil_xtlv_data_get(ifp, name, id, &data_le, sizeof(data_le));
+       if (err == 0)
+               *data = le16_to_cpu(data_le);
+       return err;
+}
+
index ae4cf43..cb26f8c 100644 (file)
@@ -97,5 +97,13 @@ s32 brcmf_fil_bsscfg_data_get(struct brcmf_if *ifp, char *name, void *data,
                              u32 len);
 s32 brcmf_fil_bsscfg_int_set(struct brcmf_if *ifp, char *name, u32 data);
 s32 brcmf_fil_bsscfg_int_get(struct brcmf_if *ifp, char *name, u32 *data);
+s32 brcmf_fil_xtlv_data_set(struct brcmf_if *ifp, char *name, u16 id,
+                           void *data, u32 len);
+s32 brcmf_fil_xtlv_data_get(struct brcmf_if *ifp, char *name, u16 id,
+                           void *data, u32 len);
+s32 brcmf_fil_xtlv_int_set(struct brcmf_if *ifp, char *name, u16 id, u32 data);
+s32 brcmf_fil_xtlv_int_get(struct brcmf_if *ifp, char *name, u16 id, u32 *data);
+s32 brcmf_fil_xtlv_int8_get(struct brcmf_if *ifp, char *name, u16 id, u8 *data);
+s32 brcmf_fil_xtlv_int16_get(struct brcmf_if *ifp, char *name, u16 id, u16 *data);
 
 #endif /* _fwil_h_ */
index c49dd0c..8b14999 100644 (file)
@@ -1886,7 +1886,8 @@ brcmf_pcie_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        devinfo->pdev = pdev;
        pcie_bus_dev = NULL;
-       devinfo->ci = brcmf_chip_attach(devinfo, &brcmf_pcie_buscore_ops);
+       devinfo->ci = brcmf_chip_attach(devinfo, pdev->device,
+                                       &brcmf_pcie_buscore_ops);
        if (IS_ERR(devinfo->ci)) {
                ret = PTR_ERR(devinfo->ci);
                devinfo->ci = NULL;
@@ -2075,7 +2076,7 @@ cleanup:
 
        err = brcmf_pcie_probe(pdev, NULL);
        if (err)
-               brcmf_err(bus, "probe after resume failed, err=%d\n", err);
+               __brcmf_err(NULL, __func__, "probe after resume failed, err=%d\n", err);
 
        return err;
 }
index 97ee9e2..8effeb7 100644 (file)
@@ -617,6 +617,7 @@ BRCMF_FW_DEF(4339, "brcmfmac4339-sdio");
 BRCMF_FW_DEF(43430A0, "brcmfmac43430a0-sdio");
 /* Note the names are not postfixed with a1 for backward compatibility */
 BRCMF_FW_CLM_DEF(43430A1, "brcmfmac43430-sdio");
+BRCMF_FW_DEF(43430B0, "brcmfmac43430b0-sdio");
 BRCMF_FW_CLM_DEF(43455, "brcmfmac43455-sdio");
 BRCMF_FW_DEF(43456, "brcmfmac43456-sdio");
 BRCMF_FW_CLM_DEF(4354, "brcmfmac4354-sdio");
@@ -624,11 +625,15 @@ BRCMF_FW_CLM_DEF(4356, "brcmfmac4356-sdio");
 BRCMF_FW_DEF(4359, "brcmfmac4359-sdio");
 BRCMF_FW_CLM_DEF(4373, "brcmfmac4373-sdio");
 BRCMF_FW_CLM_DEF(43012, "brcmfmac43012-sdio");
+BRCMF_FW_CLM_DEF(43752, "brcmfmac43752-sdio");
 
 /* firmware config files */
 MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.txt");
 MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-pcie.*.txt");
 
+/* per-board firmware binaries */
+MODULE_FIRMWARE(BRCMF_FW_DEFAULT_PATH "brcmfmac*-sdio.*.bin");
+
 static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
        BRCMF_FW_ENTRY(BRCM_CC_43143_CHIP_ID, 0xFFFFFFFF, 43143),
        BRCMF_FW_ENTRY(BRCM_CC_43241_CHIP_ID, 0x0000001F, 43241B0),
@@ -643,14 +648,16 @@ static const struct brcmf_firmware_mapping brcmf_sdio_fwnames[] = {
        BRCMF_FW_ENTRY(BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, 43362),
        BRCMF_FW_ENTRY(BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, 4339),
        BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000001, 43430A0),
-       BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFE, 43430A1),
+       BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0x00000002, 43430A1),
+       BRCMF_FW_ENTRY(BRCM_CC_43430_CHIP_ID, 0xFFFFFFFC, 43430B0),
        BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0x00000200, 43456),
        BRCMF_FW_ENTRY(BRCM_CC_4345_CHIP_ID, 0xFFFFFDC0, 43455),
        BRCMF_FW_ENTRY(BRCM_CC_4354_CHIP_ID, 0xFFFFFFFF, 4354),
        BRCMF_FW_ENTRY(BRCM_CC_4356_CHIP_ID, 0xFFFFFFFF, 4356),
        BRCMF_FW_ENTRY(BRCM_CC_4359_CHIP_ID, 0xFFFFFFFF, 4359),
        BRCMF_FW_ENTRY(CY_CC_4373_CHIP_ID, 0xFFFFFFFF, 4373),
-       BRCMF_FW_ENTRY(CY_CC_43012_CHIP_ID, 0xFFFFFFFF, 43012)
+       BRCMF_FW_ENTRY(CY_CC_43012_CHIP_ID, 0xFFFFFFFF, 43012),
+       BRCMF_FW_ENTRY(CY_CC_43752_CHIP_ID, 0xFFFFFFFF, 43752)
 };
 
 #define TXCTL_CREDITS  2
@@ -3416,7 +3423,8 @@ err:
 
 static bool brcmf_sdio_aos_no_decode(struct brcmf_sdio *bus)
 {
-       if (bus->ci->chip == CY_CC_43012_CHIP_ID)
+       if (bus->ci->chip == CY_CC_43012_CHIP_ID ||
+           bus->ci->chip == CY_CC_43752_CHIP_ID)
                return true;
        else
                return false;
@@ -3907,7 +3915,7 @@ static u32 brcmf_sdio_buscore_read32(void *ctx, u32 addr)
         * It can be identified as 4339 by looking at the chip revision. It
         * is corrected here so the chip.c module has the right info.
         */
-       if (addr == CORE_CC_REG(SI_ENUM_BASE, chipid) &&
+       if (addr == CORE_CC_REG(SI_ENUM_BASE_DEFAULT, chipid) &&
            (sdiodev->func1->device == SDIO_DEVICE_ID_BROADCOM_4339 ||
             sdiodev->func1->device == SDIO_DEVICE_ID_BROADCOM_4335_4339)) {
                rev = (val & CID_REV_MASK) >> CID_REV_SHIFT;
@@ -3943,12 +3951,15 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus)
        int reg_addr;
        u32 reg_val;
        u32 drivestrength;
+       u32 enum_base;
 
        sdiodev = bus->sdiodev;
        sdio_claim_host(sdiodev->func1);
 
-       pr_debug("F1 signature read @0x18000000=0x%4x\n",
-                brcmf_sdiod_readl(sdiodev, SI_ENUM_BASE, NULL));
+       enum_base = brcmf_chip_enum_base(sdiodev->func1->device);
+
+       pr_debug("F1 signature read @0x%08x=0x%4x\n", enum_base,
+                brcmf_sdiod_readl(sdiodev, enum_base, NULL));
 
        /*
         * Force PLL off until brcmf_chip_attach()
@@ -3967,7 +3978,8 @@ brcmf_sdio_probe_attach(struct brcmf_sdio *bus)
                goto fail;
        }
 
-       bus->ci = brcmf_chip_attach(sdiodev, &brcmf_sdio_buscore_ops);
+       bus->ci = brcmf_chip_attach(sdiodev, sdiodev->func1->device,
+                                   &brcmf_sdio_buscore_ops);
        if (IS_ERR(bus->ci)) {
                brcmf_err("brcmf_chip_attach failed!\n");
                bus->ci = NULL;
@@ -4257,6 +4269,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 
                switch (sdiod->func1->device) {
                case SDIO_DEVICE_ID_BROADCOM_CYPRESS_4373:
+               case SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752:
                        brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
                                  CY_4373_F2_WATERMARK);
                        brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
@@ -4442,7 +4455,7 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
        bus->tx_seq = SDPCM_SEQ_WRAP - 1;
 
        /* single-threaded workqueue */
-       wq = alloc_ordered_workqueue("brcmf_wq/%s", WQ_MEM_RECLAIM,
+       wq = alloc_ordered_workqueue("brcmf_wq/%s", WQ_MEM_RECLAIM | WQ_HIGHPRI,
                                     dev_name(&sdiodev->func1->dev));
        if (!wq) {
                brcmf_err("insufficient memory to create txworkqueue\n");
@@ -4616,4 +4629,3 @@ int brcmf_sdio_sleep(struct brcmf_sdio *bus, bool sleep)
 
        return ret;
 }
-
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.c
new file mode 100644 (file)
index 0000000..2f3c451
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (c) 2019 Broadcom
+ */
+
+#include <asm/unaligned.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+
+#include "xtlv.h"
+
+static int brcmf_xtlv_header_size(u16 opts)
+{
+       int len = (int)offsetof(struct brcmf_xtlv, data);
+
+       if (opts & BRCMF_XTLV_OPTION_IDU8)
+               --len;
+       if (opts & BRCMF_XTLV_OPTION_LENU8)
+               --len;
+
+       return len;
+}
+
+int brcmf_xtlv_data_size(int dlen, u16 opts)
+{
+       int hsz;
+
+       hsz = brcmf_xtlv_header_size(opts);
+       if (opts & BRCMF_XTLV_OPTION_ALIGN32)
+               return roundup(dlen + hsz, 4);
+
+       return dlen + hsz;
+}
+
+void brcmf_xtlv_pack_header(struct brcmf_xtlv *xtlv, u16 id, u16 len,
+                           const u8 *data, u16 opts)
+{
+       u8 *data_buf;
+       u16 mask = BRCMF_XTLV_OPTION_IDU8 | BRCMF_XTLV_OPTION_LENU8;
+
+       if (!(opts & mask)) {
+               u8 *idp = (u8 *)xtlv;
+               u8 *lenp = idp + sizeof(xtlv->id);
+
+               put_unaligned_le16(id, idp);
+               put_unaligned_le16(len, lenp);
+               data_buf = lenp + sizeof(u16);
+       } else if ((opts & mask) == mask) { /* u8 id and u8 len */
+               u8 *idp = (u8 *)xtlv;
+               u8 *lenp = idp + 1;
+
+               *idp = (u8)id;
+               *lenp = (u8)len;
+               data_buf = lenp + sizeof(u8);
+       } else if (opts & BRCMF_XTLV_OPTION_IDU8) { /* u8 id, u16 len */
+               u8 *idp = (u8 *)xtlv;
+               u8 *lenp = idp + 1;
+
+               *idp = (u8)id;
+               put_unaligned_le16(len, lenp);
+               data_buf = lenp + sizeof(u16);
+       } else if (opts & BRCMF_XTLV_OPTION_LENU8) { /* u16 id, u8 len */
+               u8 *idp = (u8 *)xtlv;
+               u8 *lenp = idp + sizeof(u16);
+
+               put_unaligned_le16(id, idp);
+               *lenp = (u8)len;
+               data_buf = lenp + sizeof(u8);
+       } else {
+               WARN(true, "Unexpected xtlv option");
+               return;
+       }
+
+       if (opts & BRCMF_XTLV_OPTION_LENU8) {
+               WARN_ON(len > 0x00ff);
+               len &= 0xff;
+       }
+
+       if (data)
+               memcpy(data_buf, data, len);
+}
+
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/xtlv.h
new file mode 100644 (file)
index 0000000..e1930ce
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (c) 2019 Broadcom
+ */
+#ifndef __BRCMF_XTLV_H
+#define __BRCMF_XTLV_H
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+/* bcm type(id), length, value with w/16 bit id/len. The structure below
+ * is nominal, and is used to support variable length id and type. See
+ * xtlv options below.
+ */
+struct brcmf_xtlv {
+       u16 id;
+       u16 len;
+       u8 data[0];
+};
+
+enum brcmf_xtlv_option {
+       BRCMF_XTLV_OPTION_ALIGN32 = BIT(0),
+       BRCMF_XTLV_OPTION_IDU8 = BIT(1),
+       BRCMF_XTLV_OPTION_LENU8 = BIT(2),
+};
+
+int brcmf_xtlv_data_size(int dlen, u16 opts);
+void brcmf_xtlv_pack_header(struct brcmf_xtlv *xtlv, u16 id, u16 len,
+                           const u8 *data, u16 opts);
+
+#endif /* __BRCMF_XTLV_H */
index 26de1bd..8ddfc3d 100644 (file)
@@ -704,7 +704,7 @@ static void brcms_c_write_inits(struct brcms_hardware *wlc_hw,
 static void brcms_c_write_mhf(struct brcms_hardware *wlc_hw, u16 *mhfs)
 {
        u8 idx;
-       u16 addr[] = {
+       static const u16 addr[] = {
                M_HOST_FLAGS1, M_HOST_FLAGS2, M_HOST_FLAGS3, M_HOST_FLAGS4,
                M_HOST_FLAGS5
        };
index 00309b2..9d81320 100644 (file)
@@ -52,6 +52,7 @@
 #define BRCM_CC_4371_CHIP_ID           0x4371
 #define CY_CC_4373_CHIP_ID             0x4373
 #define CY_CC_43012_CHIP_ID            43012
+#define CY_CC_43752_CHIP_ID            43752
 
 /* USB Device IDs */
 #define BRCM_USB_43143_DEVICE_ID       0xbd1e
index 92d942b..8249211 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef        _BRCM_SOC_H
 #define        _BRCM_SOC_H
 
-#define SI_ENUM_BASE           0x18000000      /* Enumeration space base */
+#define SI_ENUM_BASE_DEFAULT   0x18000000
 
 /* Common core control flags */
 #define        SICF_BIST_EN            0x8000
index fd37d4d..65dd8cf 100644 (file)
@@ -1144,7 +1144,7 @@ static int waitbusy(struct airo_info *ai);
 static irqreturn_t airo_interrupt(int irq, void* dev_id);
 static int airo_thread(void *data);
 static void timer_func(struct net_device *dev);
-static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *, int cmd);
 static struct iw_statistics *airo_get_wireless_stats(struct net_device *dev);
 #ifdef CISCO_EXT
 static int readrids(struct net_device *dev, aironet_ioctl *comp);
@@ -2664,7 +2664,7 @@ static const struct net_device_ops airo11_netdev_ops = {
        .ndo_start_xmit         = airo_start_xmit11,
        .ndo_get_stats          = airo_get_stats,
        .ndo_set_mac_address    = airo_set_mac_address,
-       .ndo_do_ioctl           = airo_ioctl,
+       .ndo_siocdevprivate     = airo_siocdevprivate,
 };
 
 static void wifi_setup(struct net_device *dev)
@@ -2764,7 +2764,7 @@ static const struct net_device_ops airo_netdev_ops = {
        .ndo_get_stats          = airo_get_stats,
        .ndo_set_rx_mode        = airo_set_multicast_list,
        .ndo_set_mac_address    = airo_set_mac_address,
-       .ndo_do_ioctl           = airo_ioctl,
+       .ndo_siocdevprivate     = airo_siocdevprivate,
        .ndo_validate_addr      = eth_validate_addr,
 };
 
@@ -2775,7 +2775,7 @@ static const struct net_device_ops mpi_netdev_ops = {
        .ndo_get_stats          = airo_get_stats,
        .ndo_set_rx_mode        = airo_set_multicast_list,
        .ndo_set_mac_address    = airo_set_mac_address,
-       .ndo_do_ioctl           = airo_ioctl,
+       .ndo_siocdevprivate     = airo_siocdevprivate,
        .ndo_validate_addr      = eth_validate_addr,
 };
 
@@ -7661,7 +7661,8 @@ static const struct iw_handler_def        airo_handler_def =
  * Javier Achirica did a great job of merging code from the unnamed CISCO
  * developer that added support for flashing the card.
  */
-static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int airo_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                              void __user *data, int cmd)
 {
        int rc = 0;
        struct airo_info *ai = dev->ml_priv;
@@ -7678,7 +7679,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        {
                int val = AIROMAGIC;
                aironet_ioctl com;
-               if (copy_from_user(&com, rq->ifr_data, sizeof(com)))
+               if (copy_from_user(&com, data, sizeof(com)))
                        rc = -EFAULT;
                else if (copy_to_user(com.data, (char *)&val, sizeof(val)))
                        rc = -EFAULT;
@@ -7694,7 +7695,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                 */
        {
                aironet_ioctl com;
-               if (copy_from_user(&com, rq->ifr_data, sizeof(com))) {
+               if (copy_from_user(&com, data, sizeof(com))) {
                        rc = -EFAULT;
                        break;
                }
index 5a2a723..7a684b7 100644 (file)
@@ -927,7 +927,8 @@ static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 };
 static int libipw_verify_qos_info(struct libipw_qos_information_element
                                     *info_element, int sub_type)
 {
-
+       if (info_element->elementID != QOS_ELEMENT_ID)
+               return -1;
        if (info_element->qui_subtype != sub_type)
                return -1;
        if (memcmp(info_element->qui, qos_oui, QOS_OUI_LEN))
@@ -943,57 +944,34 @@ static int libipw_verify_qos_info(struct libipw_qos_information_element
 /*
  * Parse a QoS parameter element
  */
-static int libipw_read_qos_param_element(struct libipw_qos_parameter_info
-                                           *element_param, struct libipw_info_element
-                                           *info_element)
+static int libipw_read_qos_param_element(
+                       struct libipw_qos_parameter_info *element_param,
+                       struct libipw_info_element *info_element)
 {
-       int ret = 0;
-       u16 size = sizeof(struct libipw_qos_parameter_info) - 2;
+       size_t size = sizeof(*element_param);
 
-       if ((info_element == NULL) || (element_param == NULL))
+       if (!element_param || !info_element || info_element->len != size - 2)
                return -1;
 
-       if (info_element->id == QOS_ELEMENT_ID && info_element->len == size) {
-               memcpy(element_param->info_element.qui, info_element->data,
-                      info_element->len);
-               element_param->info_element.elementID = info_element->id;
-               element_param->info_element.length = info_element->len;
-       } else
-               ret = -1;
-       if (ret == 0)
-               ret = libipw_verify_qos_info(&element_param->info_element,
-                                               QOS_OUI_PARAM_SUB_TYPE);
-       return ret;
+       memcpy(element_param, info_element, size);
+       return libipw_verify_qos_info(&element_param->info_element,
+                                     QOS_OUI_PARAM_SUB_TYPE);
 }
 
 /*
  * Parse a QoS information element
  */
-static int libipw_read_qos_info_element(struct
-                                          libipw_qos_information_element
-                                          *element_info, struct libipw_info_element
-                                          *info_element)
+static int libipw_read_qos_info_element(
+                       struct libipw_qos_information_element *element_info,
+                       struct libipw_info_element *info_element)
 {
-       int ret = 0;
-       u16 size = sizeof(struct libipw_qos_information_element) - 2;
+       size_t size = sizeof(struct libipw_qos_information_element) - 2;
 
-       if (element_info == NULL)
+       if (!element_info || !info_element || info_element->len != size - 2)
                return -1;
-       if (info_element == NULL)
-               return -1;
-
-       if ((info_element->id == QOS_ELEMENT_ID) && (info_element->len == size)) {
-               memcpy(element_info->qui, info_element->data,
-                      info_element->len);
-               element_info->elementID = info_element->id;
-               element_info->length = info_element->len;
-       } else
-               ret = -1;
 
-       if (ret == 0)
-               ret = libipw_verify_qos_info(element_info,
-                                               QOS_OUI_INFO_SUB_TYPE);
-       return ret;
+       memcpy(element_info, info_element, size);
+       return libipw_verify_qos_info(element_info, QOS_OUI_INFO_SUB_TYPE);
 }
 
 /*
index d9baa2f..36d1e6b 100644 (file)
@@ -179,8 +179,8 @@ static struct libipw_txb *libipw_alloc_txb(int nr_frags, int txb_size,
 {
        struct libipw_txb *txb;
        int i;
-       txb = kmalloc(sizeof(struct libipw_txb) + (sizeof(u8 *) * nr_frags),
-                     gfp_mask);
+
+       txb = kmalloc(struct_size(txb, fragments, nr_frags), gfp_mask);
        if (!txb)
                return NULL;
 
index 6ff2674..45abb25 100644 (file)
@@ -571,20 +571,18 @@ il3945_tx_skb(struct il_priv *il,
 
        /* Physical address of this Tx command's header (not MAC header!),
         * within command buffer array. */
-       txcmd_phys =
-           pci_map_single(il->pci_dev, &out_cmd->hdr, firstlen,
-                          PCI_DMA_TODEVICE);
-       if (unlikely(pci_dma_mapping_error(il->pci_dev, txcmd_phys)))
+       txcmd_phys = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, firstlen,
+                                   DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(&il->pci_dev->dev, txcmd_phys)))
                goto drop_unlock;
 
        /* Set up TFD's 2nd entry to point directly to remainder of skb,
         * if any (802.11 null frames have no payload). */
        secondlen = skb->len - hdr_len;
        if (secondlen > 0) {
-               phys_addr =
-                   pci_map_single(il->pci_dev, skb->data + hdr_len, secondlen,
-                                  PCI_DMA_TODEVICE);
-               if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr)))
+               phys_addr = dma_map_single(&il->pci_dev->dev, skb->data + hdr_len,
+                                          secondlen, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr)))
                        goto drop_unlock;
        }
 
@@ -1015,11 +1013,11 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority)
 
                /* Get physical address of RB/SKB */
                page_dma =
-                   pci_map_page(il->pci_dev, page, 0,
+                   dma_map_page(&il->pci_dev->dev, page, 0,
                                 PAGE_SIZE << il->hw_params.rx_page_order,
-                                PCI_DMA_FROMDEVICE);
+                                DMA_FROM_DEVICE);
 
-               if (unlikely(pci_dma_mapping_error(il->pci_dev, page_dma))) {
+               if (unlikely(dma_mapping_error(&il->pci_dev->dev, page_dma))) {
                        __free_pages(page, il->hw_params.rx_page_order);
                        break;
                }
@@ -1028,9 +1026,9 @@ il3945_rx_allocate(struct il_priv *il, gfp_t priority)
 
                if (list_empty(&rxq->rx_used)) {
                        spin_unlock_irqrestore(&rxq->lock, flags);
-                       pci_unmap_page(il->pci_dev, page_dma,
+                       dma_unmap_page(&il->pci_dev->dev, page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __free_pages(page, il->hw_params.rx_page_order);
                        return;
                }
@@ -1062,9 +1060,10 @@ il3945_rx_queue_reset(struct il_priv *il, struct il_rx_queue *rxq)
                /* In the reset function, these buffers may have been allocated
                 * to an SKB, so we need to unmap and free potential storage */
                if (rxq->pool[i].page != NULL) {
-                       pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+                       dma_unmap_page(&il->pci_dev->dev,
+                                      rxq->pool[i].page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __il_free_pages(il, rxq->pool[i].page);
                        rxq->pool[i].page = NULL;
                }
@@ -1111,9 +1110,10 @@ il3945_rx_queue_free(struct il_priv *il, struct il_rx_queue *rxq)
        int i;
        for (i = 0; i < RX_QUEUE_SIZE + RX_FREE_BUFFERS; i++) {
                if (rxq->pool[i].page != NULL) {
-                       pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+                       dma_unmap_page(&il->pci_dev->dev,
+                                      rxq->pool[i].page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __il_free_pages(il, rxq->pool[i].page);
                        rxq->pool[i].page = NULL;
                }
@@ -1213,9 +1213,9 @@ il3945_rx_handle(struct il_priv *il)
 
                rxq->queue[i] = NULL;
 
-               pci_unmap_page(il->pci_dev, rxb->page_dma,
+               dma_unmap_page(&il->pci_dev->dev, rxb->page_dma,
                               PAGE_SIZE << il->hw_params.rx_page_order,
-                              PCI_DMA_FROMDEVICE);
+                              DMA_FROM_DEVICE);
                pkt = rxb_addr(rxb);
 
                len = le32_to_cpu(pkt->len_n_flags) & IL_RX_FRAME_SIZE_MSK;
@@ -1260,11 +1260,11 @@ il3945_rx_handle(struct il_priv *il)
                spin_lock_irqsave(&rxq->lock, flags);
                if (rxb->page != NULL) {
                        rxb->page_dma =
-                           pci_map_page(il->pci_dev, rxb->page, 0,
-                                        PAGE_SIZE << il->hw_params.
-                                        rx_page_order, PCI_DMA_FROMDEVICE);
-                       if (unlikely(pci_dma_mapping_error(il->pci_dev,
-                                                          rxb->page_dma))) {
+                           dma_map_page(&il->pci_dev->dev, rxb->page, 0,
+                                        PAGE_SIZE << il->hw_params.rx_page_order,
+                                        DMA_FROM_DEVICE);
+                       if (unlikely(dma_mapping_error(&il->pci_dev->dev,
+                                                      rxb->page_dma))) {
                                __il_free_pages(il, rxb->page);
                                rxb->page = NULL;
                                list_add_tail(&rxb->list, &rxq->rx_used);
@@ -3616,9 +3616,7 @@ il3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_master(pdev);
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (!err)
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (err) {
                IL_WARN("No suitable DMA available.\n");
                goto out_pci_disable_device;
index 0597d82..a773939 100644 (file)
@@ -652,16 +652,16 @@ il3945_hw_txq_free_tfd(struct il_priv *il, struct il_tx_queue *txq)
 
        /* Unmap tx_cmd */
        if (counter)
-               pci_unmap_single(dev, dma_unmap_addr(&txq->meta[idx], mapping),
+               dma_unmap_single(&dev->dev,
+                                dma_unmap_addr(&txq->meta[idx], mapping),
                                 dma_unmap_len(&txq->meta[idx], len),
-                                PCI_DMA_TODEVICE);
+                                DMA_TO_DEVICE);
 
        /* unmap chunks if any */
 
        for (i = 1; i < counter; i++)
-               pci_unmap_single(dev, le32_to_cpu(tfd->tbs[i].addr),
-                                le32_to_cpu(tfd->tbs[i].len),
-                                PCI_DMA_TODEVICE);
+               dma_unmap_single(&dev->dev, le32_to_cpu(tfd->tbs[i].addr),
+                                le32_to_cpu(tfd->tbs[i].len), DMA_TO_DEVICE);
 
        /* free SKB */
        if (txq->skbs) {
index 341d6a2..0223532 100644 (file)
@@ -94,9 +94,10 @@ il4965_rx_queue_reset(struct il_priv *il, struct il_rx_queue *rxq)
                /* In the reset function, these buffers may have been allocated
                 * to an SKB, so we need to unmap and free potential storage */
                if (rxq->pool[i].page != NULL) {
-                       pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+                       dma_unmap_page(&il->pci_dev->dev,
+                                      rxq->pool[i].page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __il_free_pages(il, rxq->pool[i].page);
                        rxq->pool[i].page = NULL;
                }
@@ -342,11 +343,10 @@ il4965_rx_allocate(struct il_priv *il, gfp_t priority)
                }
 
                /* Get physical address of the RB */
-               page_dma =
-                   pci_map_page(il->pci_dev, page, 0,
-                                PAGE_SIZE << il->hw_params.rx_page_order,
-                                PCI_DMA_FROMDEVICE);
-               if (unlikely(pci_dma_mapping_error(il->pci_dev, page_dma))) {
+               page_dma = dma_map_page(&il->pci_dev->dev, page, 0,
+                                       PAGE_SIZE << il->hw_params.rx_page_order,
+                                       DMA_FROM_DEVICE);
+               if (unlikely(dma_mapping_error(&il->pci_dev->dev, page_dma))) {
                        __free_pages(page, il->hw_params.rx_page_order);
                        break;
                }
@@ -355,9 +355,9 @@ il4965_rx_allocate(struct il_priv *il, gfp_t priority)
 
                if (list_empty(&rxq->rx_used)) {
                        spin_unlock_irqrestore(&rxq->lock, flags);
-                       pci_unmap_page(il->pci_dev, page_dma,
+                       dma_unmap_page(&il->pci_dev->dev, page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __free_pages(page, il->hw_params.rx_page_order);
                        return;
                }
@@ -409,9 +409,10 @@ il4965_rx_queue_free(struct il_priv *il, struct il_rx_queue *rxq)
        int i;
        for (i = 0; i < RX_QUEUE_SIZE + RX_FREE_BUFFERS; i++) {
                if (rxq->pool[i].page != NULL) {
-                       pci_unmap_page(il->pci_dev, rxq->pool[i].page_dma,
+                       dma_unmap_page(&il->pci_dev->dev,
+                                      rxq->pool[i].page_dma,
                                       PAGE_SIZE << il->hw_params.rx_page_order,
-                                      PCI_DMA_FROMDEVICE);
+                                      DMA_FROM_DEVICE);
                        __il_free_pages(il, rxq->pool[i].page);
                        rxq->pool[i].page = NULL;
                }
@@ -1815,20 +1816,18 @@ il4965_tx_skb(struct il_priv *il,
 
        /* Physical address of this Tx command's header (not MAC header!),
         * within command buffer array. */
-       txcmd_phys =
-           pci_map_single(il->pci_dev, &out_cmd->hdr, firstlen,
-                          PCI_DMA_BIDIRECTIONAL);
-       if (unlikely(pci_dma_mapping_error(il->pci_dev, txcmd_phys)))
+       txcmd_phys = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, firstlen,
+                                   DMA_BIDIRECTIONAL);
+       if (unlikely(dma_mapping_error(&il->pci_dev->dev, txcmd_phys)))
                goto drop_unlock;
 
        /* Set up TFD's 2nd entry to point directly to remainder of skb,
         * if any (802.11 null frames have no payload). */
        secondlen = skb->len - hdr_len;
        if (secondlen > 0) {
-               phys_addr =
-                   pci_map_single(il->pci_dev, skb->data + hdr_len, secondlen,
-                                  PCI_DMA_TODEVICE);
-               if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr)))
+               phys_addr = dma_map_single(&il->pci_dev->dev, skb->data + hdr_len,
+                                          secondlen, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr)))
                        goto drop_unlock;
        }
 
@@ -1853,8 +1852,8 @@ il4965_tx_skb(struct il_priv *il,
            offsetof(struct il_tx_cmd, scratch);
 
        /* take back ownership of DMA buffer to enable update */
-       pci_dma_sync_single_for_cpu(il->pci_dev, txcmd_phys, firstlen,
-                                   PCI_DMA_BIDIRECTIONAL);
+       dma_sync_single_for_cpu(&il->pci_dev->dev, txcmd_phys, firstlen,
+                               DMA_BIDIRECTIONAL);
        tx_cmd->dram_lsb_ptr = cpu_to_le32(scratch_phys);
        tx_cmd->dram_msb_ptr = il_get_dma_hi_addr(scratch_phys);
 
@@ -1869,8 +1868,8 @@ il4965_tx_skb(struct il_priv *il,
        if (info->flags & IEEE80211_TX_CTL_AMPDU)
                il->ops->txq_update_byte_cnt_tbl(il, txq, le16_to_cpu(tx_cmd->len));
 
-       pci_dma_sync_single_for_device(il->pci_dev, txcmd_phys, firstlen,
-                                      PCI_DMA_BIDIRECTIONAL);
+       dma_sync_single_for_device(&il->pci_dev->dev, txcmd_phys, firstlen,
+                                  DMA_BIDIRECTIONAL);
 
        /* Tell device the write idx *just past* this latest filled TFD */
        q->write_ptr = il_queue_inc_wrap(q->write_ptr, q->n_bd);
@@ -3929,15 +3928,15 @@ il4965_hw_txq_free_tfd(struct il_priv *il, struct il_tx_queue *txq)
 
        /* Unmap tx_cmd */
        if (num_tbs)
-               pci_unmap_single(dev, dma_unmap_addr(&txq->meta[idx], mapping),
+               dma_unmap_single(&dev->dev,
+                                dma_unmap_addr(&txq->meta[idx], mapping),
                                 dma_unmap_len(&txq->meta[idx], len),
-                                PCI_DMA_BIDIRECTIONAL);
+                                DMA_BIDIRECTIONAL);
 
        /* Unmap chunks, if any. */
        for (i = 1; i < num_tbs; i++)
-               pci_unmap_single(dev, il4965_tfd_tb_get_addr(tfd, i),
-                                il4965_tfd_tb_get_len(tfd, i),
-                                PCI_DMA_TODEVICE);
+               dma_unmap_single(&dev->dev, il4965_tfd_tb_get_addr(tfd, i),
+                                il4965_tfd_tb_get_len(tfd, i), DMA_TO_DEVICE);
 
        /* free SKB */
        if (txq->skbs) {
@@ -4243,9 +4242,9 @@ il4965_rx_handle(struct il_priv *il)
 
                rxq->queue[i] = NULL;
 
-               pci_unmap_page(il->pci_dev, rxb->page_dma,
+               dma_unmap_page(&il->pci_dev->dev, rxb->page_dma,
                               PAGE_SIZE << il->hw_params.rx_page_order,
-                              PCI_DMA_FROMDEVICE);
+                              DMA_FROM_DEVICE);
                pkt = rxb_addr(rxb);
 
                len = le32_to_cpu(pkt->len_n_flags) & IL_RX_FRAME_SIZE_MSK;
@@ -4290,12 +4289,12 @@ il4965_rx_handle(struct il_priv *il)
                spin_lock_irqsave(&rxq->lock, flags);
                if (rxb->page != NULL) {
                        rxb->page_dma =
-                           pci_map_page(il->pci_dev, rxb->page, 0,
-                                        PAGE_SIZE << il->hw_params.
-                                        rx_page_order, PCI_DMA_FROMDEVICE);
+                           dma_map_page(&il->pci_dev->dev, rxb->page, 0,
+                                        PAGE_SIZE << il->hw_params.rx_page_order,
+                                        DMA_FROM_DEVICE);
 
-                       if (unlikely(pci_dma_mapping_error(il->pci_dev,
-                                                          rxb->page_dma))) {
+                       if (unlikely(dma_mapping_error(&il->pci_dev->dev,
+                                                      rxb->page_dma))) {
                                __il_free_pages(il, rxb->page);
                                rxb->page = NULL;
                                list_add_tail(&rxb->list, &rxq->rx_used);
@@ -6514,14 +6513,9 @@ il4965_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_master(pdev);
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
-       if (!err)
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36));
        if (err) {
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (!err)
-                       err =
-                           pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                /* both attempts failed: */
                if (err) {
                        IL_WARN("No suitable DMA available.\n");
index 219fed9..683b632 100644 (file)
@@ -2819,10 +2819,10 @@ il_cmd_queue_unmap(struct il_priv *il)
                i = il_get_cmd_idx(q, q->read_ptr, 0);
 
                if (txq->meta[i].flags & CMD_MAPPED) {
-                       pci_unmap_single(il->pci_dev,
+                       dma_unmap_single(&il->pci_dev->dev,
                                         dma_unmap_addr(&txq->meta[i], mapping),
                                         dma_unmap_len(&txq->meta[i], len),
-                                        PCI_DMA_BIDIRECTIONAL);
+                                        DMA_BIDIRECTIONAL);
                        txq->meta[i].flags = 0;
                }
 
@@ -2831,10 +2831,10 @@ il_cmd_queue_unmap(struct il_priv *il)
 
        i = q->n_win;
        if (txq->meta[i].flags & CMD_MAPPED) {
-               pci_unmap_single(il->pci_dev,
+               dma_unmap_single(&il->pci_dev->dev,
                                 dma_unmap_addr(&txq->meta[i], mapping),
                                 dma_unmap_len(&txq->meta[i], len),
-                                PCI_DMA_BIDIRECTIONAL);
+                                DMA_BIDIRECTIONAL);
                txq->meta[i].flags = 0;
        }
 }
@@ -3197,10 +3197,9 @@ il_enqueue_hcmd(struct il_priv *il, struct il_host_cmd *cmd)
        }
 #endif
 
-       phys_addr =
-           pci_map_single(il->pci_dev, &out_cmd->hdr, fix_size,
-                          PCI_DMA_BIDIRECTIONAL);
-       if (unlikely(pci_dma_mapping_error(il->pci_dev, phys_addr))) {
+       phys_addr = dma_map_single(&il->pci_dev->dev, &out_cmd->hdr, fix_size,
+                                  DMA_BIDIRECTIONAL);
+       if (unlikely(dma_mapping_error(&il->pci_dev->dev, phys_addr))) {
                idx = -ENOMEM;
                goto out;
        }
@@ -3298,8 +3297,8 @@ il_tx_cmd_complete(struct il_priv *il, struct il_rx_buf *rxb)
 
        txq->time_stamp = jiffies;
 
-       pci_unmap_single(il->pci_dev, dma_unmap_addr(meta, mapping),
-                        dma_unmap_len(meta, len), PCI_DMA_BIDIRECTIONAL);
+       dma_unmap_single(&il->pci_dev->dev, dma_unmap_addr(meta, mapping),
+                        dma_unmap_len(meta, len), DMA_BIDIRECTIONAL);
 
        /* Input error checking is done when commands are added to queue. */
        if (meta->flags & CMD_WANT_SKB) {
index 7f1faa9..52d1d39 100644 (file)
@@ -9,7 +9,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX        64
+#define IWL_22000_UCODE_API_MAX        65
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN        39
@@ -154,7 +154,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
        .apmg_not_supported = true,                                     \
        .trans.mq_rx_supported = true,                                  \
        .vht_mu_mimo_supported = true,                                  \
-       .mac_addr_from_csr = true,                                      \
+       .mac_addr_from_csr = 0x380,                                     \
        .ht_params = &iwl_22000_ht_params,                              \
        .nvm_ver = IWL_22000_NVM_VERSION,                               \
        .trans.use_tfh = true,                                          \
@@ -215,6 +215,67 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
                },                                                      \
        }
 
+#define IWL_DEVICE_BZ_COMMON                                           \
+       .ucode_api_max = IWL_22000_UCODE_API_MAX,                       \
+       .ucode_api_min = IWL_22000_UCODE_API_MIN,                       \
+       .led_mode = IWL_LED_RF_STATE,                                   \
+       .nvm_hw_section_num = 10,                                       \
+       .non_shared_ant = ANT_B,                                        \
+       .dccm_offset = IWL_22000_DCCM_OFFSET,                           \
+       .dccm_len = IWL_22000_DCCM_LEN,                                 \
+       .dccm2_offset = IWL_22000_DCCM2_OFFSET,                         \
+       .dccm2_len = IWL_22000_DCCM2_LEN,                               \
+       .smem_offset = IWL_22000_SMEM_OFFSET,                           \
+       .smem_len = IWL_22000_SMEM_LEN,                                 \
+       .features = IWL_TX_CSUM_NETIF_FLAGS | NETIF_F_RXCSUM,           \
+       .apmg_not_supported = true,                                     \
+       .trans.mq_rx_supported = true,                                  \
+       .vht_mu_mimo_supported = true,                                  \
+       .mac_addr_from_csr = 0x30,                                      \
+       .ht_params = &iwl_22000_ht_params,                              \
+       .nvm_ver = IWL_22000_NVM_VERSION,                               \
+       .trans.use_tfh = true,                                          \
+       .trans.rf_id = true,                                            \
+       .trans.gen2 = true,                                             \
+       .nvm_type = IWL_NVM_EXT,                                        \
+       .dbgc_supported = true,                                         \
+       .min_umac_error_event_table = 0x400000,                         \
+       .d3_debug_data_base_addr = 0x401000,                            \
+       .d3_debug_data_length = 60 * 1024,                              \
+       .mon_smem_regs = {                                              \
+               .write_ptr = {                                          \
+                       .addr = LDBG_M2S_BUF_WPTR,                      \
+                       .mask = LDBG_M2S_BUF_WPTR_VAL_MSK,              \
+       },                                                              \
+               .cycle_cnt = {                                          \
+                       .addr = LDBG_M2S_BUF_WRAP_CNT,                  \
+                       .mask = LDBG_M2S_BUF_WRAP_CNT_VAL_MSK,          \
+               },                                                      \
+       }
+
+#define IWL_DEVICE_BZ                                                  \
+       IWL_DEVICE_BZ_COMMON,                                           \
+       .trans.umac_prph_offset = 0x300000,                             \
+       .trans.device_family = IWL_DEVICE_FAMILY_BZ,                    \
+       .trans.base_params = &iwl_ax210_base_params,                    \
+       .min_txq_size = 128,                                            \
+       .gp2_reg_addr = 0xd02c68,                                       \
+       .min_256_ba_txq_size = 1024,                                    \
+       .mon_dram_regs = {                                              \
+               .write_ptr = {                                          \
+                       .addr = DBGC_CUR_DBGBUF_STATUS,                 \
+                       .mask = DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK,      \
+               },                                                      \
+               .cycle_cnt = {                                          \
+                       .addr = DBGC_DBGBUF_WRAP_AROUND,                \
+                       .mask = 0xffffffff,                             \
+               },                                                      \
+               .cur_frag = {                                           \
+                       .addr = DBGC_CUR_DBGBUF_STATUS,                 \
+                       .mask = DBGC_CUR_DBGBUF_STATUS_IDX_MSK,         \
+               },                                                      \
+       }
+
 const struct iwl_cfg_trans_params iwl_qnj_trans_cfg = {
        .mq_rx_supported = true,
        .use_tfh = true,
@@ -373,7 +434,7 @@ const struct iwl_cfg_trans_params iwl_ma_trans_cfg = {
 };
 
 const struct iwl_cfg_trans_params iwl_bz_trans_cfg = {
-       .device_family = IWL_DEVICE_FAMILY_AX210,
+       .device_family = IWL_DEVICE_FAMILY_BZ,
        .base_params = &iwl_ax210_base_params,
        .mq_rx_supported = true,
        .use_tfh = true,
@@ -394,6 +455,7 @@ const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6E AX211 160MHz";
 const char iwl_ax221_name[] = "Intel(R) Wi-Fi 6E AX221 160MHz";
 const char iwl_ax231_name[] = "Intel(R) Wi-Fi 6E AX231 160MHz";
 const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6E AX411 160MHz";
+const char iwl_bz_name[] = "Intel(R) TBD Bz device";
 
 const char iwl_ax200_killer_1650w_name[] =
        "Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)";
@@ -763,28 +825,28 @@ const struct iwl_cfg iwl_cfg_quz_a0_hr_b0 = {
 const struct iwl_cfg iwl_cfg_bz_a0_hr_b0 = {
        .fw_name_pre = IWL_BZ_A_HR_B_FW_PRE,
        .uhb_supported = true,
-       IWL_DEVICE_AX210,
+       IWL_DEVICE_BZ,
        .num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
 const struct iwl_cfg iwl_cfg_bz_a0_gf_a0 = {
        .fw_name_pre = IWL_BZ_A_GF_A_FW_PRE,
        .uhb_supported = true,
-       IWL_DEVICE_AX210,
+       IWL_DEVICE_BZ,
        .num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
 const struct iwl_cfg iwl_cfg_bz_a0_gf4_a0 = {
        .fw_name_pre = IWL_BZ_A_GF4_A_FW_PRE,
        .uhb_supported = true,
-       IWL_DEVICE_AX210,
+       IWL_DEVICE_BZ,
        .num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
 const struct iwl_cfg iwl_cfg_bz_a0_mr_a0 = {
        .fw_name_pre = IWL_BZ_A_MR_A_FW_PRE,
        .uhb_supported = true,
-       IWL_DEVICE_AX210,
+       IWL_DEVICE_BZ,
        .num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
index 871533b..7a7ca06 100644 (file)
@@ -89,7 +89,7 @@ static const struct iwl_tt_params iwl9000_tt_params = {
        .apmg_not_supported = true,                                     \
        .num_rbds = 512,                                                \
        .vht_mu_mimo_supported = true,                                  \
-       .mac_addr_from_csr = true,                                      \
+       .mac_addr_from_csr = 0x380,                                     \
        .nvm_type = IWL_NVM_EXT,                                        \
        .dbgc_supported = true,                                         \
        .min_umac_error_event_table = 0x800000,                         \
index c01523f..cc7b69f 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /******************************************************************************
  *
- * Copyright(c) 2003 - 2014, 2018 - 2020  Intel Corporation. All rights reserved.
+ * Copyright(c) 2003 - 2014, 2018 - 2021  Intel Corporation. All rights reserved.
  * Copyright(c) 2015 Intel Deutschland GmbH
  *
  * Portions of this file are derived from the ipw3945 project, as well
@@ -1950,7 +1950,7 @@ static void iwlagn_fw_error(struct iwl_priv *priv, bool ondemand)
        }
 }
 
-static void iwl_nic_error(struct iwl_op_mode *op_mode)
+static void iwl_nic_error(struct iwl_op_mode *op_mode, bool sync)
 {
        struct iwl_priv *priv = IWL_OP_MODE_GET_DVM(op_mode);
 
index 80475c7..3cd7b42 100644 (file)
@@ -318,7 +318,7 @@ iwlagn_accumulative_statistics(struct iwl_priv *priv,
                    (__le32 *)&priv->delta_stats._name,         \
                    (__le32 *)&priv->max_delta_stats._name,     \
                    (__le32 *)&priv->accum_stats._name,         \
-                   sizeof(*_name));
+                   sizeof(*_name))
 
        ACCUM(common);
        ACCUM(rx_non_phy);
index 34933f1..1efac0b 100644 (file)
@@ -264,7 +264,7 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
                goto out_free;
        }
 
-       enabled = !!wifi_pkg->package.elements[0].integer.value;
+       enabled = !!wifi_pkg->package.elements[1].integer.value;
 
        if (!enabled) {
                *block_list_size = -1;
@@ -273,15 +273,15 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
                goto out_free;
        }
 
-       if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
-           wifi_pkg->package.elements[1].integer.value >
+       if (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER ||
+           wifi_pkg->package.elements[2].integer.value >
            APCI_WTAS_BLACK_LIST_MAX) {
                IWL_DEBUG_RADIO(fwrt, "TAS invalid array size %llu\n",
                                wifi_pkg->package.elements[1].integer.value);
                ret = -EINVAL;
                goto out_free;
        }
-       *block_list_size = wifi_pkg->package.elements[1].integer.value;
+       *block_list_size = wifi_pkg->package.elements[2].integer.value;
 
        IWL_DEBUG_RADIO(fwrt, "TAS array size %d\n", *block_list_size);
        if (*block_list_size > APCI_WTAS_BLACK_LIST_MAX) {
@@ -294,15 +294,15 @@ int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
        for (i = 0; i < *block_list_size; i++) {
                u32 country;
 
-               if (wifi_pkg->package.elements[2 + i].type !=
+               if (wifi_pkg->package.elements[3 + i].type !=
                    ACPI_TYPE_INTEGER) {
                        IWL_DEBUG_RADIO(fwrt,
-                                       "TAS invalid array elem %d\n", 2 + i);
+                                       "TAS invalid array elem %d\n", 3 + i);
                        ret = -EINVAL;
                        goto out_free;
                }
 
-               country = wifi_pkg->package.elements[2 + i].integer.value;
+               country = wifi_pkg->package.elements[3 + i].integer.value;
                block_list_array[i] = cpu_to_le32(country);
                IWL_DEBUG_RADIO(fwrt, "TAS block list country %d\n", country);
        }
@@ -412,20 +412,35 @@ IWL_EXPORT_SYMBOL(iwl_acpi_get_eckv);
 
 static int iwl_sar_set_profile(union acpi_object *table,
                               struct iwl_sar_profile *profile,
-                              bool enabled)
+                              bool enabled, u8 num_chains, u8 num_sub_bands)
 {
-       int i;
-
-       profile->enabled = enabled;
-
-       for (i = 0; i < ACPI_SAR_TABLE_SIZE; i++) {
-               if (table[i].type != ACPI_TYPE_INTEGER ||
-                   table[i].integer.value > U8_MAX)
-                       return -EINVAL;
+       int i, j, idx = 0;
 
-               profile->table[i] = table[i].integer.value;
+       /*
+        * The table from ACPI is flat, but we store it in a
+        * structured array.
+        */
+       for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV2; i++) {
+               for (j = 0; j < ACPI_SAR_NUM_SUB_BANDS_REV2; j++) {
+                       /* if we don't have the values, use the default */
+                       if (i >= num_chains || j >= num_sub_bands) {
+                               profile->chains[i].subbands[j] = 0;
+                       } else {
+                               if (table[idx].type != ACPI_TYPE_INTEGER ||
+                                   table[idx].integer.value > U8_MAX)
+                                       return -EINVAL;
+
+                               profile->chains[i].subbands[j] =
+                                       table[idx].integer.value;
+
+                               idx++;
+                       }
+               }
        }
 
+       /* Only if all values were valid can the profile be enabled */
+       profile->enabled = enabled;
+
        return 0;
 }
 
@@ -433,10 +448,10 @@ static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
                              __le16 *per_chain, u32 n_subbands,
                              int prof_a, int prof_b)
 {
-       int profs[ACPI_SAR_NUM_CHAIN_LIMITS] = { prof_a, prof_b };
-       int i, j, idx;
+       int profs[ACPI_SAR_NUM_CHAINS_REV0] = { prof_a, prof_b };
+       int i, j;
 
-       for (i = 0; i < ACPI_SAR_NUM_CHAIN_LIMITS; i++) {
+       for (i = 0; i < ACPI_SAR_NUM_CHAINS_REV0; i++) {
                struct iwl_sar_profile *prof;
 
                /* don't allow SAR to be disabled (profile 0 means disable) */
@@ -467,11 +482,10 @@ static int iwl_sar_fill_table(struct iwl_fw_runtime *fwrt,
                               i, profs[i]);
                IWL_DEBUG_RADIO(fwrt, "  Chain[%d]:\n", i);
                for (j = 0; j < n_subbands; j++) {
-                       idx = i * ACPI_SAR_NUM_SUB_BANDS + j;
                        per_chain[i * n_subbands + j] =
-                               cpu_to_le16(prof->table[idx]);
+                               cpu_to_le16(prof->chains[i].subbands[j]);
                        IWL_DEBUG_RADIO(fwrt, "    Band[%d] = %d * .125dBm\n",
-                                       j, prof->table[idx]);
+                                       j, prof->chains[i].subbands[j]);
                }
        }
 
@@ -486,7 +500,7 @@ int iwl_sar_select_profile(struct iwl_fw_runtime *fwrt,
 
        for (i = 0; i < n_tables; i++) {
                ret = iwl_sar_fill_table(fwrt,
-                        &per_chain[i * n_subbands * ACPI_SAR_NUM_CHAIN_LIMITS],
+                        &per_chain[i * n_subbands * ACPI_SAR_NUM_CHAINS_REV0],
                         n_subbands, prof_a, prof_b);
                if (ret)
                        break;
@@ -501,28 +515,71 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
        union acpi_object *wifi_pkg, *table, *data;
        bool enabled;
        int ret, tbl_rev;
+       u8 num_chains, num_sub_bands;
 
        data = iwl_acpi_get_object(fwrt->dev, ACPI_WRDS_METHOD);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
+       /* start by trying to read revision 2 */
        wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
-                                        ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev);
-       if (IS_ERR(wifi_pkg)) {
-               ret = PTR_ERR(wifi_pkg);
-               goto out_free;
+                                        ACPI_WRDS_WIFI_DATA_SIZE_REV2,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 2) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV2;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV2;
+
+               goto read_table;
        }
 
-       if (tbl_rev != 0) {
-               ret = -EINVAL;
-               goto out_free;
+       /* then try revision 1 */
+       wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+                                        ACPI_WRDS_WIFI_DATA_SIZE_REV1,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 1) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV1;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV1;
+
+               goto read_table;
+       }
+
+       /* then finally revision 0 */
+       wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+                                        ACPI_WRDS_WIFI_DATA_SIZE_REV0,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 0) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV0;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV0;
+
+               goto read_table;
        }
 
+       ret = PTR_ERR(wifi_pkg);
+       goto out_free;
+
+read_table:
        if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
                ret = -EINVAL;
                goto out_free;
        }
 
+       IWL_DEBUG_RADIO(fwrt, "Reading WRDS tbl_rev=%d\n", tbl_rev);
+
        enabled = !!(wifi_pkg->package.elements[1].integer.value);
 
        /* position of the actual table */
@@ -531,7 +588,8 @@ int iwl_sar_get_wrds_table(struct iwl_fw_runtime *fwrt)
        /* The profile from WRDS is officially profile 1, but goes
         * into sar_profiles[0] (because we don't have a profile 0).
         */
-       ret = iwl_sar_set_profile(table, &fwrt->sar_profiles[0], enabled);
+       ret = iwl_sar_set_profile(table, &fwrt->sar_profiles[0], enabled,
+                                 num_chains, num_sub_bands);
 out_free:
        kfree(data);
        return ret;
@@ -544,23 +602,64 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
        bool enabled;
        int i, n_profiles, tbl_rev, pos;
        int ret = 0;
+       u8 num_chains, num_sub_bands;
 
        data = iwl_acpi_get_object(fwrt->dev, ACPI_EWRD_METHOD);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
+       /* start by trying to read revision 2 */
        wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
-                                        ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev);
-       if (IS_ERR(wifi_pkg)) {
-               ret = PTR_ERR(wifi_pkg);
-               goto out_free;
+                                        ACPI_EWRD_WIFI_DATA_SIZE_REV2,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 2) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV2;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV2;
+
+               goto read_table;
        }
 
-       if (tbl_rev != 0) {
-               ret = -EINVAL;
-               goto out_free;
+       /* then try revision 1 */
+       wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+                                        ACPI_EWRD_WIFI_DATA_SIZE_REV1,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 1) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV1;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV1;
+
+               goto read_table;
+       }
+
+       /* then finally revision 0 */
+       wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+                                        ACPI_EWRD_WIFI_DATA_SIZE_REV0,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 0) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_chains = ACPI_SAR_NUM_CHAINS_REV0;
+               num_sub_bands = ACPI_SAR_NUM_SUB_BANDS_REV0;
+
+               goto read_table;
        }
 
+       ret = PTR_ERR(wifi_pkg);
+       goto out_free;
+
+read_table:
        if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
            wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) {
                ret = -EINVAL;
@@ -589,13 +688,13 @@ int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
                 * have profile 0).  So in the array we start from 1.
                 */
                ret = iwl_sar_set_profile(&wifi_pkg->package.elements[pos],
-                                         &fwrt->sar_profiles[i + 1],
-                                         enabled);
+                                         &fwrt->sar_profiles[i + 1], enabled,
+                                         num_chains, num_sub_bands);
                if (ret < 0)
                        break;
 
                /* go to the next table */
-               pos += ACPI_SAR_TABLE_SIZE;
+               pos += num_chains * num_sub_bands;
        }
 
 out_free:
@@ -607,41 +706,93 @@ IWL_EXPORT_SYMBOL(iwl_sar_get_ewrd_table);
 int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
 {
        union acpi_object *wifi_pkg, *data;
-       int i, j, ret, tbl_rev;
-       int idx = 1;
+       int i, j, k, ret, tbl_rev;
+       int idx = 1; /* start from one to skip the domain */
+       u8 num_bands;
 
        data = iwl_acpi_get_object(fwrt->dev, ACPI_WGDS_METHOD);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
+       /* start by trying to read revision 2 */
        wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
-                                        ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev);
+                                        ACPI_WGDS_WIFI_DATA_SIZE_REV2,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 2) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
 
-       if (IS_ERR(wifi_pkg)) {
-               ret = PTR_ERR(wifi_pkg);
-               goto out_free;
+               num_bands = ACPI_GEO_NUM_BANDS_REV2;
+
+               goto read_table;
        }
 
-       if (tbl_rev > 1) {
-               ret = -EINVAL;
-               goto out_free;
+       /* then try revision 0 (which is the same as 1) */
+       wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+                                        ACPI_WGDS_WIFI_DATA_SIZE_REV0,
+                                        &tbl_rev);
+       if (!IS_ERR(wifi_pkg)) {
+               if (tbl_rev != 0 && tbl_rev != 1) {
+                       ret = PTR_ERR(wifi_pkg);
+                       goto out_free;
+               }
+
+               num_bands = ACPI_GEO_NUM_BANDS_REV0;
+
+               goto read_table;
        }
 
+       ret = PTR_ERR(wifi_pkg);
+       goto out_free;
+
+read_table:
        fwrt->geo_rev = tbl_rev;
        for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
-               for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
+               for (j = 0; j < ACPI_GEO_NUM_BANDS_REV2; j++) {
                        union acpi_object *entry;
 
-                       entry = &wifi_pkg->package.elements[idx++];
-                       if (entry->type != ACPI_TYPE_INTEGER ||
-                           entry->integer.value > U8_MAX) {
-                               ret = -EINVAL;
-                               goto out_free;
+                       /*
+                        * num_bands is either 2 or 3, if it's only 2 then
+                        * fill the third band (6 GHz) with the values from
+                        * 5 GHz (second band)
+                        */
+                       if (j >= num_bands) {
+                               fwrt->geo_profiles[i].bands[j].max =
+                                       fwrt->geo_profiles[i].bands[1].max;
+                       } else {
+                               entry = &wifi_pkg->package.elements[idx++];
+                               if (entry->type != ACPI_TYPE_INTEGER ||
+                                   entry->integer.value > U8_MAX) {
+                                       ret = -EINVAL;
+                                       goto out_free;
+                               }
+
+                               fwrt->geo_profiles[i].bands[j].max =
+                                       entry->integer.value;
                        }
 
-                       fwrt->geo_profiles[i].values[j] = entry->integer.value;
+                       for (k = 0; k < ACPI_GEO_NUM_CHAINS; k++) {
+                               /* same here as above */
+                               if (j >= num_bands) {
+                                       fwrt->geo_profiles[i].bands[j].chains[k] =
+                                               fwrt->geo_profiles[i].bands[1].chains[k];
+                               } else {
+                                       entry = &wifi_pkg->package.elements[idx++];
+                                       if (entry->type != ACPI_TYPE_INTEGER ||
+                                           entry->integer.value > U8_MAX) {
+                                               ret = -EINVAL;
+                                               goto out_free;
+                                       }
+
+                                       fwrt->geo_profiles[i].bands[j].chains[k] =
+                                               entry->integer.value;
+                               }
+                       }
                }
        }
+
        ret = 0;
 out_free:
        kfree(data);
@@ -673,43 +824,26 @@ IWL_EXPORT_SYMBOL(iwl_sar_geo_support);
 int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
                     struct iwl_per_chain_offset *table, u32 n_bands)
 {
-       int ret, i, j;
+       int i, j;
 
        if (!iwl_sar_geo_support(fwrt))
                return -EOPNOTSUPP;
 
-       ret = iwl_sar_get_wgds_table(fwrt);
-       if (ret < 0) {
-               IWL_DEBUG_RADIO(fwrt,
-                               "Geo SAR BIOS table invalid or unavailable. (%d)\n",
-                               ret);
-               /* we don't fail if the table is not available */
-               return -ENOENT;
-       }
-
        for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
                for (j = 0; j < n_bands; j++) {
                        struct iwl_per_chain_offset *chain =
                                &table[i * n_bands + j];
-                       u8 *value;
-
-                       if (j * ACPI_GEO_PER_CHAIN_SIZE >=
-                           ARRAY_SIZE(fwrt->geo_profiles[0].values))
-                               /*
-                                * Currently we only store lb an hb values, and
-                                * don't have any special ones for uhb. So leave
-                                * those empty for the time being
-                                */
-                               break;
-
-                       value = &fwrt->geo_profiles[i].values[j *
-                               ACPI_GEO_PER_CHAIN_SIZE];
-                       chain->max_tx_power = cpu_to_le16(value[0]);
-                       chain->chain_a = value[1];
-                       chain->chain_b = value[2];
+
+                       chain->max_tx_power =
+                               cpu_to_le16(fwrt->geo_profiles[i].bands[j].max);
+                       chain->chain_a = fwrt->geo_profiles[i].bands[j].chains[0];
+                       chain->chain_b = fwrt->geo_profiles[i].bands[j].chains[1];
                        IWL_DEBUG_RADIO(fwrt,
                                        "SAR geographic profile[%d] Band[%d]: chain A = %d chain B = %d max_tx_power = %d\n",
-                                       i, j, value[1], value[2], value[0]);
+                                       i, j,
+                                       fwrt->geo_profiles[i].bands[j].chains[0],
+                                       fwrt->geo_profiles[i].bands[j].chains[1],
+                                       fwrt->geo_profiles[i].bands[j].max);
                }
        }
 
index b858e99..16ed099 100644 (file)
 
 #define ACPI_WIFI_DOMAIN       (0x07)
 
-#define ACPI_SAR_TABLE_SIZE            10
 #define ACPI_SAR_PROFILE_NUM           4
 
-#define ACPI_GEO_TABLE_SIZE            6
 #define ACPI_NUM_GEO_PROFILES          3
 #define ACPI_GEO_PER_CHAIN_SIZE                3
 
-#define ACPI_SAR_NUM_CHAIN_LIMITS      2
-#define ACPI_SAR_NUM_SUB_BANDS         5
-#define ACPI_SAR_NUM_TABLES            1
+#define ACPI_SAR_NUM_CHAINS_REV0       2
+#define ACPI_SAR_NUM_CHAINS_REV1       2
+#define ACPI_SAR_NUM_CHAINS_REV2       4
+#define ACPI_SAR_NUM_SUB_BANDS_REV0    5
+#define ACPI_SAR_NUM_SUB_BANDS_REV1    11
+#define ACPI_SAR_NUM_SUB_BANDS_REV2    11
+
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV0  (ACPI_SAR_NUM_CHAINS_REV0 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV0 + 2)
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV1  (ACPI_SAR_NUM_CHAINS_REV1 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV1 + 2)
+#define ACPI_WRDS_WIFI_DATA_SIZE_REV2  (ACPI_SAR_NUM_CHAINS_REV2 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV2 + 2)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV0  ((ACPI_SAR_PROFILE_NUM - 1) * \
+                                        ACPI_SAR_NUM_CHAINS_REV0 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV0 + 3)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV1  ((ACPI_SAR_PROFILE_NUM - 1) * \
+                                        ACPI_SAR_NUM_CHAINS_REV1 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV1 + 3)
+#define ACPI_EWRD_WIFI_DATA_SIZE_REV2  ((ACPI_SAR_PROFILE_NUM - 1) * \
+                                        ACPI_SAR_NUM_CHAINS_REV2 * \
+                                        ACPI_SAR_NUM_SUB_BANDS_REV2 + 3)
+
+/* revision 0 and 1 are identical, except for the semantics in the FW */
+#define ACPI_GEO_NUM_BANDS_REV0                2
+#define ACPI_GEO_NUM_BANDS_REV2                3
+#define ACPI_GEO_NUM_CHAINS            2
+
+#define ACPI_WGDS_WIFI_DATA_SIZE_REV0  (ACPI_NUM_GEO_PROFILES *   \
+                                        ACPI_GEO_NUM_BANDS_REV0 * \
+                                        ACPI_GEO_PER_CHAIN_SIZE + 1)
+#define ACPI_WGDS_WIFI_DATA_SIZE_REV2  (ACPI_NUM_GEO_PROFILES *   \
+                                        ACPI_GEO_NUM_BANDS_REV2 * \
+                                        ACPI_GEO_PER_CHAIN_SIZE + 1)
 
-#define ACPI_WRDS_WIFI_DATA_SIZE       (ACPI_SAR_TABLE_SIZE + 2)
-#define ACPI_EWRD_WIFI_DATA_SIZE       ((ACPI_SAR_PROFILE_NUM - 1) * \
-                                        ACPI_SAR_TABLE_SIZE + 3)
-#define ACPI_WGDS_WIFI_DATA_SIZE       19
 #define ACPI_WRDD_WIFI_DATA_SIZE       2
 #define ACPI_SPLC_WIFI_DATA_SIZE       2
 #define ACPI_ECKV_WIFI_DATA_SIZE       2
@@ -51,8 +76,6 @@
 #define APCI_WTAS_BLACK_LIST_MAX       16
 #define ACPI_WTAS_WIFI_DATA_SIZE       (3 + APCI_WTAS_BLACK_LIST_MAX)
 
-#define ACPI_WGDS_TABLE_SIZE           3
-
 #define ACPI_PPAG_WIFI_DATA_SIZE_V1    ((IWL_NUM_CHAIN_LIMITS * \
                                          IWL_NUM_SUB_BANDS_V1) + 2)
 #define ACPI_PPAG_WIFI_DATA_SIZE_V2    ((IWL_NUM_CHAIN_LIMITS * \
 #define ACPI_PPAG_MIN_HB -16
 #define ACPI_PPAG_MAX_HB 40
 
+/*
+ * The profile for revision 2 is a superset of revision 1, which is in
+ * turn a superset of revision 0.  So we can store all revisions
+ * inside revision 2, which is what we represent here.
+ */
+struct iwl_sar_profile_chain {
+       u8 subbands[ACPI_SAR_NUM_SUB_BANDS_REV2];
+};
+
 struct iwl_sar_profile {
        bool enabled;
-       u8 table[ACPI_SAR_TABLE_SIZE];
+       struct iwl_sar_profile_chain chains[ACPI_SAR_NUM_CHAINS_REV2];
+};
+
+/* Same thing as with SAR, all revisions fit in revision 2 */
+struct iwl_geo_profile_band {
+       u8 max;
+       u8 chains[ACPI_GEO_NUM_CHAINS];
 };
 
 struct iwl_geo_profile {
-       u8 values[ACPI_GEO_TABLE_SIZE];
+       struct iwl_geo_profile_band bands[ACPI_GEO_NUM_BANDS_REV2];
 };
 
 enum iwl_dsm_funcs_rev_0 {
@@ -234,7 +272,7 @@ static inline int iwl_sar_get_ewrd_table(struct iwl_fw_runtime *fwrt)
 
 static inline int iwl_sar_get_wgds_table(struct iwl_fw_runtime *fwrt)
 {
-       return -ENOENT;
+       return 1;
 }
 
 static inline bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt)
index 01580c9..3e81e93 100644 (file)
@@ -142,7 +142,7 @@ enum iwl_bt_mxbox_dw3 {
                                         "\t%s: %d%s",                      \
                                         #_field,                           \
                                         BT_MBOX_MSG(notif, _num, _field),  \
-                                        true ? "\n" : ", ");
+                                        true ? "\n" : ", ")
 enum iwl_bt_activity_grading {
        BT_OFF                  = 0,
        BT_ON_NO_CONNECTION     = 1,
index ce060c3..ee6b584 100644 (file)
@@ -550,7 +550,8 @@ enum iwl_legacy_cmds {
        WOWLAN_CONFIGURATION = 0xe1,
 
        /**
-        * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd
+        * @WOWLAN_TSC_RSC_PARAM: &struct iwl_wowlan_rsc_tsc_params_cmd_v4,
+        *      &struct iwl_wowlan_rsc_tsc_params_cmd
         */
        WOWLAN_TSC_RSC_PARAM = 0xe2,
 
index b2e7ef3..3ec82ca 100644 (file)
@@ -6,6 +6,7 @@
  */
 #ifndef __iwl_fw_api_d3_h__
 #define __iwl_fw_api_d3_h__
+#include <iwl-trans.h>
 
 /**
  * enum iwl_d0i3_flags - d0i3 flags
@@ -389,11 +390,14 @@ struct iwl_wowlan_config_cmd {
        u8 reserved;
 } __packed; /* WOWLAN_CONFIG_API_S_VER_5 */
 
+#define IWL_NUM_RSC    16
+#define WOWLAN_KEY_MAX_SIZE    32
+#define WOWLAN_GTK_KEYS_NUM     2
+#define WOWLAN_IGTK_KEYS_NUM   2
+
 /*
  * WOWLAN_TSC_RSC_PARAMS
  */
-#define IWL_NUM_RSC    16
-
 struct tkip_sc {
        __le16 iv16;
        __le16 pad;
@@ -425,11 +429,19 @@ struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 {
        union iwl_all_tsc_rsc all_tsc_rsc;
 } __packed; /* ALL_TSC_RSC_API_S_VER_2 */
 
-struct iwl_wowlan_rsc_tsc_params_cmd {
+struct iwl_wowlan_rsc_tsc_params_cmd_v4 {
        struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 params;
        __le32 sta_id;
 } __packed; /* ALL_TSC_RSC_API_S_VER_4 */
 
+struct iwl_wowlan_rsc_tsc_params_cmd {
+       __le64 ucast_rsc[IWL_MAX_TID_COUNT];
+       __le64 mcast_rsc[WOWLAN_GTK_KEYS_NUM][IWL_MAX_TID_COUNT];
+       __le32 sta_id;
+#define IWL_MCAST_KEY_MAP_INVALID      0xff
+       u8 mcast_key_id_map[4];
+} __packed; /* ALL_TSC_RSC_API_S_VER_5 */
+
 #define IWL_MIC_KEY_SIZE       8
 struct iwl_mic_keys {
        u8 tx[IWL_MIC_KEY_SIZE];
@@ -541,10 +553,6 @@ struct iwl_wowlan_gtk_status_v1 {
        struct iwl_wowlan_rsc_tsc_params_cmd_ver_2 rsc;
 } __packed; /* WOWLAN_GTK_MATERIAL_VER_1 */
 
-#define WOWLAN_KEY_MAX_SIZE    32
-#define WOWLAN_GTK_KEYS_NUM     2
-#define WOWLAN_IGTK_KEYS_NUM   2
-
 /**
  * struct iwl_wowlan_gtk_status - GTK status
  * @key: GTK material
index 5a2d9a1..d8b5870 100644 (file)
@@ -33,12 +33,11 @@ struct iwl_fw_ini_hcmd {
  *
  * @version: TLV version
  * @domain: domain of the TLV. One of &enum iwl_fw_ini_dbg_domain
- * @data: TLV data
  */
 struct iwl_fw_ini_header {
        __le32 version;
        __le32 domain;
-       u8 data[];
+       /* followed by the data */
 } __packed; /* FW_TLV_DEBUG_HEADER_S_VER_1 */
 
 /**
@@ -130,6 +129,7 @@ struct iwl_fw_ini_region_internal_buffer {
  *     &IWL_FW_INI_REGION_PERIPHERY_PHY, &IWL_FW_INI_REGION_PERIPHERY_AUX,
  *     &IWL_FW_INI_REGION_PAGING, &IWL_FW_INI_REGION_CSR,
  *     &IWL_FW_INI_REGION_DRAM_IMR and &IWL_FW_INI_REGION_PCI_IOSF_CONFIG
+ *     &IWL_FW_INI_REGION_DBGI_SRAM, &FW_TLV_DEBUG_REGION_TYPE_DBGI_SRAM,
  * @fifos: fifos configuration. Used by &IWL_FW_INI_REGION_TXF and
  *     &IWL_FW_INI_REGION_RXF
  * @err_table: error table configuration. Used by
@@ -249,7 +249,6 @@ struct iwl_fw_ini_hcmd_tlv {
  * @IWL_FW_INI_ALLOCATION_ID_DBGC1: allocation meant for DBGC1 configuration
  * @IWL_FW_INI_ALLOCATION_ID_DBGC2: allocation meant for DBGC2 configuration
  * @IWL_FW_INI_ALLOCATION_ID_DBGC3: allocation meant for DBGC3 configuration
- * @IWL_FW_INI_ALLOCATION_ID_INTERNAL: allocation meant for Intreanl SMEM in D3
  * @IWL_FW_INI_ALLOCATION_NUM: number of allocation ids
 */
 enum iwl_fw_ini_allocation_id {
@@ -257,7 +256,6 @@ enum iwl_fw_ini_allocation_id {
        IWL_FW_INI_ALLOCATION_ID_DBGC1,
        IWL_FW_INI_ALLOCATION_ID_DBGC2,
        IWL_FW_INI_ALLOCATION_ID_DBGC3,
-       IWL_FW_INI_ALLOCATION_ID_INTERNAL,
        IWL_FW_INI_ALLOCATION_NUM,
 }; /* FW_DEBUG_TLV_ALLOCATION_ID_E_VER_1 */
 
@@ -298,6 +296,7 @@ enum iwl_fw_ini_buffer_location {
  * @IWL_FW_INI_REGION_DRAM_IMR: IMR memory
  * @IWL_FW_INI_REGION_PCI_IOSF_CONFIG: PCI/IOSF config
  * @IWL_FW_INI_REGION_SPECIAL_DEVICE_MEMORY: special device memory
+ * @IWL_FW_INI_REGION_DBGI_SRAM: periphery registers of DBGI SRAM
  * @IWL_FW_INI_REGION_NUM: number of region types
  */
 enum iwl_fw_ini_region_type {
@@ -319,6 +318,7 @@ enum iwl_fw_ini_region_type {
        IWL_FW_INI_REGION_DRAM_IMR,
        IWL_FW_INI_REGION_PCI_IOSF_CONFIG,
        IWL_FW_INI_REGION_SPECIAL_DEVICE_MEMORY,
+       IWL_FW_INI_REGION_DBGI_SRAM,
        IWL_FW_INI_REGION_NUM
 }; /* FW_TLV_DEBUG_REGION_TYPE_API_E */
 
index 0e38eb1..6bbb8b8 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #ifndef __iwl_fw_api_location_h__
 #define __iwl_fw_api_location_h__
@@ -151,6 +151,10 @@ enum iwl_tof_mcsi_enable {
  *     is valid
  * @IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS: NDP parameters are valid
  * @IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK: LMR feedback support is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_SESSION_ID: session id flag is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR: the bss_color field is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR: the
+ *     min_time_between_msr and max_time_between_msr fields are valid
  */
 enum iwl_tof_responder_cmd_valid_field {
        IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO = BIT(0),
@@ -169,6 +173,9 @@ enum iwl_tof_responder_cmd_valid_field {
        IWL_TOF_RESPONDER_CMD_VALID_NDP_SUPPORT = BIT(22),
        IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS = BIT(23),
        IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK = BIT(24),
+       IWL_TOF_RESPONDER_CMD_VALID_SESSION_ID = BIT(25),
+       IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR = BIT(26),
+       IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR = BIT(27),
 };
 
 /**
@@ -186,6 +193,8 @@ enum iwl_tof_responder_cmd_valid_field {
  * @IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT: support NDP ranging
  * @IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK: request for LMR feedback if the
  *     initiator supports it
+ * @IWL_TOF_RESPONDER_FLAGS_SESSION_ID: send the session id in the initial FTM
+ *     frame.
  */
 enum iwl_tof_responder_cfg_flags {
        IWL_TOF_RESPONDER_FLAGS_NON_ASAP_SUPPORT = BIT(0),
@@ -200,6 +209,7 @@ enum iwl_tof_responder_cfg_flags {
        IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT = RATE_MCS_ANT_ABC_MSK,
        IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT = BIT(24),
        IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK = BIT(25),
+       IWL_TOF_RESPONDER_FLAGS_SESSION_ID = BIT(27),
 };
 
 /**
@@ -297,13 +307,13 @@ struct iwl_tof_responder_config_cmd_v7 {
  * @r2i_ndp_params: parameters for R2I NDP.
  *     bits 0 - 2: max number of LTF repetitions
  *     bits 3 - 5: max number of spatial streams (supported values are < 2)
- *     bits 6 - 7: max number of total LTFs
- *                 (&enum ieee80211_range_params_max_total_ltf)
+ *     bits 6 - 7: max number of total LTFs see
+ *     &enum ieee80211_range_params_max_total_ltf
  * @i2r_ndp_params: parameters for I2R NDP.
  *     bits 0 - 2: max number of LTF repetitions
  *     bits 3 - 5: max number of spatial streams
- *     bits 6 - 7: max number of total LTFs
- *                 (&enum ieee80211_range_params_max_total_ltf)
+ *     bits 6 - 7: max number of total LTFs see
+ *     &enum ieee80211_range_params_max_total_ltf
  */
 struct iwl_tof_responder_config_cmd_v8 {
        __le32 cmd_valid_fields;
@@ -322,6 +332,58 @@ struct iwl_tof_responder_config_cmd_v8 {
        u8 i2r_ndp_params;
 } __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_8 */
 
+/**
+ * struct iwl_tof_responder_config_cmd_v9 - ToF AP mode (for debug)
+ * @cmd_valid_fields: &iwl_tof_responder_cmd_valid_field
+ * @responder_cfg_flags: &iwl_tof_responder_cfg_flags
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ *             bits 4 - 7: &enum iwl_location_bw.
+ * @bss_color: current AP bss_color
+ * @channel_num: current AP Channel
+ * @ctrl_ch_position: coding of the control channel position relative to
+ *     the center frequency, see iwl_mvm_get_ctrl_pos()
+ * @sta_id: index of the AP STA when in AP mode
+ * @reserved1: reserved
+ * @toa_offset: Artificial addition [pSec] for the ToA - to be used for debug
+ *     purposes, simulating station movement by adding various values
+ *     to this field
+ * @common_calib: XVT: common calibration value
+ * @specific_calib: XVT: specific calibration value
+ * @bssid: Current AP BSSID
+ * @r2i_ndp_params: parameters for R2I NDP.
+ *     bits 0 - 2: max number of LTF repetitions
+ *     bits 3 - 5: max number of spatial streams (supported values are < 2)
+ *     bits 6 - 7: max number of total LTFs see
+ *     &enum ieee80211_range_params_max_total_ltf
+ * @i2r_ndp_params: parameters for I2R NDP.
+ *     bits 0 - 2: max number of LTF repetitions
+ *     bits 3 - 5: max number of spatial streams
+ *     bits 6 - 7: max number of total LTFs see
+ *     &enum ieee80211_range_params_max_total_ltf
+ * @min_time_between_msr: for non trigger based NDP ranging, minimum time
+ *     between measurements in milliseconds.
+ * @max_time_between_msr: for non trigger based NDP ranging, maximum time
+ *     between measurements in milliseconds.
+ */
+struct iwl_tof_responder_config_cmd_v9 {
+       __le32 cmd_valid_fields;
+       __le32 responder_cfg_flags;
+       u8 format_bw;
+       u8 bss_color;
+       u8 channel_num;
+       u8 ctrl_ch_position;
+       u8 sta_id;
+       u8 reserved1;
+       __le16 toa_offset;
+       __le16 common_calib;
+       __le16 specific_calib;
+       u8 bssid[ETH_ALEN];
+       u8 r2i_ndp_params;
+       u8 i2r_ndp_params;
+       __le16 min_time_between_msr;
+       __le16 max_time_between_msr;
+} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_8 */
+
 #define IWL_LCI_CIVIC_IE_MAX_SIZE      400
 
 /**
@@ -489,6 +551,10 @@ struct iwl_tof_range_req_ap_entry_v2 {
  *      instead of fw internal values.
  * @IWL_INITIATOR_AP_FLAGS_PMF: request to protect the negotiation and LMR
  *      frames with protected management frames.
+ * @IWL_INITIATOR_AP_FLAGS_TERMINATE_ON_LMR_FEEDBACK: terminate the session if
+ *     the responder asked for LMR feedback although the initiator did not set
+ *     the LMR feedback bit in the FTM request. If not set, the initiator will
+ *     continue with the session and will provide the LMR feedback.
  */
 enum iwl_initiator_ap_flags {
        IWL_INITIATOR_AP_FLAGS_ASAP = BIT(1),
@@ -504,6 +570,7 @@ enum iwl_initiator_ap_flags {
        IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK = BIT(12),
        IWL_INITIATOR_AP_FLAGS_USE_CALIB = BIT(13),
        IWL_INITIATOR_AP_FLAGS_PMF = BIT(14),
+       IWL_INITIATOR_AP_FLAGS_TERMINATE_ON_LMR_FEEDBACK = BIT(15),
 };
 
 /**
@@ -794,6 +861,90 @@ struct iwl_tof_range_req_ap_entry_v8 {
        u8 i2r_max_total_ltf;
 } __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_8 */
 
+/**
+ * struct iwl_tof_range_req_ap_entry_v9 - AP configuration parameters
+ * @initiator_ap_flags: see &enum iwl_initiator_ap_flags.
+ * @channel_num: AP Channel number
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ *             bits 4 - 7: &enum iwl_location_bw.
+ * @ctrl_ch_position: Coding of the control channel position relative to the
+ *     center frequency, see iwl_mvm_get_ctrl_pos().
+ * @ftmr_max_retries: Max number of retries to send the FTMR in case of no
+ *     reply from the AP.
+ * @bssid: AP's BSSID
+ * @burst_period: For EDCA based ranging: Recommended value to be sent to the
+ *     AP. Measurement periodicity In units of 100ms. ignored if
+ *     num_of_bursts_exp = 0.
+ *     For non trigger based NDP ranging, the maximum time between
+ *     measurements in units of milliseconds.
+ * @samples_per_burst: the number of FTMs pairs in single Burst (1-31);
+ * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of
+ *     the number of measurement iterations (min 2^0 = 1, max 2^14)
+ * @sta_id: the station id of the AP. Only relevant when associated to the AP,
+ *     otherwise should be set to &IWL_MVM_INVALID_STA.
+ * @cipher: pairwise cipher suite for secured measurement.
+ *          &enum iwl_location_cipher.
+ * @hltk: HLTK to be used for secured 11az measurement
+ * @tk: TK to be used for secured 11az measurement
+ * @calib: An array of calibration values per FTM rx bandwidth.
+ *         If &IWL_INITIATOR_AP_FLAGS_USE_CALIB is set, the fw will use the
+ *         calibration value that corresponds to the rx bandwidth of the FTM
+ *         frame.
+ * @beacon_interval: beacon interval of the AP in TUs. Only required if
+ *     &IWL_INITIATOR_AP_FLAGS_TB is set.
+ * @bss_color: the BSS color of the responder. Only valid if
+ *     &IWL_INITIATOR_AP_FLAGS_TB or &IWL_INITIATOR_AP_FLAGS_NON_TB is set.
+ * @rx_pn: the next expected PN for protected management frames Rx. LE byte
+ *     order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ *     is set to &IWL_MVM_INVALID_STA.
+ * @tx_pn: the next PN to use for protected management frames Tx. LE byte
+ *     order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ *     is set to &IWL_MVM_INVALID_STA.
+ * @r2i_ndp_params: parameters for R2I NDP ranging negotiation.
+ *      bits 0 - 2: max LTF repetitions
+ *      bits 3 - 5: max number of spatial streams
+ *      bits 6 - 7: reserved
+ * @i2r_ndp_params: parameters for I2R NDP ranging negotiation.
+ *      bits 0 - 2: max LTF repetitions
+ *      bits 3 - 5: max number of spatial streams (supported values are < 2)
+ *      bits 6 - 7: reserved
+ * @r2i_max_total_ltf: R2I Max Total LTFs for NDP ranging negotiation.
+ *      One of &enum ieee80211_range_params_max_total_ltf.
+ * @i2r_max_total_ltf: I2R Max Total LTFs for NDP ranging negotiation.
+ *      One of &enum ieee80211_range_params_max_total_ltf.
+ * @bss_color: the BSS color of the responder. Only valid if
+ *     &IWL_INITIATOR_AP_FLAGS_NON_TB or &IWL_INITIATOR_AP_FLAGS_TB is set.
+ * @band: 0 for 5.2 GHz, 1 for 2.4 GHz, 2 for 6GHz
+ * @min_time_between_msr: For non trigger based NDP ranging, the minimum time
+ *     between measurements in units of milliseconds
+ */
+struct iwl_tof_range_req_ap_entry_v9 {
+       __le32 initiator_ap_flags;
+       u8 channel_num;
+       u8 format_bw;
+       u8 ctrl_ch_position;
+       u8 ftmr_max_retries;
+       u8 bssid[ETH_ALEN];
+       __le16 burst_period;
+       u8 samples_per_burst;
+       u8 num_of_bursts;
+       u8 sta_id;
+       u8 cipher;
+       u8 hltk[HLTK_11AZ_LEN];
+       u8 tk[TK_11AZ_LEN];
+       __le16 calib[IWL_TOF_BW_NUM];
+       u16 beacon_interval;
+       u8 rx_pn[IEEE80211_CCMP_PN_LEN];
+       u8 tx_pn[IEEE80211_CCMP_PN_LEN];
+       u8 r2i_ndp_params;
+       u8 i2r_ndp_params;
+       u8 r2i_max_total_ltf;
+       u8 i2r_max_total_ltf;
+       u8 bss_color;
+       u8 band;
+       __le16 min_time_between_msr;
+} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_9 */
+
 /**
  * enum iwl_tof_response_mode
  * @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as
@@ -1043,6 +1194,34 @@ struct iwl_tof_range_req_cmd_v12 {
        struct iwl_tof_range_req_ap_entry_v8 ap[IWL_MVM_TOF_MAX_APS];
 } __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_12 */
 
+/**
+ * struct iwl_tof_range_req_cmd_v13 - start measurement cmd
+ * @initiator_flags: see flags @ iwl_tof_initiator_flags
+ * @request_id: A Token incremented per request. The same Token will be
+ *             sent back in the range response
+ * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS)
+ * @range_req_bssid: ranging request BSSID
+ * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template.
+ *               Bits set to 1 shall be randomized by the UMAC
+ * @macaddr_template: MAC address template to use for non-randomized bits
+ * @req_timeout_ms: Requested timeout of the response in units of milliseconds.
+ *     This is the session time for completing the measurement.
+ * @tsf_mac_id: report the measurement start time for each ap in terms of the
+ *     TSF of this mac id. 0xff to disable TSF reporting.
+ * @ap: per-AP request data, see &struct iwl_tof_range_req_ap_entry_v9.
+ */
+struct iwl_tof_range_req_cmd_v13 {
+       __le32 initiator_flags;
+       u8 request_id;
+       u8 num_of_ap;
+       u8 range_req_bssid[ETH_ALEN];
+       u8 macaddr_mask[ETH_ALEN];
+       u8 macaddr_template[ETH_ALEN];
+       __le32 req_timeout_ms;
+       __le32 tsf_mac_id;
+       struct iwl_tof_range_req_ap_entry_v9 ap[IWL_MVM_TOF_MAX_APS];
+} __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_13 */
+
 /*
  * enum iwl_tof_range_request_status - status of the sent request
  * @IWL_TOF_RANGE_REQUEST_STATUS_SUCCESSFUL - FW successfully received the
index 93084bb..7be7715 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2017 Intel Deutschland GmbH
  */
 #ifndef __iwl_fw_api_mac_h__
@@ -137,12 +137,14 @@ struct iwl_mac_data_ibss {
  *     early termination detection.
  * @FLEXIBLE_TWT_SUPPORTED: AP supports flexible TWT schedule
  * @PROTECTED_TWT_SUPPORTED: AP supports protected TWT frames (with 11w)
+ * @BROADCAST_TWT_SUPPORTED: AP and STA support broadcast TWT
  */
 enum iwl_mac_data_policy {
        TWT_SUPPORTED = BIT(0),
        MORE_DATA_ACK_SUPPORTED = BIT(1),
        FLEXIBLE_TWT_SUPPORTED = BIT(2),
        PROTECTED_TWT_SUPPORTED = BIT(3),
+       BROADCAST_TWT_SUPPORTED = BIT(4),
 };
 
 /**
index f06214d..5204aa9 100644 (file)
@@ -3,6 +3,7 @@
  * Copyright (C) 2012-2014 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2021 Intel Corporation
  */
 #ifndef __iwl_fw_api_offload_h__
 #define __iwl_fw_api_offload_h__
@@ -20,7 +21,7 @@ enum iwl_prot_offload_subcmd_ids {
 #define MAX_STORED_BEACON_SIZE 600
 
 /**
- * struct iwl_stored_beacon_notif - Stored beacon notification
+ * struct iwl_stored_beacon_notif_common - Stored beacon notif common fields
  *
  * @system_time: system time on air rise
  * @tsf: TSF on air rise
@@ -29,9 +30,8 @@ enum iwl_prot_offload_subcmd_ids {
  * @channel: channel this beacon was received on
  * @rates: rate in ucode internal format
  * @byte_count: frame's byte count
- * @data: beacon data, length in @byte_count
  */
-struct iwl_stored_beacon_notif {
+struct iwl_stored_beacon_notif_common {
        __le32 system_time;
        __le64 tsf;
        __le32 beacon_timestamp;
@@ -39,7 +39,32 @@ struct iwl_stored_beacon_notif {
        __le16 channel;
        __le32 rates;
        __le32 byte_count;
+} __packed;
+
+/**
+ * struct iwl_stored_beacon_notif - Stored beacon notification
+ *
+ * @common: fields common for all versions
+ * @data: beacon data, length in @byte_count
+ */
+struct iwl_stored_beacon_notif_v2 {
+       struct iwl_stored_beacon_notif_common common;
        u8 data[MAX_STORED_BEACON_SIZE];
 } __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_2 */
 
+/**
+ * struct iwl_stored_beacon_notif_v3 - Stored beacon notification
+ *
+ * @common: fields common for all versions
+ * @sta_id: station for which the beacon was received
+ * @reserved: reserved for alignment
+ * @data: beacon data, length in @byte_count
+ */
+struct iwl_stored_beacon_notif_v3 {
+       struct iwl_stored_beacon_notif_common common;
+       u8 sta_id;
+       u8 reserved[3];
+       u8 data[MAX_STORED_BEACON_SIZE];
+} __packed; /* WOWLAN_STROED_BEACON_INFO_S_VER_3 */
+
 #endif /* __iwl_fw_api_offload_h__ */
index b2605ae..8b20037 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -874,7 +874,7 @@ struct iwl_scan_probe_params_v3 {
        u8 reserved;
        struct iwl_ssid_ie direct_scan[PROBE_OPTION_MAX];
        __le32 short_ssid[SCAN_SHORT_SSID_MAX_SIZE];
-       u8 bssid_array[ETH_ALEN][SCAN_BSSID_MAX_SIZE];
+       u8 bssid_array[SCAN_BSSID_MAX_SIZE][ETH_ALEN];
 } __packed; /* SCAN_PROBE_PARAMS_API_S_VER_3 */
 
 /**
@@ -894,7 +894,7 @@ struct iwl_scan_probe_params_v4 {
        __le16 reserved;
        struct iwl_ssid_ie direct_scan[PROBE_OPTION_MAX];
        __le32 short_ssid[SCAN_SHORT_SSID_MAX_SIZE];
-       u8 bssid_array[ETH_ALEN][SCAN_BSSID_MAX_SIZE];
+       u8 bssid_array[SCAN_BSSID_MAX_SIZE][ETH_ALEN];
 } __packed; /* SCAN_PROBE_PARAMS_API_S_VER_4 */
 
 #define SCAN_MAX_NUM_CHANS_V3 67
index 12b2f2c..f1a3e14 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -384,13 +384,17 @@ struct iwl_mvm_add_sta_key_cmd_v1 {
  * @rx_mic_key: TKIP RX unicast or multicast key
  * @tx_mic_key: TKIP TX key
  * @transmit_seq_cnt: TSC, transmit packet number
+ *
+ * Note: This is used for both v2 and v3, the difference being
+ * in the way the common.rx_secur_seq_cnt is used, in v2 that's
+ * the strange hole format, in v3 it's just a u64.
  */
 struct iwl_mvm_add_sta_key_cmd {
        struct iwl_mvm_add_sta_key_common common;
        __le64 rx_mic_key;
        __le64 tx_mic_key;
        __le64 transmit_seq_cnt;
-} __packed; /* ADD_MODIFY_STA_KEY_API_S_VER_2 */
+} __packed; /* ADD_MODIFY_STA_KEY_API_S_VER_2, ADD_MODIFY_STA_KEY_API_S_VER_3 */
 
 /**
  * enum iwl_mvm_add_sta_rsp_status - status in the response to ADD_STA command
index df7c55e..6dcafd0 100644 (file)
@@ -1517,6 +1517,37 @@ iwl_dump_ini_special_mem_iter(struct iwl_fw_runtime *fwrt,
        return sizeof(*range) + le32_to_cpu(range->range_data_size);
 }
 
+static int
+iwl_dump_ini_dbgi_sram_iter(struct iwl_fw_runtime *fwrt,
+                           struct iwl_dump_ini_region_data *reg_data,
+                           void *range_ptr, int idx)
+{
+       struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
+       struct iwl_fw_ini_error_dump_range *range = range_ptr;
+       __le32 *val = range->data;
+       u32 prph_data;
+       int i;
+
+       if (!iwl_trans_grab_nic_access(fwrt->trans))
+               return -EBUSY;
+
+       range->range_data_size = reg->dev_addr.size;
+       iwl_write_prph_no_grab(fwrt->trans, DBGI_SRAM_TARGET_ACCESS_CFG,
+                              DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK);
+       for (i = 0; i < (le32_to_cpu(reg->dev_addr.size) / 4); i++) {
+               prph_data = iwl_read_prph(fwrt->trans, (i % 2) ?
+                                         DBGI_SRAM_TARGET_ACCESS_RDATA_MSB :
+                                         DBGI_SRAM_TARGET_ACCESS_RDATA_LSB);
+               if (prph_data == 0x5a5a5a5a) {
+                       iwl_trans_release_nic_access(fwrt->trans);
+                       return -EBUSY;
+               }
+               *val++ = cpu_to_le32(prph_data);
+       }
+       iwl_trans_release_nic_access(fwrt->trans);
+       return sizeof(*range) + le32_to_cpu(range->range_data_size);
+}
+
 static int iwl_dump_ini_fw_pkt_iter(struct iwl_fw_runtime *fwrt,
                                    struct iwl_dump_ini_region_data *reg_data,
                                    void *range_ptr, int idx)
@@ -1547,7 +1578,7 @@ iwl_dump_ini_mem_fill_header(struct iwl_fw_runtime *fwrt,
 
        dump->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
 
-       return dump->ranges;
+       return dump->data;
 }
 
 /**
@@ -1611,7 +1642,7 @@ iwl_dump_ini_mon_fill_header(struct iwl_fw_runtime *fwrt,
 
        data->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
 
-       return data->ranges;
+       return data->data;
 }
 
 static void *
@@ -1647,7 +1678,7 @@ iwl_dump_ini_err_table_fill_header(struct iwl_fw_runtime *fwrt,
        dump->header.version = cpu_to_le32(IWL_INI_DUMP_VER);
        dump->version = reg->err_table.version;
 
-       return dump->ranges;
+       return dump->data;
 }
 
 static void *
@@ -1662,7 +1693,7 @@ iwl_dump_ini_special_mem_fill_header(struct iwl_fw_runtime *fwrt,
        dump->type = reg->special_mem.type;
        dump->version = reg->special_mem.version;
 
-       return dump->ranges;
+       return dump->data;
 }
 
 static u32 iwl_dump_ini_mem_ranges(struct iwl_fw_runtime *fwrt,
@@ -2189,6 +2220,12 @@ static const struct iwl_dump_ini_mem_ops iwl_dump_ini_region_ops[] = {
                .fill_mem_hdr = iwl_dump_ini_special_mem_fill_header,
                .fill_range = iwl_dump_ini_special_mem_iter,
        },
+       [IWL_FW_INI_REGION_DBGI_SRAM] = {
+               .get_num_of_ranges = iwl_dump_ini_mem_ranges,
+               .get_size = iwl_dump_ini_mem_get_size,
+               .fill_mem_hdr = iwl_dump_ini_mem_fill_header,
+               .fill_range = iwl_dump_ini_dbgi_sram_iter,
+       },
 };
 
 static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt,
@@ -2321,7 +2358,7 @@ static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt,
                return;
 
        if (dump_data->monitor_only)
-               dump_mask &= IWL_FW_ERROR_DUMP_FW_MONITOR;
+               dump_mask &= BIT(IWL_FW_ERROR_DUMP_FW_MONITOR);
 
        fw_error_dump.trans_ptr = iwl_trans_dump_data(fwrt->trans, dump_mask);
        file_len = le32_to_cpu(dump_file->file_len);
@@ -2530,51 +2567,6 @@ int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
 }
 IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect);
 
-int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
-                          struct iwl_fwrt_dump_data *dump_data)
-{
-       struct iwl_fw_ini_trigger_tlv *trig = dump_data->trig;
-       enum iwl_fw_ini_time_point tp_id = le32_to_cpu(trig->time_point);
-       u32 occur, delay;
-       unsigned long idx;
-
-       if (!iwl_fw_ini_trigger_on(fwrt, trig)) {
-               IWL_WARN(fwrt, "WRT: Trigger %d is not active, aborting dump\n",
-                        tp_id);
-               return -EINVAL;
-       }
-
-       delay = le32_to_cpu(trig->dump_delay);
-       occur = le32_to_cpu(trig->occurrences);
-       if (!occur)
-               return 0;
-
-       trig->occurrences = cpu_to_le32(--occur);
-
-       /* Check there is an available worker.
-        * ffz return value is undefined if no zero exists,
-        * so check against ~0UL first.
-        */
-       if (fwrt->dump.active_wks == ~0UL)
-               return -EBUSY;
-
-       idx = ffz(fwrt->dump.active_wks);
-
-       if (idx >= IWL_FW_RUNTIME_DUMP_WK_NUM ||
-           test_and_set_bit(fwrt->dump.wks[idx].idx, &fwrt->dump.active_wks))
-               return -EBUSY;
-
-       fwrt->dump.wks[idx].dump_data = *dump_data;
-
-       IWL_WARN(fwrt,
-                "WRT: Collecting data: ini trigger %d fired (delay=%dms).\n",
-                tp_id, (u32)(delay / USEC_PER_MSEC));
-
-       schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
-
-       return 0;
-}
-
 int iwl_fw_dbg_collect_trig(struct iwl_fw_runtime *fwrt,
                            struct iwl_fw_dbg_trigger_tlv *trigger,
                            const char *fmt, ...)
@@ -2703,6 +2695,58 @@ out:
        clear_bit(wk_idx, &fwrt->dump.active_wks);
 }
 
+int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
+                          struct iwl_fwrt_dump_data *dump_data,
+                          bool sync)
+{
+       struct iwl_fw_ini_trigger_tlv *trig = dump_data->trig;
+       enum iwl_fw_ini_time_point tp_id = le32_to_cpu(trig->time_point);
+       u32 occur, delay;
+       unsigned long idx;
+
+       if (!iwl_fw_ini_trigger_on(fwrt, trig)) {
+               IWL_WARN(fwrt, "WRT: Trigger %d is not active, aborting dump\n",
+                        tp_id);
+               return -EINVAL;
+       }
+
+       delay = le32_to_cpu(trig->dump_delay);
+       occur = le32_to_cpu(trig->occurrences);
+       if (!occur)
+               return 0;
+
+       trig->occurrences = cpu_to_le32(--occur);
+
+       /* Check there is an available worker.
+        * ffz return value is undefined if no zero exists,
+        * so check against ~0UL first.
+        */
+       if (fwrt->dump.active_wks == ~0UL)
+               return -EBUSY;
+
+       idx = ffz(fwrt->dump.active_wks);
+
+       if (idx >= IWL_FW_RUNTIME_DUMP_WK_NUM ||
+           test_and_set_bit(fwrt->dump.wks[idx].idx, &fwrt->dump.active_wks))
+               return -EBUSY;
+
+       fwrt->dump.wks[idx].dump_data = *dump_data;
+
+       if (sync)
+               delay = 0;
+
+       IWL_WARN(fwrt,
+                "WRT: Collecting data: ini trigger %d fired (delay=%dms).\n",
+                tp_id, (u32)(delay / USEC_PER_MSEC));
+
+       schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
+
+       if (sync)
+               iwl_fw_dbg_collect_sync(fwrt, idx);
+
+       return 0;
+}
+
 void iwl_fw_error_dump_wk(struct work_struct *work)
 {
        struct iwl_fwrt_wk_data *wks =
index c0e84ef..8c3c890 100644 (file)
@@ -46,7 +46,8 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
 int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt,
                             enum iwl_fw_dbg_trigger trig_type);
 int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
-                          struct iwl_fwrt_dump_data *dump_data);
+                          struct iwl_fwrt_dump_data *dump_data,
+                          bool sync);
 int iwl_fw_dbg_collect(struct iwl_fw_runtime *fwrt,
                       enum iwl_fw_dbg_trigger trig, const char *str,
                       size_t len, struct iwl_fw_dbg_trigger_tlv *trigger);
@@ -284,7 +285,7 @@ static inline void iwl_fw_umac_set_alive_err_table(struct iwl_trans *trans,
                trans->dbg.umac_error_event_table = umac_error_event_table;
 }
 
-static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt)
+static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt, bool sync)
 {
        enum iwl_fw_ini_time_point tp_id;
 
@@ -300,7 +301,7 @@ static inline void iwl_fw_error_collect(struct iwl_fw_runtime *fwrt)
                tp_id = IWL_FW_INI_TIME_POINT_FW_ASSERT;
        }
 
-       iwl_dbg_tlv_time_point(fwrt, tp_id, NULL);
+       _iwl_dbg_tlv_time_point(fwrt, tp_id, NULL, sync);
 }
 
 void iwl_fw_error_print_fseq_regs(struct iwl_fw_runtime *fwrt);
index 9fffac9..521ca2b 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2014, 2018-2021 Intel Corporation
  * Copyright (C) 2014-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -305,11 +305,12 @@ struct iwl_fw_ini_error_dump_header {
 /**
  * struct iwl_fw_ini_error_dump - ini region dump
  * @header: the header of this region
- * @ranges: the memory ranges of this region
+ * @data: data of memory ranges in this region,
+ *     see &struct iwl_fw_ini_error_dump_range
  */
 struct iwl_fw_ini_error_dump {
        struct iwl_fw_ini_error_dump_header header;
-       struct iwl_fw_ini_error_dump_range ranges[];
+       u8 data[];
 } __packed;
 
 /* This bit is used to differentiate between lmac and umac rxf */
@@ -399,12 +400,13 @@ struct iwl_fw_ini_dump_info {
  * struct iwl_fw_ini_err_table_dump - ini error table dump
  * @header: header of the region
  * @version: error table version
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ *     see &struct iwl_fw_ini_error_dump_range
  */
 struct iwl_fw_ini_err_table_dump {
        struct iwl_fw_ini_error_dump_header header;
        __le32 version;
-       struct iwl_fw_ini_error_dump_range ranges[];
+       u8 data[];
 } __packed;
 
 /**
@@ -427,14 +429,15 @@ struct iwl_fw_error_dump_rb {
  * @write_ptr: write pointer position in the buffer
  * @cycle_cnt: cycles count
  * @cur_frag: current fragment in use
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ *     see &struct iwl_fw_ini_error_dump_range
  */
 struct iwl_fw_ini_monitor_dump {
        struct iwl_fw_ini_error_dump_header header;
        __le32 write_ptr;
        __le32 cycle_cnt;
        __le32 cur_frag;
-       struct iwl_fw_ini_error_dump_range ranges[];
+       u8 data[];
 } __packed;
 
 /**
@@ -442,13 +445,14 @@ struct iwl_fw_ini_monitor_dump {
  * @header: header of the region
  * @type: type of special memory
  * @version: struct special memory version
- * @ranges: the memory ranges of this this region
+ * @data: data of memory ranges in this region,
+ *     see &struct iwl_fw_ini_error_dump_range
  */
 struct iwl_fw_ini_special_device_memory {
        struct iwl_fw_ini_error_dump_header header;
        __le16 type;
        __le16 version;
-       struct iwl_fw_ini_error_dump_range ranges[];
+       u8 data[];
 } __packed;
 
 /**
index 9a8c7b7..6c8e9f3 100644 (file)
@@ -414,6 +414,7 @@ enum iwl_ucode_tlv_capa {
        IWL_UCODE_TLV_CAPA_PROTECTED_TWT                = (__force iwl_ucode_tlv_capa_t)56,
        IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE           = (__force iwl_ucode_tlv_capa_t)57,
        IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN            = (__force iwl_ucode_tlv_capa_t)58,
+       IWL_UCODE_TLV_CAPA_HIDDEN_6GHZ_SCAN             = (__force iwl_ucode_tlv_capa_t)59,
        IWL_UCODE_TLV_CAPA_BROADCAST_TWT                = (__force iwl_ucode_tlv_capa_t)60,
 
        /* set 2 */
index b4b1f75..314ed90 100644 (file)
@@ -24,7 +24,7 @@ static bool iwl_pnvm_complete_fn(struct iwl_notif_wait_data *notif_wait,
        struct iwl_pnvm_init_complete_ntfy *pnvm_ntf = (void *)pkt->data;
 
        IWL_DEBUG_FW(trans,
-                    "PNVM complete notification received with status %d\n",
+                    "PNVM complete notification received with status 0x%0x\n",
                     le32_to_cpu(pnvm_ntf->status));
 
        return true;
@@ -230,19 +230,10 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data,
 static int iwl_pnvm_get_from_fs(struct iwl_trans *trans, u8 **data, size_t *len)
 {
        const struct firmware *pnvm;
-       char pnvm_name[64];
+       char pnvm_name[MAX_PNVM_NAME];
        int ret;
 
-       /*
-        * The prefix unfortunately includes a hyphen at the end, so
-        * don't add the dot here...
-        */
-       snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm",
-                trans->cfg->fw_name_pre);
-
-       /* ...but replace the hyphen with the dot here. */
-       if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name))
-               pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.';
+       iwl_pnvm_get_fs_name(trans, pnvm_name, sizeof(pnvm_name));
 
        ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev);
        if (ret) {
index 61d3d4e..203c367 100644 (file)
 
 #define MVM_UCODE_PNVM_TIMEOUT (HZ / 4)
 
+#define MAX_PNVM_NAME  64
+
 int iwl_pnvm_load(struct iwl_trans *trans,
                  struct iwl_notif_wait_data *notif_wait);
 
+static inline
+void iwl_pnvm_get_fs_name(struct iwl_trans *trans,
+                         u8 *pnvm_name, size_t max_len)
+{
+       int pre_len;
+
+       /*
+        * The prefix unfortunately includes a hyphen at the end, so
+        * don't add the dot here...
+        */
+       snprintf(pnvm_name, max_len, "%spnvm", trans->cfg->fw_name_pre);
+
+       /* ...but replace the hyphen with the dot here. */
+       pre_len = strlen(trans->cfg->fw_name_pre);
+       if (pre_len < max_len && pre_len > 0)
+               pnvm_name[pre_len - 1] = '.';
+}
+
 #endif /* __IWL_PNVM_H__ */
index bf6ee56..7eb534d 100644 (file)
@@ -33,6 +33,7 @@ enum iwl_device_family {
        IWL_DEVICE_FAMILY_9000,
        IWL_DEVICE_FAMILY_22000,
        IWL_DEVICE_FAMILY_AX210,
+       IWL_DEVICE_FAMILY_BZ,
 };
 
 /*
@@ -321,7 +322,7 @@ struct iwl_fw_mon_regs {
  * @host_interrupt_operation_mode: device needs host interrupt operation
  *     mode set
  * @nvm_hw_section_num: the ID of the HW NVM section
- * @mac_addr_from_csr: read HW address from CSR registers
+ * @mac_addr_from_csr: read HW address from CSR registers at this offset
  * @features: hw features, any combination of feature_passlist
  * @pwr_tx_backoffs: translation table between power limits and backoffs
  * @max_tx_agg_size: max TX aggregation size of the ADDBA request/response
@@ -343,6 +344,8 @@ struct iwl_fw_mon_regs {
  *     supports 256 BA aggregation
  * @num_rbds: number of receive buffer descriptors to use
  *     (only used for multi-queue capable devices)
+ * @mac_addr_csr_base: CSR base register for MAC address access, if not set
+ *     assume 0x380
  *
  * We enable the driver to be backward compatible wrt. hardware features.
  * API differences in uCode shouldn't be handled here but through TLVs
@@ -378,7 +381,7 @@ struct iwl_cfg {
            internal_wimax_coex:1,
            host_interrupt_operation_mode:1,
            high_temp:1,
-           mac_addr_from_csr:1,
+           mac_addr_from_csr:10,
            lp_xtal_workaround:1,
            disable_dummy_notification:1,
            apmg_not_supported:1,
@@ -512,6 +515,7 @@ extern const char iwl_ax211_name[];
 extern const char iwl_ax221_name[];
 extern const char iwl_ax231_name[];
 extern const char iwl_ax411_name[];
+extern const char iwl_bz_name[];
 #if IS_ENABLED(CONFIG_IWLDVM)
 extern const struct iwl_cfg iwl5300_agn_cfg;
 extern const struct iwl_cfg iwl5100_agn_cfg;
index 47e5a17..cf79640 100644 (file)
 /* GIO Chicken Bits (PCI Express bus link power management) */
 #define CSR_GIO_CHICKEN_BITS    (CSR_BASE+0x100)
 
+/* Doorbell NMI (since Bz) */
+#define CSR_DOORBELL_VECTOR    (CSR_BASE + 0x130)
+#define CSR_DOORBELL_VECTOR_NMI        BIT(1)
+
 /* host chicken bits */
 #define CSR_HOST_CHICKEN       (CSR_BASE + 0x204)
 #define CSR_HOST_CHICKEN_PM_IDLE_SRC_DIS_SB_PME        BIT(19)
 #define CSR_GP_CNTRL_REG_FLAG_RFKILL_WAKE_L1A_EN     (0x04000000)
 #define CSR_GP_CNTRL_REG_FLAG_HW_RF_KILL_SW          (0x08000000)
 
+/* From Bz we use these instead during init/reset flow */
+#define CSR_GP_CNTRL_REG_FLAG_MAC_INIT                 BIT(6)
+#define CSR_GP_CNTRL_REG_FLAG_ROM_START                        BIT(7)
+#define CSR_GP_CNTRL_REG_FLAG_MAC_STATUS               BIT(20)
+#define CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ                BIT(21)
+#define CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS        BIT(28)
+#define CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ   BIT(29)
+#define CSR_GP_CNTRL_REG_FLAG_SW_RESET                 BIT(31)
 
 /* HW REV */
 #define CSR_HW_REV_DASH(_val)          (((_val) & 0x0000003) >> 0)
@@ -604,10 +616,10 @@ enum msix_hw_int_causes {
  *                     HW address related registers                          *
  *****************************************************************************/
 
-#define CSR_ADDR_BASE                  (0x380)
-#define CSR_MAC_ADDR0_OTP              (CSR_ADDR_BASE)
-#define CSR_MAC_ADDR1_OTP              (CSR_ADDR_BASE + 4)
-#define CSR_MAC_ADDR0_STRAP            (CSR_ADDR_BASE + 8)
-#define CSR_MAC_ADDR1_STRAP            (CSR_ADDR_BASE + 0xC)
+#define CSR_ADDR_BASE(trans)                   ((trans)->cfg->mac_addr_from_csr)
+#define CSR_MAC_ADDR0_OTP(trans)               (CSR_ADDR_BASE(trans) + 0x00)
+#define CSR_MAC_ADDR1_OTP(trans)               (CSR_ADDR_BASE(trans) + 0x04)
+#define CSR_MAC_ADDR0_STRAP(trans)             (CSR_ADDR_BASE(trans) + 0x08)
+#define CSR_MAC_ADDR1_STRAP(trans)             (CSR_ADDR_BASE(trans) + 0x0c)
 
 #endif /* !__iwl_csr_h__ */
index 0ddd255..125479b 100644 (file)
@@ -131,8 +131,7 @@ static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
                goto err;
 
        if (buf_location == IWL_FW_INI_LOCATION_SRAM_PATH &&
-           alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1 &&
-           alloc_id != IWL_FW_INI_ALLOCATION_ID_INTERNAL)
+           alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1)
                goto err;
 
        trans->dbg.fw_mon_cfg[alloc_id] = *alloc;
@@ -435,13 +434,16 @@ static int iwl_dbg_tlv_parse_bin(struct iwl_trans *trans, const u8 *data,
 void iwl_dbg_tlv_load_bin(struct device *dev, struct iwl_trans *trans)
 {
        const struct firmware *fw;
+       const char *yoyo_bin = "iwl-debug-yoyo.bin";
        int res;
 
        if (!iwlwifi_mod_params.enable_ini ||
            trans->trans_cfg->device_family <= IWL_DEVICE_FAMILY_9000)
                return;
 
-       res = firmware_request_nowarn(&fw, "iwl-debug-yoyo.bin", dev);
+       res = firmware_request_nowarn(&fw, yoyo_bin, dev);
+       IWL_DEBUG_FW(trans, "%s %s\n", res ? "didn't load" : "loaded", yoyo_bin);
+
        if (res)
                return;
 
@@ -621,6 +623,7 @@ static int iwl_dbg_tlv_apply_buffer(struct iwl_fw_runtime *fwrt,
                        .id = WIDE_ID(DEBUG_GROUP, BUFFER_ALLOCATION),
                        .data[0] = &data,
                        .len[0] = sizeof(data),
+                       .flags = CMD_SEND_IN_RFKILL,
                };
                int ret, j;
 
@@ -683,7 +686,7 @@ static void iwl_dbg_tlv_periodic_trig_handler(struct timer_list *t)
        };
        int ret;
 
-       ret = iwl_fw_dbg_ini_collect(timer_node->fwrt, &dump_data);
+       ret = iwl_fw_dbg_ini_collect(timer_node->fwrt, &dump_data, false);
        if (!ret || ret == -EBUSY) {
                u32 occur = le32_to_cpu(dump_data.trig->occurrences);
                u32 collect_interval = le32_to_cpu(dump_data.trig->data[0]);
@@ -927,7 +930,7 @@ static bool iwl_dbg_tlv_check_fw_pkt(struct iwl_fw_runtime *fwrt,
 }
 
 static int
-iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
+iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt, bool sync,
                       struct list_head *active_trig_list,
                       union iwl_dbg_tlv_tp_data *tp_data,
                       bool (*data_check)(struct iwl_fw_runtime *fwrt,
@@ -946,7 +949,7 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
                int ret, i;
 
                if (!num_data) {
-                       ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data);
+                       ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data, sync);
                        if (ret)
                                return ret;
                }
@@ -955,7 +958,7 @@ iwl_dbg_tlv_tp_trigger(struct iwl_fw_runtime *fwrt,
                        if (!data_check ||
                            data_check(fwrt, &dump_data, tp_data,
                                       le32_to_cpu(dump_data.trig->data[i]))) {
-                               ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data);
+                               ret = iwl_fw_dbg_ini_collect(fwrt, &dump_data, sync);
                                if (ret)
                                        return ret;
 
@@ -1043,9 +1046,10 @@ static void iwl_dbg_tlv_init_cfg(struct iwl_fw_runtime *fwrt)
        }
 }
 
-void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
-                           enum iwl_fw_ini_time_point tp_id,
-                           union iwl_dbg_tlv_tp_data *tp_data)
+void _iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+                            enum iwl_fw_ini_time_point tp_id,
+                            union iwl_dbg_tlv_tp_data *tp_data,
+                            bool sync)
 {
        struct list_head *hcmd_list, *trig_list;
 
@@ -1060,12 +1064,12 @@ void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
        switch (tp_id) {
        case IWL_FW_INI_TIME_POINT_EARLY:
                iwl_dbg_tlv_init_cfg(fwrt);
-               iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+               iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
                break;
        case IWL_FW_INI_TIME_POINT_AFTER_ALIVE:
                iwl_dbg_tlv_apply_buffers(fwrt);
                iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
-               iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+               iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
                break;
        case IWL_FW_INI_TIME_POINT_PERIODIC:
                iwl_dbg_tlv_set_periodic_trigs(fwrt);
@@ -1075,13 +1079,13 @@ void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
        case IWL_FW_INI_TIME_POINT_MISSED_BEACONS:
        case IWL_FW_INI_TIME_POINT_FW_DHC_NOTIFICATION:
                iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
-               iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data,
+               iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data,
                                       iwl_dbg_tlv_check_fw_pkt);
                break;
        default:
                iwl_dbg_tlv_send_hcmds(fwrt, hcmd_list);
-               iwl_dbg_tlv_tp_trigger(fwrt, trig_list, tp_data, NULL);
+               iwl_dbg_tlv_tp_trigger(fwrt, sync, trig_list, tp_data, NULL);
                break;
        }
 }
-IWL_EXPORT_SYMBOL(iwl_dbg_tlv_time_point);
+IWL_EXPORT_SYMBOL(_iwl_dbg_tlv_time_point);
index 92c7205..c12b1fd 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #ifndef __iwl_dbg_tlv_h__
 #define __iwl_dbg_tlv_h__
@@ -48,9 +48,25 @@ void iwl_dbg_tlv_free(struct iwl_trans *trans);
 void iwl_dbg_tlv_alloc(struct iwl_trans *trans, const struct iwl_ucode_tlv *tlv,
                       bool ext);
 void iwl_dbg_tlv_init(struct iwl_trans *trans);
-void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
-                           enum iwl_fw_ini_time_point tp_id,
-                           union iwl_dbg_tlv_tp_data *tp_data);
+void _iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+                            enum iwl_fw_ini_time_point tp_id,
+                            union iwl_dbg_tlv_tp_data *tp_data,
+                            bool sync);
+
+static inline void iwl_dbg_tlv_time_point(struct iwl_fw_runtime *fwrt,
+                                         enum iwl_fw_ini_time_point tp_id,
+                                         union iwl_dbg_tlv_tp_data *tp_data)
+{
+       _iwl_dbg_tlv_time_point(fwrt, tp_id, tp_data, false);
+}
+
+static inline void iwl_dbg_tlv_time_point_sync(struct iwl_fw_runtime *fwrt,
+                                              enum iwl_fw_ini_time_point tp_id,
+                                              union iwl_dbg_tlv_tp_data *tp_data)
+{
+       _iwl_dbg_tlv_time_point(fwrt, tp_id, tp_data, true);
+}
+
 void iwl_dbg_tlv_del_timers(struct iwl_trans *trans);
 
 #endif /* __iwl_dbg_tlv_h__*/
index 977dce6..77124b8 100644 (file)
@@ -78,7 +78,7 @@ enum {
 };
 
 /* Protects the table contents, i.e. the ops pointer & drv list */
-static struct mutex iwlwifi_opmode_table_mtx;
+static DEFINE_MUTEX(iwlwifi_opmode_table_mtx);
 static struct iwlwifi_opmode_table {
        const char *name;                       /* name: iwldvm, iwlmvm, etc */
        const struct iwl_op_mode_ops *ops;      /* pointer to op_mode ops */
@@ -1754,8 +1754,6 @@ static int __init iwl_drv_init(void)
 {
        int i, err;
 
-       mutex_init(&iwlwifi_opmode_table_mtx);
-
        for (i = 0; i < ARRAY_SIZE(iwlwifi_opmode_table); i++)
                INIT_LIST_HEAD(&iwlwifi_opmode_table[i].drv);
 
index 33d42e0..2517c4a 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2003-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2003-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2015-2016 Intel Deutschland GmbH
  */
 #include <linux/delay.h>
@@ -213,9 +213,12 @@ void iwl_force_nmi(struct iwl_trans *trans)
        else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
                iwl_write_umac_prph(trans, UREG_NIC_SET_NMI_DRIVER,
                                UREG_NIC_SET_NMI_DRIVER_NMI_FROM_DRIVER);
-       else
+       else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_BZ)
                iwl_write_umac_prph(trans, UREG_DOORBELL_TO_ISR6,
                                    UREG_DOORBELL_TO_ISR6_NMI_BIT);
+       else
+               iwl_write32(trans, CSR_DOORBELL_VECTOR,
+                           CSR_DOORBELL_VECTOR_NMI);
 }
 IWL_EXPORT_SYMBOL(iwl_force_nmi);
 
@@ -398,6 +401,7 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf)
 int iwl_finish_nic_init(struct iwl_trans *trans,
                        const struct iwl_cfg_trans_params *cfg_trans)
 {
+       u32 poll_ready;
        int err;
 
        if (cfg_trans->bisr_workaround) {
@@ -409,7 +413,16 @@ int iwl_finish_nic_init(struct iwl_trans *trans,
         * Set "initialization complete" bit to move adapter from
         * D0U* --> D0A* (powered-up active) state.
         */
-       iwl_set_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       if (cfg_trans->device_family >= IWL_DEVICE_FAMILY_BZ) {
+               iwl_set_bit(trans, CSR_GP_CNTRL,
+                           CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+                           CSR_GP_CNTRL_REG_FLAG_MAC_INIT);
+               poll_ready = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+       } else {
+               iwl_set_bit(trans, CSR_GP_CNTRL,
+                           CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+               poll_ready = CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY;
+       }
 
        if (cfg_trans->device_family == IWL_DEVICE_FAMILY_8000)
                udelay(2);
@@ -419,10 +432,7 @@ int iwl_finish_nic_init(struct iwl_trans *trans,
         * device-internal resources is supported, e.g. iwl_write_prph()
         * and accesses to uCode SRAM.
         */
-       err = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY,
-                          25000);
+       err = iwl_poll_bit(trans, CSR_GP_CNTRL, poll_ready, poll_ready, 25000);
        if (err < 0)
                IWL_DEBUG_INFO(trans, "Failed to wake NIC\n");
 
@@ -468,5 +478,5 @@ void iwl_trans_sync_nmi_with_addr(struct iwl_trans *trans, u32 inta_addr,
        if (interrupts_enabled)
                iwl_trans_interrupts(trans, true);
 
-       iwl_trans_fw_error(trans);
+       iwl_trans_fw_error(trans, false);
 }
index 850648e..475f951 100644 (file)
@@ -549,7 +549,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
                                .mac_cap_info[2] =
                                        IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP,
                                .mac_cap_info[3] =
-                                       IEEE80211_HE_MAC_CAP3_OMI_CONTROL,
+                                       IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+                                       IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS,
                                .mac_cap_info[4] =
                                        IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU |
                                        IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
@@ -568,7 +569,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
                                        IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
                                        IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
                                .phy_cap_info[2] =
-                                       IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
+                                       IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+                                       IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ,
                                .phy_cap_info[3] =
                                        IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
                                        IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
@@ -595,6 +597,8 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
                                        IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
                                        IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
                                        IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
+                               .phy_cap_info[10] =
+                                       IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF,
                        },
                        /*
                         * Set default Tx/Rx HE MCS NSS Support field.
@@ -634,6 +638,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
                                .phy_cap_info[1] =
                                        IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
                                .phy_cap_info[2] =
+                                       IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
                                        IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
                                .phy_cap_info[3] =
                                        IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
@@ -742,6 +747,8 @@ iwl_nvm_fixup_sband_iftd(struct iwl_trans *trans,
                        IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
 
        if ((tx_chains & rx_chains) == ANT_AB) {
+               iftype_data->he_cap.he_cap_elem.phy_cap_info[2] |=
+                       IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ;
                iftype_data->he_cap.he_cap_elem.phy_cap_info[5] |=
                        IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
                        IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2;
@@ -958,8 +965,10 @@ static void iwl_flip_hw_address(__le32 mac_addr0, __le32 mac_addr1, u8 *dest)
 static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
                                        struct iwl_nvm_data *data)
 {
-       __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP));
-       __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP));
+       __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans,
+                                                 CSR_MAC_ADDR0_STRAP(trans)));
+       __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans,
+                                                 CSR_MAC_ADDR1_STRAP(trans)));
 
        iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
        /*
@@ -969,8 +978,8 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans,
        if (is_valid_ether_addr(data->hw_addr))
                return;
 
-       mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP));
-       mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP));
+       mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP(trans)));
+       mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP(trans)));
 
        iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
 }
@@ -1373,6 +1382,25 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
                reg_query_regdb_wmm(regd->alpha2, center_freq, rule);
        }
 
+       /*
+        * Certain firmware versions might report no valid channels
+        * if booted in RF-kill, i.e. not all calibrations etc. are
+        * running. We'll get out of this situation later when the
+        * rfkill is removed and we update the regdomain again, but
+        * since cfg80211 doesn't accept an empty regdomain, add a
+        * dummy (unusable) rule here in this case so we can init.
+        */
+       if (!valid_rules) {
+               valid_rules = 1;
+               rule = &regd->reg_rules[valid_rules - 1];
+               rule->freq_range.start_freq_khz = MHZ_TO_KHZ(2412);
+               rule->freq_range.end_freq_khz = MHZ_TO_KHZ(2413);
+               rule->freq_range.max_bandwidth_khz = MHZ_TO_KHZ(1);
+               rule->power_rule.max_antenna_gain = DBI_TO_MBI(6);
+               rule->power_rule.max_eirp =
+                       DBM_TO_MBM(IWL_DEFAULT_MAX_TX_POWER);
+       }
+
        regd->n_reg_rules = valid_rules;
 
        /*
index cf9c640..af5f9b2 100644 (file)
@@ -78,7 +78,7 @@ struct iwl_cfg;
  *     there are Tx packets pending in the transport layer.
  *     Must be atomic
  * @nic_error: error notification. Must be atomic and must be called with BH
- *     disabled.
+ *     disabled, unless the sync parameter is true.
  * @cmd_queue_full: Called when the command queue gets full. Must be atomic and
  *     called with BH disabled.
  * @nic_config: configure NIC, called before firmware is started.
@@ -102,7 +102,7 @@ struct iwl_op_mode_ops {
        void (*queue_not_full)(struct iwl_op_mode *op_mode, int queue);
        bool (*hw_rf_kill)(struct iwl_op_mode *op_mode, bool state);
        void (*free_skb)(struct iwl_op_mode *op_mode, struct sk_buff *skb);
-       void (*nic_error)(struct iwl_op_mode *op_mode);
+       void (*nic_error)(struct iwl_op_mode *op_mode, bool sync);
        void (*cmd_queue_full)(struct iwl_op_mode *op_mode);
        void (*nic_config)(struct iwl_op_mode *op_mode);
        void (*wimax_active)(struct iwl_op_mode *op_mode);
@@ -181,9 +181,9 @@ static inline void iwl_op_mode_free_skb(struct iwl_op_mode *op_mode,
        op_mode->ops->free_skb(op_mode, skb);
 }
 
-static inline void iwl_op_mode_nic_error(struct iwl_op_mode *op_mode)
+static inline void iwl_op_mode_nic_error(struct iwl_op_mode *op_mode, bool sync)
 {
-       op_mode->ops->nic_error(op_mode);
+       op_mode->ops->nic_error(op_mode, sync);
 }
 
 static inline void iwl_op_mode_cmd_queue_full(struct iwl_op_mode *op_mode)
index 9a9e714..d0a7d58 100644 (file)
 #define RFIC_REG_RD                    0xAD0470
 #define WFPM_CTRL_REG                  0xA03030
 #define WFPM_GP2                       0xA030B4
+
+/* DBGI SRAM Register details */
+#define DBGI_SRAM_TARGET_ACCESS_CFG                    0x00A2E14C
+#define DBGI_SRAM_TARGET_ACCESS_CFG_RESET_ADDRESS_MSK  0x10000
+#define DBGI_SRAM_TARGET_ACCESS_RDATA_LSB              0x00A2E154
+#define DBGI_SRAM_TARGET_ACCESS_RDATA_MSB              0x00A2E158
+
 enum {
        ENABLE_WFPM = BIT(31),
        WFPM_AUX_CTL_AUX_IF_MAC_OWNER_MSK       = 0x80000000,
index 0199d7a..8f0ff54 100644 (file)
@@ -887,7 +887,7 @@ struct iwl_trans_txqs {
        bool bc_table_dword;
        u8 page_offs;
        u8 dev_cmd_offs;
-       struct __percpu iwl_tso_hdr_page * tso_hdr_page;
+       struct iwl_tso_hdr_page __percpu *tso_hdr_page;
 
        struct {
                u8 fifo;
@@ -1385,14 +1385,14 @@ iwl_trans_release_nic_access(struct iwl_trans *trans)
        __release(nic_access);
 }
 
-static inline void iwl_trans_fw_error(struct iwl_trans *trans)
+static inline void iwl_trans_fw_error(struct iwl_trans *trans, bool sync)
 {
        if (WARN_ON_ONCE(!trans->op_mode))
                return;
 
        /* prevent double restarts due to the same erroneous FW */
        if (!test_and_set_bit(STATUS_FW_ERROR, &trans->status)) {
-               iwl_op_mode_nic_error(trans->op_mode);
+               iwl_op_mode_nic_error(trans->op_mode, sync);
                trans->state = IWL_TRANS_NO_FW;
        }
 }
index 1343f25..9d0d01f 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2013-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2013-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2015 Intel Deutschland GmbH
  */
 #ifndef __MVM_CONSTANTS_H
@@ -93,6 +93,7 @@
 #define IWL_MVM_ENABLE_EBS                     1
 #define IWL_MVM_FTM_INITIATOR_ALGO             IWL_TOF_ALGO_TYPE_MAX_LIKE
 #define IWL_MVM_FTM_INITIATOR_DYNACK           true
+#define IWL_MVM_FTM_LMR_FEEDBACK_TERMINATE     false
 #define IWL_MVM_FTM_R2I_MAX_REP                        7
 #define IWL_MVM_FTM_I2R_MAX_REP                        7
 #define IWL_MVM_FTM_R2I_MAX_STS                        1
 #define IWL_MVM_FTM_INITIATOR_SECURE_LTF       false
 #define IWL_MVM_FTM_RESP_NDP_SUPPORT           true
 #define IWL_MVM_FTM_RESP_LMR_FEEDBACK_SUPPORT  true
+#define IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR        5
+#define IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR        1000
 #define IWL_MVM_D3_DEBUG                       false
 #define IWL_MVM_USE_TWT                                true
 #define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA       10
index 6a259d8..0e97d5e 100644 (file)
@@ -101,11 +101,8 @@ static const u8 *iwl_mvm_find_max_pn(struct ieee80211_key_conf *key,
        return ret;
 }
 
-struct wowlan_key_data {
-       struct iwl_wowlan_rsc_tsc_params_cmd *rsc_tsc;
-       struct iwl_wowlan_tkip_params_cmd *tkip;
-       struct iwl_wowlan_kek_kck_material_cmd_v4 *kek_kck_cmd;
-       bool error, use_rsc_tsc, use_tkip, configure_keys;
+struct wowlan_key_reprogram_data {
+       bool error;
        int wep_key_idx;
 };
 
@@ -117,15 +114,8 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
 {
        struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
        struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-       struct wowlan_key_data *data = _data;
-       struct aes_sc *aes_sc, *aes_tx_sc = NULL;
-       struct tkip_sc *tkip_sc, *tkip_tx_sc = NULL;
-       struct iwl_p1k_cache *rx_p1ks;
-       u8 *rx_mic_key;
-       struct ieee80211_key_seq seq;
-       u32 cur_rx_iv32 = 0;
-       u16 p1k[IWL_P1K_SIZE];
-       int ret, i;
+       struct wowlan_key_reprogram_data *data = _data;
+       int ret;
 
        switch (key->cipher) {
        case WLAN_CIPHER_SUITE_WEP40:
@@ -162,18 +152,14 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                        wkc.wep_key.key_offset = data->wep_key_idx;
                }
 
-               if (data->configure_keys) {
-                       mutex_lock(&mvm->mutex);
-                       ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, 0,
-                                                  sizeof(wkc), &wkc);
-                       data->error = ret != 0;
-
-                       mvm->ptk_ivlen = key->iv_len;
-                       mvm->ptk_icvlen = key->icv_len;
-                       mvm->gtk_ivlen = key->iv_len;
-                       mvm->gtk_icvlen = key->icv_len;
-                       mutex_unlock(&mvm->mutex);
-               }
+               mutex_lock(&mvm->mutex);
+               ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, 0, sizeof(wkc), &wkc);
+               data->error = ret != 0;
+
+               mvm->ptk_ivlen = key->iv_len;
+               mvm->ptk_icvlen = key->icv_len;
+               mvm->gtk_ivlen = key->iv_len;
+               mvm->gtk_icvlen = key->icv_len;
 
                /* don't upload key again */
                return;
@@ -183,10 +169,8 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                return;
        case WLAN_CIPHER_SUITE_BIP_GMAC_256:
        case WLAN_CIPHER_SUITE_BIP_GMAC_128:
-               data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
                return;
        case WLAN_CIPHER_SUITE_AES_CMAC:
-               data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_CCM);
                /*
                 * Ignore CMAC keys -- the WoWLAN firmware doesn't support them
                 * but we also shouldn't abort suspend due to that. It does have
@@ -195,6 +179,58 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                 * be deauthenticated, but that was considered acceptable.
                 */
                return;
+       case WLAN_CIPHER_SUITE_TKIP:
+       case WLAN_CIPHER_SUITE_CCMP:
+       case WLAN_CIPHER_SUITE_GCMP:
+       case WLAN_CIPHER_SUITE_GCMP_256:
+               break;
+       }
+
+       mutex_lock(&mvm->mutex);
+       /*
+        * The D3 firmware hardcodes the key offset 0 as the key it
+        * uses to transmit packets to the AP, i.e. the PTK.
+        */
+       if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
+               mvm->ptk_ivlen = key->iv_len;
+               mvm->ptk_icvlen = key->icv_len;
+               ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
+       } else {
+               /*
+                * firmware only supports TSC/RSC for a single key,
+                * so if there are multiple keep overwriting them
+                * with new ones -- this relies on mac80211 doing
+                * list_add_tail().
+                */
+               mvm->gtk_ivlen = key->iv_len;
+               mvm->gtk_icvlen = key->icv_len;
+               ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
+       }
+       mutex_unlock(&mvm->mutex);
+       data->error = ret != 0;
+}
+
+struct wowlan_key_rsc_tsc_data {
+       struct iwl_wowlan_rsc_tsc_params_cmd_v4 *rsc_tsc;
+       bool have_rsc_tsc;
+};
+
+static void iwl_mvm_wowlan_get_rsc_tsc_data(struct ieee80211_hw *hw,
+                                           struct ieee80211_vif *vif,
+                                           struct ieee80211_sta *sta,
+                                           struct ieee80211_key_conf *key,
+                                           void *_data)
+{
+       struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+       struct wowlan_key_rsc_tsc_data *data = _data;
+       struct aes_sc *aes_sc;
+       struct tkip_sc *tkip_sc, *tkip_tx_sc = NULL;
+       struct ieee80211_key_seq seq;
+       int i;
+
+       switch (key->cipher) {
+       default:
+               break;
        case WLAN_CIPHER_SUITE_TKIP:
                if (sta) {
                        u64 pn64;
@@ -204,28 +240,12 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                        tkip_tx_sc =
                                &data->rsc_tsc->params.all_tsc_rsc.tkip.tsc;
 
-                       rx_p1ks = data->tkip->rx_uni;
-
                        pn64 = atomic64_read(&key->tx_pn);
                        tkip_tx_sc->iv16 = cpu_to_le16(TKIP_PN_TO_IV16(pn64));
                        tkip_tx_sc->iv32 = cpu_to_le32(TKIP_PN_TO_IV32(pn64));
-
-                       ieee80211_get_tkip_p1k_iv(key, TKIP_PN_TO_IV32(pn64),
-                                                 p1k);
-                       iwl_mvm_convert_p1k(p1k, data->tkip->tx.p1k);
-
-                       memcpy(data->tkip->mic_keys.tx,
-                              &key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
-                              IWL_MIC_KEY_SIZE);
-
-                       rx_mic_key = data->tkip->mic_keys.rx_unicast;
                } else {
                        tkip_sc =
                          data->rsc_tsc->params.all_tsc_rsc.tkip.multicast_rsc;
-                       rx_p1ks = data->tkip->rx_multi;
-                       rx_mic_key = data->tkip->mic_keys.rx_mcast;
-                       data->kek_kck_cmd->gtk_cipher =
-                               cpu_to_le32(STA_KEY_FLG_TKIP);
                }
 
                /*
@@ -237,29 +257,15 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                        ieee80211_get_key_rx_seq(key, i, &seq);
                        tkip_sc[i].iv16 = cpu_to_le16(seq.tkip.iv16);
                        tkip_sc[i].iv32 = cpu_to_le32(seq.tkip.iv32);
-                       /* wrapping isn't allowed, AP must rekey */
-                       if (seq.tkip.iv32 > cur_rx_iv32)
-                               cur_rx_iv32 = seq.tkip.iv32;
                }
 
-               ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
-                                         cur_rx_iv32, p1k);
-               iwl_mvm_convert_p1k(p1k, rx_p1ks[0].p1k);
-               ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
-                                         cur_rx_iv32 + 1, p1k);
-               iwl_mvm_convert_p1k(p1k, rx_p1ks[1].p1k);
-
-               memcpy(rx_mic_key,
-                      &key->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY],
-                      IWL_MIC_KEY_SIZE);
-
-               data->use_tkip = true;
-               data->use_rsc_tsc = true;
+               data->have_rsc_tsc = true;
                break;
        case WLAN_CIPHER_SUITE_CCMP:
        case WLAN_CIPHER_SUITE_GCMP:
        case WLAN_CIPHER_SUITE_GCMP_256:
                if (sta) {
+                       struct aes_sc *aes_tx_sc;
                        u64 pn64;
 
                        aes_sc =
@@ -272,10 +278,6 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                } else {
                        aes_sc =
                           data->rsc_tsc->params.all_tsc_rsc.aes.multicast_rsc;
-                       data->kek_kck_cmd->gtk_cipher =
-                               key->cipher == WLAN_CIPHER_SUITE_CCMP ?
-                               cpu_to_le32(STA_KEY_FLG_CCM) :
-                               cpu_to_le32(STA_KEY_FLG_GCMP);
                }
 
                /*
@@ -320,35 +322,301 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
                                                           ((u64)pn[0] << 40));
                        }
                }
-               data->use_rsc_tsc = true;
+               data->have_rsc_tsc = true;
                break;
        }
+}
 
-       IWL_DEBUG_WOWLAN(mvm, "GTK cipher %d\n", data->kek_kck_cmd->gtk_cipher);
+struct wowlan_key_rsc_v5_data {
+       struct iwl_wowlan_rsc_tsc_params_cmd *rsc;
+       bool have_rsc;
+       int gtks;
+       int gtk_ids[4];
+};
 
-       if (data->configure_keys) {
-               mutex_lock(&mvm->mutex);
+static void iwl_mvm_wowlan_get_rsc_v5_data(struct ieee80211_hw *hw,
+                                          struct ieee80211_vif *vif,
+                                          struct ieee80211_sta *sta,
+                                          struct ieee80211_key_conf *key,
+                                          void *_data)
+{
+       struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+       struct wowlan_key_rsc_v5_data *data = _data;
+       struct ieee80211_key_seq seq;
+       __le64 *rsc;
+       int i;
+
+       /* only for ciphers that can be PTK/GTK */
+       switch (key->cipher) {
+       default:
+               return;
+       case WLAN_CIPHER_SUITE_TKIP:
+       case WLAN_CIPHER_SUITE_CCMP:
+       case WLAN_CIPHER_SUITE_GCMP:
+       case WLAN_CIPHER_SUITE_GCMP_256:
+               break;
+       }
+
+       if (sta) {
+               rsc = data->rsc->ucast_rsc;
+       } else {
+               if (WARN_ON(data->gtks > ARRAY_SIZE(data->gtk_ids)))
+                       return;
+               data->gtk_ids[data->gtks] = key->keyidx;
+               rsc = data->rsc->mcast_rsc[data->gtks % 2];
+               if (WARN_ON(key->keyidx >
+                               ARRAY_SIZE(data->rsc->mcast_key_id_map)))
+                       return;
+               data->rsc->mcast_key_id_map[key->keyidx] = data->gtks % 2;
+               if (data->gtks >= 2) {
+                       int prev = data->gtks - 2;
+                       int prev_idx = data->gtk_ids[prev];
+
+                       data->rsc->mcast_key_id_map[prev_idx] =
+                               IWL_MCAST_KEY_MAP_INVALID;
+               }
+               data->gtks++;
+       }
+
+       switch (key->cipher) {
+       default:
+               WARN_ON(1);
+               break;
+       case WLAN_CIPHER_SUITE_TKIP:
+
+               /*
+                * For non-QoS this relies on the fact that both the uCode and
+                * mac80211 use TID 0 (as they need to to avoid replay attacks)
+                * for checking the IV in the frames.
+                */
+               for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+                       ieee80211_get_key_rx_seq(key, i, &seq);
+
+                       rsc[i] = cpu_to_le64(((u64)seq.tkip.iv32 << 16) |
+                                            seq.tkip.iv16);
+               }
+
+               data->have_rsc = true;
+               break;
+       case WLAN_CIPHER_SUITE_CCMP:
+       case WLAN_CIPHER_SUITE_GCMP:
+       case WLAN_CIPHER_SUITE_GCMP_256:
                /*
-                * The D3 firmware hardcodes the key offset 0 as the key it
-                * uses to transmit packets to the AP, i.e. the PTK.
+                * For non-QoS this relies on the fact that both the uCode and
+                * mac80211/our RX code use TID 0 for checking the PN.
                 */
-               if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
-                       mvm->ptk_ivlen = key->iv_len;
-                       mvm->ptk_icvlen = key->icv_len;
-                       ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
+               if (sta) {
+                       struct iwl_mvm_sta *mvmsta;
+                       struct iwl_mvm_key_pn *ptk_pn;
+                       const u8 *pn;
+
+                       mvmsta = iwl_mvm_sta_from_mac80211(sta);
+                       rcu_read_lock();
+                       ptk_pn = rcu_dereference(mvmsta->ptk_pn[key->keyidx]);
+                       if (WARN_ON(!ptk_pn)) {
+                               rcu_read_unlock();
+                               break;
+                       }
+
+                       for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+                               pn = iwl_mvm_find_max_pn(key, ptk_pn, &seq, i,
+                                               mvm->trans->num_rx_queues);
+                               rsc[i] = cpu_to_le64((u64)pn[5] |
+                                                    ((u64)pn[4] << 8) |
+                                                    ((u64)pn[3] << 16) |
+                                                    ((u64)pn[2] << 24) |
+                                                    ((u64)pn[1] << 32) |
+                                                    ((u64)pn[0] << 40));
+                       }
+
+                       rcu_read_unlock();
                } else {
-                       /*
-                        * firmware only supports TSC/RSC for a single key,
-                        * so if there are multiple keep overwriting them
-                        * with new ones -- this relies on mac80211 doing
-                        * list_add_tail().
-                        */
-                       mvm->gtk_ivlen = key->iv_len;
-                       mvm->gtk_icvlen = key->icv_len;
-                       ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
+                       for (i = 0; i < IWL_MAX_TID_COUNT; i++) {
+                               u8 *pn = seq.ccmp.pn;
+
+                               ieee80211_get_key_rx_seq(key, i, &seq);
+                               rsc[i] = cpu_to_le64((u64)pn[5] |
+                                                    ((u64)pn[4] << 8) |
+                                                    ((u64)pn[3] << 16) |
+                                                    ((u64)pn[2] << 24) |
+                                                    ((u64)pn[1] << 32) |
+                                                    ((u64)pn[0] << 40));
+                       }
                }
-               mutex_unlock(&mvm->mutex);
-               data->error = ret != 0;
+               data->have_rsc = true;
+               break;
+       }
+}
+
+static int iwl_mvm_wowlan_config_rsc_tsc(struct iwl_mvm *mvm,
+                                        struct ieee80211_vif *vif)
+{
+       struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+       int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+                                       WOWLAN_TSC_RSC_PARAM,
+                                       IWL_FW_CMD_VER_UNKNOWN);
+       int ret;
+
+       if (ver == 5) {
+               struct wowlan_key_rsc_v5_data data = {};
+               int i;
+
+               data.rsc = kmalloc(sizeof(*data.rsc), GFP_KERNEL);
+               if (!data.rsc)
+                       return -ENOMEM;
+
+               memset(data.rsc, 0xff, sizeof(*data.rsc));
+
+               for (i = 0; i < ARRAY_SIZE(data.rsc->mcast_key_id_map); i++)
+                       data.rsc->mcast_key_id_map[i] =
+                               IWL_MCAST_KEY_MAP_INVALID;
+               data.rsc->sta_id = cpu_to_le32(mvmvif->ap_sta_id);
+
+               ieee80211_iter_keys(mvm->hw, vif,
+                                   iwl_mvm_wowlan_get_rsc_v5_data,
+                                   &data);
+
+               if (data.have_rsc)
+                       ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
+                                                  CMD_ASYNC, sizeof(*data.rsc),
+                                                  data.rsc);
+               else
+                       ret = 0;
+               kfree(data.rsc);
+       } else if (ver == 4 || ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN) {
+               struct wowlan_key_rsc_tsc_data data = {};
+               int size;
+
+               data.rsc_tsc = kzalloc(sizeof(*data.rsc_tsc), GFP_KERNEL);
+               if (!data.rsc_tsc)
+                       return -ENOMEM;
+
+               if (ver == 4) {
+                       size = sizeof(*data.rsc_tsc);
+                       data.rsc_tsc->sta_id = cpu_to_le32(mvmvif->ap_sta_id);
+               } else {
+                       /* ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN */
+                       size = sizeof(data.rsc_tsc->params);
+               }
+
+               ieee80211_iter_keys(mvm->hw, vif,
+                                   iwl_mvm_wowlan_get_rsc_tsc_data,
+                                   &data);
+
+               if (data.have_rsc_tsc)
+                       ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
+                                                  CMD_ASYNC, size,
+                                                  data.rsc_tsc);
+               else
+                       ret = 0;
+               kfree(data.rsc_tsc);
+       } else {
+               ret = 0;
+               WARN_ON_ONCE(1);
+       }
+
+       return ret;
+}
+
+struct wowlan_key_tkip_data {
+       struct iwl_wowlan_tkip_params_cmd tkip;
+       bool have_tkip_keys;
+};
+
+static void iwl_mvm_wowlan_get_tkip_data(struct ieee80211_hw *hw,
+                                        struct ieee80211_vif *vif,
+                                        struct ieee80211_sta *sta,
+                                        struct ieee80211_key_conf *key,
+                                        void *_data)
+{
+       struct wowlan_key_tkip_data *data = _data;
+       struct iwl_p1k_cache *rx_p1ks;
+       u8 *rx_mic_key;
+       struct ieee80211_key_seq seq;
+       u32 cur_rx_iv32 = 0;
+       u16 p1k[IWL_P1K_SIZE];
+       int i;
+
+       switch (key->cipher) {
+       default:
+               break;
+       case WLAN_CIPHER_SUITE_TKIP:
+               if (sta) {
+                       u64 pn64;
+
+                       rx_p1ks = data->tkip.rx_uni;
+
+                       pn64 = atomic64_read(&key->tx_pn);
+
+                       ieee80211_get_tkip_p1k_iv(key, TKIP_PN_TO_IV32(pn64),
+                                                 p1k);
+                       iwl_mvm_convert_p1k(p1k, data->tkip.tx.p1k);
+
+                       memcpy(data->tkip.mic_keys.tx,
+                              &key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
+                              IWL_MIC_KEY_SIZE);
+
+                       rx_mic_key = data->tkip.mic_keys.rx_unicast;
+               } else {
+                       rx_p1ks = data->tkip.rx_multi;
+                       rx_mic_key = data->tkip.mic_keys.rx_mcast;
+               }
+
+               for (i = 0; i < IWL_NUM_RSC; i++) {
+                       /* wrapping isn't allowed, AP must rekey */
+                       if (seq.tkip.iv32 > cur_rx_iv32)
+                               cur_rx_iv32 = seq.tkip.iv32;
+               }
+
+               ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
+                                         cur_rx_iv32, p1k);
+               iwl_mvm_convert_p1k(p1k, rx_p1ks[0].p1k);
+               ieee80211_get_tkip_rx_p1k(key, vif->bss_conf.bssid,
+                                         cur_rx_iv32 + 1, p1k);
+               iwl_mvm_convert_p1k(p1k, rx_p1ks[1].p1k);
+
+               memcpy(rx_mic_key,
+                      &key->key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY],
+                      IWL_MIC_KEY_SIZE);
+
+               data->have_tkip_keys = true;
+               break;
+       }
+}
+
+struct wowlan_key_gtk_type_iter {
+       struct iwl_wowlan_kek_kck_material_cmd_v4 *kek_kck_cmd;
+};
+
+static void iwl_mvm_wowlan_gtk_type_iter(struct ieee80211_hw *hw,
+                                        struct ieee80211_vif *vif,
+                                        struct ieee80211_sta *sta,
+                                        struct ieee80211_key_conf *key,
+                                        void *_data)
+{
+       struct wowlan_key_gtk_type_iter *data = _data;
+
+       switch (key->cipher) {
+       default:
+               return;
+       case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+       case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+               data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
+               return;
+       case WLAN_CIPHER_SUITE_AES_CMAC:
+               data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_CCM);
+               return;
+       case WLAN_CIPHER_SUITE_CCMP:
+               if (!sta)
+                       data->kek_kck_cmd->gtk_cipher =
+                               cpu_to_le32(STA_KEY_FLG_CCM);
+               break;
+       case WLAN_CIPHER_SUITE_GCMP:
+       case WLAN_CIPHER_SUITE_GCMP_256:
+               if (!sta)
+                       data->kek_kck_cmd->gtk_cipher =
+                               cpu_to_le32(STA_KEY_FLG_GCMP);
+               break;
        }
 }
 
@@ -713,109 +981,81 @@ iwl_mvm_get_wowlan_config(struct iwl_mvm *mvm,
 }
 
 static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
-                                           struct ieee80211_vif *vif,
-                                           u32 cmd_flags)
+                                           struct ieee80211_vif *vif)
 {
-       struct iwl_wowlan_kek_kck_material_cmd_v4 kek_kck_cmd = {};
-       struct iwl_wowlan_kek_kck_material_cmd_v4 *_kek_kck_cmd = &kek_kck_cmd;
-       struct iwl_wowlan_tkip_params_cmd tkip_cmd = {};
        bool unified = fw_has_capa(&mvm->fw->ucode_capa,
                                   IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
-       struct wowlan_key_data key_data = {
-               .configure_keys = !unified,
-               .use_rsc_tsc = false,
-               .tkip = &tkip_cmd,
-               .use_tkip = false,
-               .kek_kck_cmd = _kek_kck_cmd,
-       };
+       struct wowlan_key_reprogram_data key_data = {};
        struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
        int ret;
        u8 cmd_ver;
        size_t cmd_size;
 
-       key_data.rsc_tsc = kzalloc(sizeof(*key_data.rsc_tsc), GFP_KERNEL);
-       if (!key_data.rsc_tsc)
-               return -ENOMEM;
-
-       /*
-        * if we have to configure keys, call ieee80211_iter_keys(),
-        * as we need non-atomic context in order to take the
-        * required locks.
-        */
-       /*
-        * Note that currently we don't propagate cmd_flags
-        * to the iterator. In case of key_data.configure_keys,
-        * all the configured commands are SYNC, and
-        * iwl_mvm_wowlan_program_keys() will take care of
-        * locking/unlocking mvm->mutex.
-        */
-       ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_program_keys,
-                           &key_data);
+       if (!unified) {
+               /*
+                * if we have to configure keys, call ieee80211_iter_keys(),
+                * as we need non-atomic context in order to take the
+                * required locks.
+                */
+               /*
+                * Note that currently we don't use CMD_ASYNC in the iterator.
+                * In case of key_data.configure_keys, all the configured
+                * commands are SYNC, and iwl_mvm_wowlan_program_keys() will
+                * take care of locking/unlocking mvm->mutex.
+                */
+               ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_program_keys,
+                                   &key_data);
 
-       if (key_data.error) {
-               ret = -EIO;
-               goto out;
+               if (key_data.error)
+                       return -EIO;
        }
 
-       if (key_data.use_rsc_tsc) {
-               int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
-                                               WOWLAN_TSC_RSC_PARAM,
-                                               IWL_FW_CMD_VER_UNKNOWN);
-               int size;
-
-               if (ver == 4) {
-                       size = sizeof(*key_data.rsc_tsc);
-                       key_data.rsc_tsc->sta_id =
-                               cpu_to_le32(mvmvif->ap_sta_id);
-
-               } else if (ver == 2 || ver == IWL_FW_CMD_VER_UNKNOWN) {
-                       size = sizeof(key_data.rsc_tsc->params);
-               } else {
-                       ret = 0;
-                       WARN_ON_ONCE(1);
-                       goto out;
-               }
-
-               ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_TSC_RSC_PARAM,
-                                          cmd_flags,
-                                          size,
-                                          key_data.rsc_tsc);
-
-               if (ret)
-                       goto out;
-       }
+       ret = iwl_mvm_wowlan_config_rsc_tsc(mvm, vif);
+       if (ret)
+               return ret;
 
-       if (key_data.use_tkip &&
-           !fw_has_api(&mvm->fw->ucode_capa,
+       if (!fw_has_api(&mvm->fw->ucode_capa,
                        IWL_UCODE_TLV_API_TKIP_MIC_KEYS)) {
                int ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
                                                WOWLAN_TKIP_PARAM,
                                                IWL_FW_CMD_VER_UNKNOWN);
+               struct wowlan_key_tkip_data tkip_data = {};
                int size;
 
                if (ver == 2) {
-                       size = sizeof(tkip_cmd);
-                       key_data.tkip->sta_id =
+                       size = sizeof(tkip_data.tkip);
+                       tkip_data.tkip.sta_id =
                                cpu_to_le32(mvmvif->ap_sta_id);
                } else if (ver == 1 || ver == IWL_FW_CMD_VER_UNKNOWN) {
                        size = sizeof(struct iwl_wowlan_tkip_params_cmd_ver_1);
                } else {
-                       ret =  -EINVAL;
                        WARN_ON_ONCE(1);
-                       goto out;
+                       return -EINVAL;
                }
 
-               /* send relevant data according to CMD version */
-               ret = iwl_mvm_send_cmd_pdu(mvm,
-                                          WOWLAN_TKIP_PARAM,
-                                          cmd_flags, size,
-                                          &tkip_cmd);
-               if (ret)
-                       goto out;
+               ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_get_tkip_data,
+                                   &tkip_data);
+
+               if (tkip_data.have_tkip_keys) {
+                       /* send relevant data according to CMD version */
+                       ret = iwl_mvm_send_cmd_pdu(mvm,
+                                                  WOWLAN_TKIP_PARAM,
+                                                  CMD_ASYNC, size,
+                                                  &tkip_data.tkip);
+                       if (ret)
+                               return ret;
+               }
        }
 
        /* configure rekey data only if offloaded rekey is supported (d3) */
        if (mvmvif->rekey_data.valid) {
+               struct iwl_wowlan_kek_kck_material_cmd_v4 kek_kck_cmd = {};
+               struct iwl_wowlan_kek_kck_material_cmd_v4 *_kek_kck_cmd =
+                       &kek_kck_cmd;
+               struct wowlan_key_gtk_type_iter gtk_type_data = {
+                       .kek_kck_cmd = _kek_kck_cmd,
+               };
+
                cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
                                                IWL_ALWAYS_LONG_GROUP,
                                                WOWLAN_KEK_KCK_MATERIAL,
@@ -824,6 +1064,9 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
                            cmd_ver != IWL_FW_CMD_VER_UNKNOWN))
                        return -EINVAL;
 
+               ieee80211_iter_keys(mvm->hw, vif, iwl_mvm_wowlan_gtk_type_iter,
+                                   &gtk_type_data);
+
                memcpy(kek_kck_cmd.kck, mvmvif->rekey_data.kck,
                       mvmvif->rekey_data.kck_len);
                kek_kck_cmd.kck_len = cpu_to_le16(mvmvif->rekey_data.kck_len);
@@ -851,17 +1094,13 @@ static int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm,
                IWL_DEBUG_WOWLAN(mvm, "setting akm %d\n",
                                 mvmvif->rekey_data.akm);
 
-               ret = iwl_mvm_send_cmd_pdu(mvm,
-                                          WOWLAN_KEK_KCK_MATERIAL, cmd_flags,
-                                          cmd_size,
-                                          _kek_kck_cmd);
+               ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_KEK_KCK_MATERIAL,
+                                          CMD_ASYNC, cmd_size, _kek_kck_cmd);
                if (ret)
-                       goto out;
+                       return ret;
        }
-       ret = 0;
-out:
-       kfree(key_data.rsc_tsc);
-       return ret;
+
+       return 0;
 }
 
 static int
@@ -893,7 +1132,7 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
         * that isn't really a problem though.
         */
        mutex_unlock(&mvm->mutex);
-       ret = iwl_mvm_wowlan_config_key_params(mvm, vif, CMD_ASYNC);
+       ret = iwl_mvm_wowlan_config_key_params(mvm, vif);
        mutex_lock(&mvm->mutex);
        if (ret)
                return ret;
@@ -1694,9 +1933,12 @@ iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm, u8 sta_id)
 
                status->gtk[0] = v7->gtk[0];
                status->igtk[0] = v7->igtk[0];
-       } else if (notif_ver == 9 || notif_ver == 10) {
+       } else if (notif_ver == 9 || notif_ver == 10 || notif_ver == 11) {
                struct iwl_wowlan_status_v9 *v9 = (void *)cmd.resp_pkt->data;
 
+               /* these three command versions have same layout and size, the
+                * difference is only in a few not used (reserved) fields.
+                */
                status = iwl_mvm_parse_wowlan_status_common_v9(mvm,
                                                               cmd.resp_pkt->data,
                                                               len);
index 95f883a..5dc39fb 100644 (file)
@@ -305,7 +305,6 @@ static ssize_t iwl_dbgfs_sar_geo_profile_read(struct file *file,
        int pos = 0;
        int bufsz = sizeof(buf);
        int tbl_idx;
-       u8 *value;
 
        if (!iwl_mvm_firmware_running(mvm))
                return -EIO;
@@ -321,16 +320,18 @@ static ssize_t iwl_dbgfs_sar_geo_profile_read(struct file *file,
                pos = scnprintf(buf, bufsz,
                                "SAR geographic profile disabled\n");
        } else {
-               value = &mvm->fwrt.geo_profiles[tbl_idx - 1].values[0];
-
                pos += scnprintf(buf + pos, bufsz - pos,
                                 "Use geographic profile %d\n", tbl_idx);
                pos += scnprintf(buf + pos, bufsz - pos,
                                 "2.4GHz:\n\tChain A offset: %hhu dBm\n\tChain B offset: %hhu dBm\n\tmax tx power: %hhu dBm\n",
-                                value[1], value[2], value[0]);
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].chains[0],
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].chains[1],
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[0].max);
                pos += scnprintf(buf + pos, bufsz - pos,
                                 "5.2GHz:\n\tChain A offset: %hhu dBm\n\tChain B offset: %hhu dBm\n\tmax tx power: %hhu dBm\n",
-                                value[4], value[5], value[3]);
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].chains[0],
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].chains[1],
+                                mvm->fwrt.geo_profiles[tbl_idx - 1].bands[1].max);
        }
        mutex_unlock(&mvm->mutex);
 
index 59cef0d..03e5bf5 100644 (file)
@@ -754,6 +754,33 @@ iwl_mvm_ftm_set_ndp_params(struct iwl_mvm *mvm,
        target->i2r_max_total_ltf = IWL_MVM_FTM_I2R_MAX_TOTAL_LTF;
 }
 
+static int
+iwl_mvm_ftm_put_target_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+                         struct cfg80211_pmsr_request_peer *peer,
+                         struct iwl_tof_range_req_ap_entry_v8 *target)
+{
+       u32 flags;
+       int ret = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, (void *)target);
+
+       if (ret)
+               return ret;
+
+       iwl_mvm_ftm_set_ndp_params(mvm, target);
+
+       /*
+        * If secure LTF is turned off, replace the flag with PMF only
+        */
+       flags = le32_to_cpu(target->initiator_ap_flags);
+       if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
+           !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
+               flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
+               flags |= IWL_INITIATOR_AP_FLAGS_PMF;
+               target->initiator_ap_flags = cpu_to_le32(flags);
+       }
+
+       return 0;
+}
+
 static int iwl_mvm_ftm_start_v12(struct iwl_mvm *mvm,
                                 struct ieee80211_vif *vif,
                                 struct cfg80211_pmsr_request *req)
@@ -773,24 +800,53 @@ static int iwl_mvm_ftm_start_v12(struct iwl_mvm *mvm,
        for (i = 0; i < cmd.num_of_ap; i++) {
                struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
                struct iwl_tof_range_req_ap_entry_v8 *target = &cmd.ap[i];
-               u32 flags;
 
-               err = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, (void *)target);
+               err = iwl_mvm_ftm_put_target_v8(mvm, vif, peer, target);
                if (err)
                        return err;
+       }
 
-               iwl_mvm_ftm_set_ndp_params(mvm, target);
-
-               /*
-                * If secure LTF is turned off, replace the flag with PMF only
-                */
-               flags = le32_to_cpu(target->initiator_ap_flags);
-               if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
-                   !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
-                       flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
-                       flags |= IWL_INITIATOR_AP_FLAGS_PMF;
-                       target->initiator_ap_flags = cpu_to_le32(flags);
+       return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
+}
+
+static int iwl_mvm_ftm_start_v13(struct iwl_mvm *mvm,
+                                struct ieee80211_vif *vif,
+                                struct cfg80211_pmsr_request *req)
+{
+       struct iwl_tof_range_req_cmd_v13 cmd;
+       struct iwl_host_cmd hcmd = {
+               .id = iwl_cmd_id(TOF_RANGE_REQ_CMD, LOCATION_GROUP, 0),
+               .dataflags[0] = IWL_HCMD_DFL_DUP,
+               .data[0] = &cmd,
+               .len[0] = sizeof(cmd),
+       };
+       u8 i;
+       int err;
+
+       iwl_mvm_ftm_cmd_common(mvm, vif, (void *)&cmd, req);
+
+       for (i = 0; i < cmd.num_of_ap; i++) {
+               struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+               struct iwl_tof_range_req_ap_entry_v9 *target = &cmd.ap[i];
+
+               err = iwl_mvm_ftm_put_target_v8(mvm, vif, peer, (void *)target);
+               if (err)
+                       return err;
+
+               if (peer->ftm.trigger_based || peer->ftm.non_trigger_based)
+                       target->bss_color = peer->ftm.bss_color;
+
+               if (peer->ftm.non_trigger_based) {
+                       target->min_time_between_msr =
+                               cpu_to_le16(IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR);
+                       target->burst_period =
+                               cpu_to_le16(IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR);
+               } else {
+                       target->min_time_between_msr = cpu_to_le16(0);
                }
+
+               target->band =
+                       iwl_mvm_phy_band_from_nl80211(peer->chandef.chan->band);
        }
 
        return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
@@ -814,6 +870,9 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
                                                   IWL_FW_CMD_VER_UNKNOWN);
 
                switch (cmd_ver) {
+               case 13:
+                       err = iwl_mvm_ftm_start_v13(mvm, vif, req);
+                       break;
                case 12:
                        err = iwl_mvm_ftm_start_v12(mvm, vif, req);
                        break;
index 5a249ea..eba5433 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <net/cfg80211.h>
 #include <linux/etherdevice.h>
@@ -77,7 +77,7 @@ static int iwl_mvm_ftm_responder_set_bw_v2(struct cfg80211_chan_def *chandef,
 
 static void
 iwl_mvm_ftm_responder_set_ndp(struct iwl_mvm *mvm,
-                             struct iwl_tof_responder_config_cmd_v8 *cmd)
+                             struct iwl_tof_responder_config_cmd_v9 *cmd)
 {
        /* Up to 2 R2I STS are allowed on the responder */
        u32 r2i_max_sts = IWL_MVM_FTM_R2I_MAX_STS < 2 ?
@@ -104,7 +104,7 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
         * field interpretation is different), so the same struct can be use
         * for all cases.
         */
-       struct iwl_tof_responder_config_cmd_v8 cmd = {
+       struct iwl_tof_responder_config_cmd_v9 cmd = {
                .channel_num = chandef->chan->hw_value,
                .cmd_valid_fields =
                        cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO |
@@ -115,10 +115,27 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
        u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
                                           TOF_RESPONDER_CONFIG_CMD, 6);
        int err;
+       int cmd_size;
 
        lockdep_assert_held(&mvm->mutex);
 
-if (cmd_ver == 8)
+       /* Use a default of bss_color=1 for now */
+       if (cmd_ver == 9) {
+               cmd.cmd_valid_fields |=
+                       cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_BSS_COLOR |
+                                   IWL_TOF_RESPONDER_CMD_VALID_MIN_MAX_TIME_BETWEEN_MSR);
+               cmd.bss_color = 1;
+               cmd.min_time_between_msr =
+                       cpu_to_le16(IWL_MVM_FTM_NON_TB_MIN_TIME_BETWEEN_MSR);
+               cmd.max_time_between_msr =
+                       cpu_to_le16(IWL_MVM_FTM_NON_TB_MAX_TIME_BETWEEN_MSR);
+               cmd_size = sizeof(struct iwl_tof_responder_config_cmd_v9);
+       } else {
+               /* All versions up to version 8 have the same size */
+               cmd_size = sizeof(struct iwl_tof_responder_config_cmd_v8);
+       }
+
+       if (cmd_ver >= 8)
                iwl_mvm_ftm_responder_set_ndp(mvm, &cmd);
 
        if (cmd_ver >= 7)
@@ -137,7 +154,7 @@ if (cmd_ver == 8)
 
        return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_RESPONDER_CONFIG_CMD,
                                                    LOCATION_GROUP, 0),
-                                   0, sizeof(cmd), &cmd);
+                                   0, cmd_size, &cmd);
 }
 
 static int
index 38fd588..74404c9 100644 (file)
@@ -743,7 +743,8 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
        /* all structs have the same common part, add it */
        len += sizeof(cmd.common);
 
-       ret = iwl_sar_select_profile(&mvm->fwrt, per_chain, ACPI_SAR_NUM_TABLES,
+       ret = iwl_sar_select_profile(&mvm->fwrt, per_chain,
+                                    IWL_NUM_CHAIN_TABLES,
                                     n_subbands, prof_a, prof_b);
 
        /* return on error or if the profile is disabled (positive number) */
@@ -1057,16 +1058,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = {
 
 static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
 {
-       int ret;
-
-       ret = iwl_mvm_get_ppag_table(mvm);
-       if (ret < 0) {
-               IWL_DEBUG_RADIO(mvm,
-                               "PPAG BIOS table invalid or unavailable. (%d)\n",
-                               ret);
-               return 0;
-       }
-
+       /* no need to read the table, done in INIT stage */
        if (!dmi_check_system(dmi_ppag_approved_list)) {
                IWL_DEBUG_RADIO(mvm,
                                "System vendor '%s' is not in the approved list, disabling PPAG.\n",
@@ -1191,12 +1183,65 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
                                        ret);
        }
 }
+
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
+{
+       int ret;
+
+       /* read PPAG table */
+       ret = iwl_mvm_get_ppag_table(mvm);
+       if (ret < 0) {
+               IWL_DEBUG_RADIO(mvm,
+                               "PPAG BIOS table invalid or unavailable. (%d)\n",
+                               ret);
+       }
+
+       /* read SAR tables */
+       ret = iwl_sar_get_wrds_table(&mvm->fwrt);
+       if (ret < 0) {
+               IWL_DEBUG_RADIO(mvm,
+                               "WRDS SAR BIOS table invalid or unavailable. (%d)\n",
+                               ret);
+               /*
+                * If not available, don't fail and don't bother with EWRD and
+                * WGDS */
+
+               if (!iwl_sar_get_wgds_table(&mvm->fwrt)) {
+                       /*
+                        * If basic SAR is not available, we check for WGDS,
+                        * which should *not* be available either.  If it is
+                        * available, issue an error, because we can't use SAR
+                        * Geo without basic SAR.
+                        */
+                       IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n");
+               }
+
+       } else {
+               ret = iwl_sar_get_ewrd_table(&mvm->fwrt);
+               /* if EWRD is not available, we can still use
+               * WRDS, so don't fail */
+               if (ret < 0)
+                       IWL_DEBUG_RADIO(mvm,
+                                       "EWRD SAR BIOS table invalid or unavailable. (%d)\n",
+                                       ret);
+
+               /* read geo SAR table */
+               if (iwl_sar_geo_support(&mvm->fwrt)) {
+                       ret = iwl_sar_get_wgds_table(&mvm->fwrt);
+                       if (ret < 0)
+                               IWL_DEBUG_RADIO(mvm,
+                                               "Geo SAR BIOS table invalid or unavailable. (%d)\n",
+                                               ret);
+                               /* we don't fail if the table is not available */
+               }
+       }
+}
 #else /* CONFIG_ACPI */
 
 inline int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm,
                                      int prof_a, int prof_b)
 {
-       return -ENOENT;
+       return 1;
 }
 
 inline int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
@@ -1231,6 +1276,10 @@ static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
 {
        return DSM_VALUE_RFI_DISABLE;
 }
+
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm)
+{
+}
 #endif /* CONFIG_ACPI */
 
 void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
@@ -1286,27 +1335,6 @@ void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
 
 static int iwl_mvm_sar_init(struct iwl_mvm *mvm)
 {
-       int ret;
-
-       ret = iwl_sar_get_wrds_table(&mvm->fwrt);
-       if (ret < 0) {
-               IWL_DEBUG_RADIO(mvm,
-                               "WRDS SAR BIOS table invalid or unavailable. (%d)\n",
-                               ret);
-               /*
-                * If not available, don't fail and don't bother with EWRD.
-                * Return 1 to tell that we can't use WGDS either.
-                */
-               return 1;
-       }
-
-       ret = iwl_sar_get_ewrd_table(&mvm->fwrt);
-       /* if EWRD is not available, we can still use WRDS, so don't fail */
-       if (ret < 0)
-               IWL_DEBUG_RADIO(mvm,
-                               "EWRD SAR BIOS table invalid or unavailable. (%d)\n",
-                               ret);
-
        return iwl_mvm_sar_select_profile(mvm, 1, 1);
 }
 
@@ -1542,19 +1570,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
                goto error;
 
        ret = iwl_mvm_sar_init(mvm);
-       if (ret == 0) {
+       if (ret == 0)
                ret = iwl_mvm_sar_geo_init(mvm);
-       } else if (ret == -ENOENT && !iwl_sar_get_wgds_table(&mvm->fwrt)) {
-               /*
-                * If basic SAR is not available, we check for WGDS,
-                * which should *not* be available either.  If it is
-                * available, issue an error, because we can't use SAR
-                * Geo without basic SAR.
-                */
-               IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n");
-       }
-
-       if (ret < 0)
+       else if (ret < 0)
                goto error;
 
        iwl_mvm_tas_init(mvm);
index fd5e089..fd352b2 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
@@ -647,12 +647,14 @@ static int iwl_mvm_mac_ctxt_cmd_sta(struct iwl_mvm *mvm,
 
        if (vif->bss_conf.he_support && !iwlwifi_mod_params.disable_11ax) {
                cmd.filter_flags |= cpu_to_le32(MAC_FILTER_IN_11AX);
-               if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT) {
+               if (vif->bss_conf.twt_requester && IWL_MVM_USE_TWT)
                        ctxt_sta->data_policy |= cpu_to_le32(TWT_SUPPORTED);
-                       if (vif->bss_conf.twt_protected)
-                               ctxt_sta->data_policy |=
-                                       cpu_to_le32(PROTECTED_TWT_SUPPORTED);
-               }
+               if (vif->bss_conf.twt_protected)
+                       ctxt_sta->data_policy |=
+                               cpu_to_le32(PROTECTED_TWT_SUPPORTED);
+               if (vif->bss_conf.twt_broadcast)
+                       ctxt_sta->data_policy |=
+                               cpu_to_le32(BROADCAST_TWT_SUPPORTED);
        }
 
 
@@ -1005,8 +1007,10 @@ int iwl_mvm_mac_ctxt_beacon_changed(struct iwl_mvm *mvm,
                return -ENOMEM;
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
-       if (mvm->beacon_inject_active)
+       if (mvm->beacon_inject_active) {
+               dev_kfree_skb(beacon);
                return -EBUSY;
+       }
 #endif
 
        ret = iwl_mvm_mac_ctxt_send_beacon(mvm, vif, beacon);
@@ -1427,14 +1431,34 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm,
 {
        struct iwl_rx_packet *pkt = rxb_addr(rxb);
        unsigned int pkt_len = iwl_rx_packet_payload_len(pkt);
-       struct iwl_stored_beacon_notif *sb = (void *)pkt->data;
+       struct iwl_stored_beacon_notif_common *sb = (void *)pkt->data;
        struct ieee80211_rx_status rx_status;
        struct sk_buff *skb;
+       u8 *data;
        u32 size = le32_to_cpu(sb->byte_count);
+       int ver = iwl_fw_lookup_cmd_ver(mvm->fw, PROT_OFFLOAD_GROUP,
+                                       STORED_BEACON_NTF, 0);
 
-       if (size == 0 || pkt_len < struct_size(sb, data, size))
+       if (size == 0)
                return;
 
+       /* handle per-version differences */
+       if (ver <= 2) {
+               struct iwl_stored_beacon_notif_v2 *sb_v2 = (void *)pkt->data;
+
+               if (pkt_len < struct_size(sb_v2, data, size))
+                       return;
+
+               data = sb_v2->data;
+       } else {
+               struct iwl_stored_beacon_notif_v3 *sb_v3 = (void *)pkt->data;
+
+               if (pkt_len < struct_size(sb_v3, data, size))
+                       return;
+
+               data = sb_v3->data;
+       }
+
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb) {
                IWL_ERR(mvm, "alloc_skb failed\n");
@@ -1455,7 +1479,7 @@ void iwl_mvm_rx_stored_beacon_notif(struct iwl_mvm *mvm,
                                               rx_status.band);
 
        /* copy the data */
-       skb_put_data(skb, sb->data, size);
+       skb_put_data(skb, data, size);
        memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status));
 
        /* pass it as regular rx to mac80211 */
index 70ebecb..3a45852 100644 (file)
@@ -390,7 +390,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
        if (mvm->trans->max_skb_frags)
                hw->netdev_features = NETIF_F_HIGHDMA | NETIF_F_SG;
 
-       hw->queues = IEEE80211_MAX_QUEUES;
+       hw->queues = IEEE80211_NUM_ACS;
        hw->offchannel_tx_hw_queue = IWL_MVM_OFFCHANNEL_QUEUE;
        hw->radiotap_mcs_details |= IEEE80211_RADIOTAP_MCS_HAVE_FEC |
                                    IEEE80211_RADIOTAP_MCS_HAVE_STBC;
@@ -762,11 +762,11 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw,
            !test_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status))
                goto drop;
 
-       /* treat non-bufferable MMPDUs on AP interfaces as broadcast */
-       if ((info->control.vif->type == NL80211_IFTYPE_AP ||
-            info->control.vif->type == NL80211_IFTYPE_ADHOC) &&
-           ieee80211_is_mgmt(hdr->frame_control) &&
-           !ieee80211_is_bufferable_mmpdu(hdr->frame_control))
+       /*
+        * bufferable MMPDUs or MMPDUs on STA interfaces come via TXQs
+        * so we treat the others as broadcast
+        */
+       if (ieee80211_is_mgmt(hdr->frame_control))
                sta = NULL;
 
        /* If there is no sta, and it's not offchannel - send through AP */
@@ -2440,6 +2440,9 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
                IWL_DEBUG_MAC80211(mvm, "arp filter changed\n");
                iwl_mvm_configure_bcast_filter(mvm);
        }
+
+       if (changes & BSS_CHANGED_BANDWIDTH)
+               iwl_mvm_apply_fw_smps_request(vif);
 }
 
 static int iwl_mvm_start_ap_ibss(struct ieee80211_hw *hw,
@@ -2987,16 +2990,20 @@ static void iwl_mvm_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy,
                                                    void *_data)
 {
        struct iwl_mvm_he_obss_narrow_bw_ru_data *data = _data;
+       const struct cfg80211_bss_ies *ies;
        const struct element *elem;
 
-       elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, bss->ies->data,
-                                 bss->ies->len);
+       rcu_read_lock();
+       ies = rcu_dereference(bss->ies);
+       elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, ies->data,
+                                 ies->len);
 
        if (!elem || elem->datalen < 10 ||
            !(elem->data[10] &
              WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT)) {
                data->tolerated = false;
        }
+       rcu_read_unlock();
 }
 
 static void iwl_mvm_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw,
@@ -5035,22 +5042,14 @@ static void iwl_mvm_event_mlme_callback_ini(struct iwl_mvm *mvm,
                                            struct ieee80211_vif *vif,
                                            const  struct ieee80211_mlme_event *mlme)
 {
-       if (mlme->data == ASSOC_EVENT && (mlme->status == MLME_DENIED ||
-                                         mlme->status == MLME_TIMEOUT)) {
+       if ((mlme->data == ASSOC_EVENT || mlme->data == AUTH_EVENT) &&
+           (mlme->status == MLME_DENIED || mlme->status == MLME_TIMEOUT)) {
                iwl_dbg_tlv_time_point(&mvm->fwrt,
                                       IWL_FW_INI_TIME_POINT_ASSOC_FAILED,
                                       NULL);
                return;
        }
 
-       if (mlme->data == AUTH_EVENT && (mlme->status == MLME_DENIED ||
-                                        mlme->status == MLME_TIMEOUT)) {
-               iwl_dbg_tlv_time_point(&mvm->fwrt,
-                                      IWL_FW_INI_TIME_POINT_EAPOL_FAILED,
-                                      NULL);
-               return;
-       }
-
        if (mlme->data == DEAUTH_RX_EVENT || mlme->data == DEAUTH_TX_EVENT) {
                iwl_dbg_tlv_time_point(&mvm->fwrt,
                                       IWL_FW_INI_TIME_POINT_DEASSOC,
index b50942f..f877d86 100644 (file)
@@ -431,8 +431,6 @@ struct iwl_mvm_vif {
 static inline struct iwl_mvm_vif *
 iwl_mvm_vif_from_mac80211(struct ieee80211_vif *vif)
 {
-       if (!vif)
-               return NULL;
        return (void *)vif->drv_priv;
 }
 
@@ -2045,6 +2043,7 @@ void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
 int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b);
 int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm);
 int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm);
+void iwl_mvm_get_acpi_tables(struct iwl_mvm *mvm);
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 void iwl_mvm_sta_add_debugfs(struct ieee80211_hw *hw,
                             struct ieee80211_vif *vif,
index 7fb4e61..da705fc 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2019 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2019, 2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -416,7 +416,7 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
        struct iwl_rx_packet *pkt;
        struct iwl_host_cmd cmd = {
                .id = MCC_UPDATE_CMD,
-               .flags = CMD_WANT_SKB,
+               .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
                .data = { &mcc_update_cmd },
        };
 
index 20e8d34..6f60018 100644 (file)
@@ -78,7 +78,6 @@ module_exit(iwl_mvm_exit);
 static void iwl_mvm_nic_config(struct iwl_op_mode *op_mode)
 {
        struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
-       struct iwl_trans_debug *dbg = &mvm->trans->dbg;
        u8 radio_cfg_type, radio_cfg_step, radio_cfg_dash;
        u32 reg_val = 0;
        u32 phy_config = iwl_mvm_get_phy_config(mvm);
@@ -115,10 +114,7 @@ static void iwl_mvm_nic_config(struct iwl_op_mode *op_mode)
        if (mvm->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_8000)
                reg_val |= CSR_HW_IF_CONFIG_REG_BIT_RADIO_SI;
 
-       if (iwl_fw_dbg_is_d3_debug_enabled(&mvm->fwrt) ||
-           (iwl_trans_dbg_ini_valid(mvm->trans) &&
-            dbg->fw_mon_cfg[IWL_FW_INI_ALLOCATION_ID_INTERNAL].buf_location)
-           )
+       if (iwl_fw_dbg_is_d3_debug_enabled(&mvm->fwrt))
                reg_val |= CSR_HW_IF_CONFIG_REG_D3_DEBUG;
 
        iwl_trans_set_bits_mask(mvm->trans, CSR_HW_IF_CONFIG_REG,
@@ -214,11 +210,14 @@ void iwl_mvm_apply_fw_smps_request(struct ieee80211_vif *vif)
 {
        struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
        struct iwl_mvm *mvm = mvmvif->mvm;
+       enum ieee80211_smps_mode mode = IEEE80211_SMPS_AUTOMATIC;
 
-       iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_FW,
-                           mvm->fw_static_smps_request ?
-                               IEEE80211_SMPS_STATIC :
-                               IEEE80211_SMPS_AUTOMATIC);
+       if (mvm->fw_static_smps_request &&
+           vif->bss_conf.chandef.width == NL80211_CHAN_WIDTH_160 &&
+           vif->bss_conf.he_support)
+               mode = IEEE80211_SMPS_STATIC;
+
+       iwl_mvm_update_smps(mvm, vif, IWL_MVM_SMPS_REQ_FW, mode);
 }
 
 static void iwl_mvm_intf_dual_chain_req(void *data, u8 *mac,
@@ -374,7 +373,7 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
                       struct iwl_mfu_assert_dump_notif),
        RX_HANDLER_GRP(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF,
                       iwl_mvm_rx_stored_beacon_notif, RX_HANDLER_SYNC,
-                      struct iwl_stored_beacon_notif),
+                      struct iwl_stored_beacon_notif_v2),
        RX_HANDLER_GRP(DATA_PATH_GROUP, MU_GROUP_MGMT_NOTIF,
                       iwl_mvm_mu_mimo_grp_notif, RX_HANDLER_SYNC,
                       struct iwl_mu_group_mgmt_notif),
@@ -693,11 +692,16 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
 
        if (ret && ret != -ERFKILL)
                iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER);
+       if (!ret && iwl_mvm_is_lar_supported(mvm)) {
+               mvm->hw->wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
+               ret = iwl_mvm_init_mcc(mvm);
+       }
 
        if (!iwlmvm_mod_params.init_dbg || !ret)
                iwl_mvm_stop_device(mvm);
 
        mutex_unlock(&mvm->mutex);
+       rtnl_unlock();
 
        if (ret < 0)
                IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
@@ -772,6 +776,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
        iwl_fw_runtime_init(&mvm->fwrt, trans, fw, &iwl_mvm_fwrt_ops, mvm,
                            dbgfs_dir);
 
+       iwl_mvm_get_acpi_tables(mvm);
+
        mvm->init_status = 0;
 
        if (iwl_mvm_has_new_rx_api(mvm)) {
@@ -792,10 +798,26 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 
        mvm->fw_restart = iwlwifi_mod_params.fw_restart ? -1 : 0;
 
-       mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
-       mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE;
-       mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
-       mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+       if (iwl_mvm_has_new_tx_api(mvm)) {
+               /*
+                * If we have the new TX/queue allocation API initialize them
+                * all to invalid numbers. We'll rewrite the ones that we need
+                * later, but that doesn't happen for all of them all of the
+                * time (e.g. P2P Device is optional), and if a dynamic queue
+                * ends up getting number 2 (IWL_MVM_DQA_P2P_DEVICE_QUEUE) then
+                * iwl_mvm_is_static_queue() erroneously returns true, and we
+                * might have things getting stuck.
+                */
+               mvm->aux_queue = IWL_MVM_INVALID_QUEUE;
+               mvm->snif_queue = IWL_MVM_INVALID_QUEUE;
+               mvm->probe_queue = IWL_MVM_INVALID_QUEUE;
+               mvm->p2p_dev_queue = IWL_MVM_INVALID_QUEUE;
+       } else {
+               mvm->aux_queue = IWL_MVM_DQA_AUX_QUEUE;
+               mvm->snif_queue = IWL_MVM_DQA_INJECT_MONITOR_QUEUE;
+               mvm->probe_queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
+               mvm->p2p_dev_queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+       }
 
        mvm->sf_state = SF_UNINIT;
        if (iwl_mvm_has_unified_ucode(mvm))
@@ -1400,7 +1422,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
         * can't recover this since we're already half suspended.
         */
        if (!mvm->fw_restart && fw_error) {
-               iwl_fw_error_collect(&mvm->fwrt);
+               iwl_fw_error_collect(&mvm->fwrt, false);
        } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
                struct iwl_mvm_reprobe *reprobe;
 
@@ -1451,7 +1473,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
                        }
                }
 
-               iwl_fw_error_collect(&mvm->fwrt);
+               iwl_fw_error_collect(&mvm->fwrt, false);
 
                if (fw_error && mvm->fw_restart > 0)
                        mvm->fw_restart--;
@@ -1459,13 +1481,31 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
        }
 }
 
-static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode)
+static void iwl_mvm_nic_error(struct iwl_op_mode *op_mode, bool sync)
 {
        struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
 
        if (!test_bit(STATUS_TRANS_DEAD, &mvm->trans->status))
                iwl_mvm_dump_nic_error_log(mvm);
 
+       if (sync) {
+               iwl_fw_error_collect(&mvm->fwrt, true);
+               /*
+                * Currently, the only case for sync=true is during
+                * shutdown, so just stop in this case. If/when that
+                * changes, we need to be a bit smarter here.
+                */
+               return;
+       }
+
+       /*
+        * If the firmware crashes while we're already considering it
+        * to be dead then don't ask for a restart, that cannot do
+        * anything useful anyway.
+        */
+       if (!test_bit(IWL_MVM_STATUS_FIRMWARE_RUNNING, &mvm->status))
+               return;
+
        iwl_mvm_nic_restart(mvm, true);
 }
 
index 0b81806..4434421 100644 (file)
@@ -11,7 +11,7 @@
  * DDR needs frequency in units of 16.666MHz, so provide FW with the
  * frequency values in the adjusted format.
  */
-const static struct iwl_rfi_lut_entry iwl_rfi_table[IWL_RFI_LUT_SIZE] = {
+static const struct iwl_rfi_lut_entry iwl_rfi_table[IWL_RFI_LUT_SIZE] = {
        /* LPDDR4 */
 
        /* frequency 3733MHz */
index c0babb8..c12f303 100644 (file)
@@ -69,8 +69,8 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb,
 
        /* if we are here - this for sure is either CCMP or GCMP */
        if (IS_ERR_OR_NULL(sta)) {
-               IWL_ERR(mvm,
-                       "expected hw-decrypted unicast frame for station\n");
+               IWL_DEBUG_DROP(mvm,
+                              "expected hw-decrypted unicast frame for station\n");
                return -1;
        }
 
@@ -279,7 +279,6 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
 {
        struct iwl_mvm_sta *mvmsta;
        struct iwl_mvm_vif *mvmvif;
-       u8 fwkeyid = u32_get_bits(status, IWL_RX_MPDU_STATUS_KEY);
        u8 keyid;
        struct ieee80211_key_conf *key;
        u32 len = le16_to_cpu(desc->mpdu_len);
@@ -299,6 +298,10 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
        if (!ieee80211_is_beacon(hdr->frame_control))
                return 0;
 
+       /* key mismatch - will also report !MIC_OK but we shouldn't count it */
+       if (!(status & IWL_RX_MPDU_STATUS_KEY_VALID))
+               return -1;
+
        /* good cases */
        if (likely(status & IWL_RX_MPDU_STATUS_MIC_OK &&
                   !(status & IWL_RX_MPDU_STATUS_REPLAY_ERROR)))
@@ -309,26 +312,36 @@ static int iwl_mvm_rx_mgmt_prot(struct ieee80211_sta *sta,
 
        mvmsta = iwl_mvm_sta_from_mac80211(sta);
 
-       /* what? */
-       if (fwkeyid != 6 && fwkeyid != 7)
-               return -1;
-
        mvmvif = iwl_mvm_vif_from_mac80211(mvmsta->vif);
 
-       key = rcu_dereference(mvmvif->bcn_prot.keys[fwkeyid - 6]);
-       if (!key)
-               return -1;
+       /*
+        * both keys will have the same cipher and MIC length, use
+        * whichever one is available
+        */
+       key = rcu_dereference(mvmvif->bcn_prot.keys[0]);
+       if (!key) {
+               key = rcu_dereference(mvmvif->bcn_prot.keys[1]);
+               if (!key)
+                       return -1;
+       }
 
        if (len < key->icv_len + IEEE80211_GMAC_PN_LEN + 2)
                return -1;
 
-       /*
-        * See if the key ID matches - if not this may be due to a
-        * switch and the firmware may erroneously report !MIC_OK.
-        */
+       /* get the real key ID */
        keyid = frame[len - key->icv_len - IEEE80211_GMAC_PN_LEN - 2];
-       if (keyid != fwkeyid)
-               return -1;
+       /* and if that's the other key, look it up */
+       if (keyid != key->keyidx) {
+               /*
+                * shouldn't happen since firmware checked, but be safe
+                * in case the MIC length is wrong too, for example
+                */
+               if (keyid != 6 && keyid != 7)
+                       return -1;
+               key = rcu_dereference(mvmvif->bcn_prot.keys[keyid - 6]);
+               if (!key)
+                       return -1;
+       }
 
        /* Report status to mac80211 */
        if (!(status & IWL_RX_MPDU_STATUS_MIC_OK))
index 0368b71..d78e436 100644 (file)
@@ -1648,7 +1648,7 @@ iwl_mvm_umac_scan_cfg_channels_v6(struct iwl_mvm *mvm,
                struct iwl_scan_channel_cfg_umac *cfg = &cp->channel_config[i];
                u32 n_aps_flag =
                        iwl_mvm_scan_ch_n_aps_flag(vif_type,
-                                                  cfg->v2.channel_num);
+                                                  channels[i]->hw_value);
 
                cfg->flags = cpu_to_le32(flags | n_aps_flag);
                cfg->v2.channel_num = channels[i]->hw_value;
@@ -1661,22 +1661,32 @@ iwl_mvm_umac_scan_cfg_channels_v6(struct iwl_mvm *mvm,
 }
 
 static int
-iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm_scan_params *params,
-                                   __le32 *cmd_short_ssid, u8 *cmd_bssid,
-                                   u8 *scan_ssid_num, u8 *bssid_num)
+iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm *mvm,
+                                   struct iwl_mvm_scan_params *params,
+                                    struct iwl_scan_probe_params_v4 *pp)
 {
        int j, idex_s = 0, idex_b = 0;
        struct cfg80211_scan_6ghz_params *scan_6ghz_params =
                params->scan_6ghz_params;
+       bool hidden_supported = fw_has_capa(&mvm->fw->ucode_capa,
+                                           IWL_UCODE_TLV_CAPA_HIDDEN_6GHZ_SCAN);
 
-       if (!params->n_6ghz_params) {
-               for (j = 0; j < params->n_ssids; j++) {
-                       cmd_short_ssid[idex_s++] =
-                               cpu_to_le32(~crc32_le(~0, params->ssids[j].ssid,
-                                                     params->ssids[j].ssid_len));
-                       (*scan_ssid_num)++;
+       for (j = 0; j < params->n_ssids && idex_s < SCAN_SHORT_SSID_MAX_SIZE;
+            j++) {
+               if (!params->ssids[j].ssid_len)
+                       continue;
+
+               pp->short_ssid[idex_s] =
+                       cpu_to_le32(~crc32_le(~0, params->ssids[j].ssid,
+                                             params->ssids[j].ssid_len));
+
+               if (hidden_supported) {
+                       pp->direct_scan[idex_s].id = WLAN_EID_SSID;
+                       pp->direct_scan[idex_s].len = params->ssids[j].ssid_len;
+                       memcpy(pp->direct_scan[idex_s].ssid, params->ssids[j].ssid,
+                              params->ssids[j].ssid_len);
                }
-               return 0;
+               idex_s++;
        }
 
        /*
@@ -1693,40 +1703,40 @@ iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm_scan_params *params,
                /* First, try to place the short SSID */
                if (scan_6ghz_params[j].short_ssid_valid) {
                        for (k = 0; k < idex_s; k++) {
-                               if (cmd_short_ssid[k] ==
+                               if (pp->short_ssid[k] ==
                                    cpu_to_le32(scan_6ghz_params[j].short_ssid))
                                        break;
                        }
 
                        if (k == idex_s && idex_s < SCAN_SHORT_SSID_MAX_SIZE) {
-                               cmd_short_ssid[idex_s++] =
+                               pp->short_ssid[idex_s++] =
                                        cpu_to_le32(scan_6ghz_params[j].short_ssid);
-                               (*scan_ssid_num)++;
                        }
                }
 
                /* try to place BSSID for the same entry */
                for (k = 0; k < idex_b; k++) {
-                       if (!memcmp(&cmd_bssid[ETH_ALEN * k],
+                       if (!memcmp(&pp->bssid_array[k],
                                    scan_6ghz_params[j].bssid, ETH_ALEN))
                                break;
                }
 
                if (k == idex_b && idex_b < SCAN_BSSID_MAX_SIZE) {
-                       memcpy(&cmd_bssid[ETH_ALEN * idex_b++],
+                       memcpy(&pp->bssid_array[idex_b++],
                               scan_6ghz_params[j].bssid, ETH_ALEN);
-                       (*bssid_num)++;
                }
        }
+
+       pp->short_ssid_num = idex_s;
+       pp->bssid_num = idex_b;
        return 0;
 }
 
 /* TODO: this function can be merged with iwl_mvm_scan_umac_fill_ch_p_v6 */
 static void
 iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
-                                    u32 n_channels, __le32 *cmd_short_ssid,
-                                    u8 *cmd_bssid, u8 scan_ssid_num,
-                                    u8 bssid_num,
+                                    u32 n_channels,
+                                    struct iwl_scan_probe_params_v4 *pp,
                                     struct iwl_scan_channel_params_v6 *cp,
                                     enum nl80211_iftype vif_type)
 {
@@ -1741,7 +1751,7 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
 
                u32 s_ssid_bitmap = 0, bssid_bitmap = 0, flags = 0;
                u8 j, k, s_max = 0, b_max = 0, n_used_bssid_entries;
-               bool force_passive, found = false,
+               bool force_passive, found = false, allow_passive = true,
                     unsolicited_probe_on_chan = false, psc_no_listen = false;
 
                cfg->v1.channel_num = params->channels[i]->hw_value;
@@ -1766,9 +1776,9 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
                                scan_6ghz_params[j].unsolicited_probe;
                        psc_no_listen |= scan_6ghz_params[j].psc_no_listen;
 
-                       for (k = 0; k < scan_ssid_num; k++) {
+                       for (k = 0; k < pp->short_ssid_num; k++) {
                                if (!scan_6ghz_params[j].unsolicited_probe &&
-                                   le32_to_cpu(cmd_short_ssid[k]) ==
+                                   le32_to_cpu(pp->short_ssid[k]) ==
                                    scan_6ghz_params[j].short_ssid) {
                                        /* Relevant short SSID bit set */
                                        if (s_ssid_bitmap & BIT(k)) {
@@ -1778,7 +1788,10 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
 
                                        /*
                                         * Use short SSID only to create a new
-                                        * iteration during channel dwell.
+                                        * iteration during channel dwell or in
+                                        * case that the short SSID has a
+                                        * matching SSID, i.e., scan for hidden
+                                        * APs.
                                         */
                                        if (n_used_bssid_entries >= 3) {
                                                s_ssid_bitmap |= BIT(k);
@@ -1786,6 +1799,12 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
                                                n_used_bssid_entries -= 3;
                                                found = true;
                                                break;
+                                       } else if (pp->direct_scan[k].len) {
+                                               s_ssid_bitmap |= BIT(k);
+                                               s_max++;
+                                               found = true;
+                                               allow_passive = false;
+                                               break;
                                        }
                                }
                        }
@@ -1793,8 +1812,8 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
                        if (found)
                                continue;
 
-                       for (k = 0; k < bssid_num; k++) {
-                               if (!memcmp(&cmd_bssid[ETH_ALEN * k],
+                       for (k = 0; k < pp->bssid_num; k++) {
+                               if (!memcmp(&pp->bssid_array[k],
                                            scan_6ghz_params[j].bssid,
                                            ETH_ALEN)) {
                                        if (!(bssid_bitmap & BIT(k))) {
@@ -1849,7 +1868,7 @@ iwl_mvm_umac_scan_cfg_channels_v6_6g(struct iwl_mvm_scan_params *params,
                        force_passive |= (unsolicited_probe_on_chan &&
                                          (s_max > 1 || b_max > 3));
                }
-               if (force_passive ||
+               if ((allow_passive && force_passive) ||
                    (!flags && !cfg80211_channel_is_psc(params->channels[i])))
                        flags |= IWL_UHB_CHAN_CFG_FLAG_FORCE_PASSIVE;
 
@@ -2368,32 +2387,28 @@ static int iwl_mvm_scan_umac_v14(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
        if (ret)
                return ret;
 
-       iwl_mvm_scan_umac_fill_probe_p_v4(params, &scan_p->probe_params,
-                                         &bitmap_ssid);
        if (!params->scan_6ghz) {
+               iwl_mvm_scan_umac_fill_probe_p_v4(params, &scan_p->probe_params,
+                                         &bitmap_ssid);
                iwl_mvm_scan_umac_fill_ch_p_v6(mvm, params, vif,
-                                              &scan_p->channel_params, bitmap_ssid);
+                                      &scan_p->channel_params, bitmap_ssid);
 
                return 0;
+       } else {
+               pb->preq = params->preq;
        }
+
        cp->flags = iwl_mvm_scan_umac_chan_flags_v2(mvm, params, vif);
        cp->n_aps_override[0] = IWL_SCAN_ADWELL_N_APS_GO_FRIENDLY;
        cp->n_aps_override[1] = IWL_SCAN_ADWELL_N_APS_SOCIAL_CHS;
 
-       ret = iwl_mvm_umac_scan_fill_6g_chan_list(params, pb->short_ssid,
-                                                 pb->bssid_array[0],
-                                                 &pb->short_ssid_num,
-                                                 &pb->bssid_num);
+       ret = iwl_mvm_umac_scan_fill_6g_chan_list(mvm, params, pb);
        if (ret)
                return ret;
 
        iwl_mvm_umac_scan_cfg_channels_v6_6g(params,
                                             params->n_channels,
-                                            pb->short_ssid,
-                                            pb->bssid_array[0],
-                                            pb->short_ssid_num,
-                                            pb->bssid_num, cp,
-                                            vif->type);
+                                            pb, cp, vif->type);
        cp->count = params->n_channels;
        if (!params->n_ssids ||
            (params->n_ssids == 1 && !params->ssids[0].ssid_len))
index 9c45a64..a64874c 100644 (file)
@@ -316,8 +316,9 @@ static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue,
 }
 
 static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
-                              int queue, u8 tid, u8 flags)
+                              u16 *queueptr, u8 tid, u8 flags)
 {
+       int queue = *queueptr;
        struct iwl_scd_txq_cfg_cmd cmd = {
                .scd_queue = queue,
                .action = SCD_CFG_DISABLE_QUEUE,
@@ -326,6 +327,7 @@ static int iwl_mvm_disable_txq(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 
        if (iwl_mvm_has_new_tx_api(mvm)) {
                iwl_trans_txq_free(mvm->trans, queue);
+               *queueptr = IWL_MVM_INVALID_QUEUE;
 
                return 0;
        }
@@ -487,6 +489,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue,
        u8 sta_id, tid;
        unsigned long disable_agg_tids = 0;
        bool same_sta;
+       u16 queue_tmp = queue;
        int ret;
 
        lockdep_assert_held(&mvm->mutex);
@@ -509,7 +512,7 @@ static int iwl_mvm_free_inactive_queue(struct iwl_mvm *mvm, int queue,
                iwl_mvm_invalidate_sta_queue(mvm, queue,
                                             disable_agg_tids, false);
 
-       ret = iwl_mvm_disable_txq(mvm, old_sta, queue, tid, 0);
+       ret = iwl_mvm_disable_txq(mvm, old_sta, &queue_tmp, tid, 0);
        if (ret) {
                IWL_ERR(mvm,
                        "Failed to free inactive queue %d (ret=%d)\n",
@@ -1184,6 +1187,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm,
        unsigned int wdg_timeout =
                iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false);
        int queue = -1;
+       u16 queue_tmp;
        unsigned long disable_agg_tids = 0;
        enum iwl_mvm_agg_state queue_state;
        bool shared_queue = false, inc_ssn;
@@ -1332,7 +1336,8 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm,
        return 0;
 
 out_err:
-       iwl_mvm_disable_txq(mvm, sta, queue, tid, 0);
+       queue_tmp = queue;
+       iwl_mvm_disable_txq(mvm, sta, &queue_tmp, tid, 0);
 
        return ret;
 }
@@ -1779,7 +1784,7 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm,
                if (mvm_sta->tid_data[i].txq_id == IWL_MVM_INVALID_QUEUE)
                        continue;
 
-               iwl_mvm_disable_txq(mvm, sta, mvm_sta->tid_data[i].txq_id, i,
+               iwl_mvm_disable_txq(mvm, sta, &mvm_sta->tid_data[i].txq_id, i,
                                    0);
                mvm_sta->tid_data[i].txq_id = IWL_MVM_INVALID_QUEUE;
        }
@@ -1987,7 +1992,7 @@ static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx,
        ret = iwl_mvm_add_int_sta_common(mvm, sta, addr, macidx, maccolor);
        if (ret) {
                if (!iwl_mvm_has_new_tx_api(mvm))
-                       iwl_mvm_disable_txq(mvm, NULL, *queue,
+                       iwl_mvm_disable_txq(mvm, NULL, queue,
                                            IWL_MAX_TID_COUNT, 0);
                return ret;
        }
@@ -2060,7 +2065,7 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
        if (WARN_ON_ONCE(mvm->snif_sta.sta_id == IWL_MVM_INVALID_STA))
                return -EINVAL;
 
-       iwl_mvm_disable_txq(mvm, NULL, mvm->snif_queue, IWL_MAX_TID_COUNT, 0);
+       iwl_mvm_disable_txq(mvm, NULL, &mvm->snif_queue, IWL_MAX_TID_COUNT, 0);
        ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id);
        if (ret)
                IWL_WARN(mvm, "Failed sending remove station\n");
@@ -2077,7 +2082,7 @@ int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm)
        if (WARN_ON_ONCE(mvm->aux_sta.sta_id == IWL_MVM_INVALID_STA))
                return -EINVAL;
 
-       iwl_mvm_disable_txq(mvm, NULL, mvm->aux_queue, IWL_MAX_TID_COUNT, 0);
+       iwl_mvm_disable_txq(mvm, NULL, &mvm->aux_queue, IWL_MAX_TID_COUNT, 0);
        ret = iwl_mvm_rm_sta_common(mvm, mvm->aux_sta.sta_id);
        if (ret)
                IWL_WARN(mvm, "Failed sending remove station\n");
@@ -2173,7 +2178,7 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
                                          struct ieee80211_vif *vif)
 {
        struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
-       int queue;
+       u16 *queueptr, queue;
 
        lockdep_assert_held(&mvm->mutex);
 
@@ -2182,10 +2187,10 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
        switch (vif->type) {
        case NL80211_IFTYPE_AP:
        case NL80211_IFTYPE_ADHOC:
-               queue = mvm->probe_queue;
+               queueptr = &mvm->probe_queue;
                break;
        case NL80211_IFTYPE_P2P_DEVICE:
-               queue = mvm->p2p_dev_queue;
+               queueptr = &mvm->p2p_dev_queue;
                break;
        default:
                WARN(1, "Can't free bcast queue on vif type %d\n",
@@ -2193,7 +2198,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
                return;
        }
 
-       iwl_mvm_disable_txq(mvm, NULL, queue, IWL_MAX_TID_COUNT, 0);
+       queue = *queueptr;
+       iwl_mvm_disable_txq(mvm, NULL, queueptr, IWL_MAX_TID_COUNT, 0);
        if (iwl_mvm_has_new_tx_api(mvm))
                return;
 
@@ -2428,7 +2434,7 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 
        iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true);
 
-       iwl_mvm_disable_txq(mvm, NULL, mvmvif->cab_queue, 0, 0);
+       iwl_mvm_disable_txq(mvm, NULL, &mvmvif->cab_queue, 0, 0);
 
        ret = iwl_mvm_rm_sta_common(mvm, mvmvif->mcast_sta.sta_id);
        if (ret)
@@ -3190,6 +3196,20 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm,
        return NULL;
 }
 
+static int iwl_mvm_pn_cmp(const u8 *pn1, const u8 *pn2, int len)
+{
+       int i;
+
+       for (i = len - 1; i >= 0; i--) {
+               if (pn1[i] > pn2[i])
+                       return 1;
+               if (pn1[i] < pn2[i])
+                       return -1;
+       }
+
+       return 0;
+}
+
 static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
                                u32 sta_id,
                                struct ieee80211_key_conf *key, bool mcast,
@@ -3208,6 +3228,9 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
        int i, size;
        bool new_api = fw_has_api(&mvm->fw->ucode_capa,
                                  IWL_UCODE_TLV_API_TKIP_MIC_KEYS);
+       int api_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LONG_GROUP,
+                                           ADD_STA_KEY,
+                                           new_api ? 2 : 1);
 
        if (sta_id == IWL_MVM_INVALID_STA)
                return -EINVAL;
@@ -3220,7 +3243,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
        switch (key->cipher) {
        case WLAN_CIPHER_SUITE_TKIP:
                key_flags |= cpu_to_le16(STA_KEY_FLG_TKIP);
-               if (new_api) {
+               if (api_ver >= 2) {
                        memcpy((void *)&u.cmd.tx_mic_key,
                               &key->key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY],
                               IWL_MIC_KEY_SIZE);
@@ -3241,7 +3264,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
        case WLAN_CIPHER_SUITE_CCMP:
                key_flags |= cpu_to_le16(STA_KEY_FLG_CCM);
                memcpy(u.cmd.common.key, key->key, key->keylen);
-               if (new_api)
+               if (api_ver >= 2)
                        pn = atomic64_read(&key->tx_pn);
                break;
        case WLAN_CIPHER_SUITE_WEP104:
@@ -3257,7 +3280,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
        case WLAN_CIPHER_SUITE_GCMP:
                key_flags |= cpu_to_le16(STA_KEY_FLG_GCMP);
                memcpy(u.cmd.common.key, key->key, key->keylen);
-               if (new_api)
+               if (api_ver >= 2)
                        pn = atomic64_read(&key->tx_pn);
                break;
        default:
@@ -3274,7 +3297,46 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
        u.cmd.common.key_flags = key_flags;
        u.cmd.common.sta_id = sta_id;
 
-       if (new_api) {
+       if (key->cipher == WLAN_CIPHER_SUITE_TKIP)
+               i = 0;
+       else
+               i = -1;
+
+       for (; i < IEEE80211_NUM_TIDS; i++) {
+               struct ieee80211_key_seq seq = {};
+               u8 _rx_pn[IEEE80211_MAX_PN_LEN] = {}, *rx_pn = _rx_pn;
+               int rx_pn_len = 8;
+               /* there's a hole at 2/3 in FW format depending on version */
+               int hole = api_ver >= 3 ? 0 : 2;
+
+               ieee80211_get_key_rx_seq(key, i, &seq);
+
+               if (key->cipher == WLAN_CIPHER_SUITE_TKIP) {
+                       rx_pn[0] = seq.tkip.iv16;
+                       rx_pn[1] = seq.tkip.iv16 >> 8;
+                       rx_pn[2 + hole] = seq.tkip.iv32;
+                       rx_pn[3 + hole] = seq.tkip.iv32 >> 8;
+                       rx_pn[4 + hole] = seq.tkip.iv32 >> 16;
+                       rx_pn[5 + hole] = seq.tkip.iv32 >> 24;
+               } else if (key_flags & cpu_to_le16(STA_KEY_FLG_EXT)) {
+                       rx_pn = seq.hw.seq;
+                       rx_pn_len = seq.hw.seq_len;
+               } else {
+                       rx_pn[0] = seq.ccmp.pn[0];
+                       rx_pn[1] = seq.ccmp.pn[1];
+                       rx_pn[2 + hole] = seq.ccmp.pn[2];
+                       rx_pn[3 + hole] = seq.ccmp.pn[3];
+                       rx_pn[4 + hole] = seq.ccmp.pn[4];
+                       rx_pn[5 + hole] = seq.ccmp.pn[5];
+               }
+
+               if (iwl_mvm_pn_cmp(rx_pn, (u8 *)&u.cmd.common.rx_secur_seq_cnt,
+                                  rx_pn_len) > 0)
+                       memcpy(&u.cmd.common.rx_secur_seq_cnt, rx_pn,
+                              rx_pn_len);
+       }
+
+       if (api_ver >= 2) {
                u.cmd.transmit_seq_cnt = cpu_to_le64(pn);
                size = sizeof(u.cmd);
        } else {
@@ -3411,7 +3473,6 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
                                 u8 key_offset,
                                 bool mcast)
 {
-       int ret;
        const u8 *addr;
        struct ieee80211_key_seq seq;
        u16 p1k[5];
@@ -3433,30 +3494,19 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
                return -EINVAL;
        }
 
-       switch (keyconf->cipher) {
-       case WLAN_CIPHER_SUITE_TKIP:
+       if (keyconf->cipher == WLAN_CIPHER_SUITE_TKIP) {
                addr = iwl_mvm_get_mac_addr(mvm, vif, sta);
                /* get phase 1 key from mac80211 */
                ieee80211_get_key_rx_seq(keyconf, 0, &seq);
                ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
-               ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
-                                          seq.tkip.iv32, p1k, 0, key_offset,
-                                          mfp);
-               break;
-       case WLAN_CIPHER_SUITE_CCMP:
-       case WLAN_CIPHER_SUITE_WEP40:
-       case WLAN_CIPHER_SUITE_WEP104:
-       case WLAN_CIPHER_SUITE_GCMP:
-       case WLAN_CIPHER_SUITE_GCMP_256:
-               ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
-                                          0, NULL, 0, key_offset, mfp);
-               break;
-       default:
-               ret = iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
-                                          0, NULL, 0, key_offset, mfp);
+
+               return iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
+                                           seq.tkip.iv32, p1k, 0, key_offset,
+                                           mfp);
        }
 
-       return ret;
+       return iwl_mvm_send_sta_key(mvm, sta_id, keyconf, mcast,
+                                   0, NULL, 0, key_offset, mfp);
 }
 
 int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
index d3307a1..25af88a 100644 (file)
@@ -168,6 +168,16 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
                rcu_read_unlock();
        }
 
+       if (vif->bss_conf.assoc) {
+               /*
+                * When not associated, this will be called from
+                * iwl_mvm_event_mlme_callback_ini()
+                */
+               iwl_dbg_tlv_time_point(&mvm->fwrt,
+                                      IWL_FW_INI_TIME_POINT_ASSOC_FAILED,
+                                      NULL);
+       }
+
        iwl_mvm_connection_loss(mvm, vif, errmsg);
        return true;
 }
@@ -246,6 +256,18 @@ static void iwl_mvm_te_check_trigger(struct iwl_mvm *mvm,
        }
 }
 
+static void iwl_mvm_p2p_roc_finished(struct iwl_mvm *mvm)
+{
+       /*
+        * If the IWL_MVM_STATUS_NEED_FLUSH_P2P is already set, then the
+        * roc_done_wk is already scheduled or running, so don't schedule it
+        * again to avoid a race where the roc_done_wk clears this bit after
+        * it is set here, affecting the next run of the roc_done_wk.
+        */
+       if (!test_and_set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status))
+               iwl_mvm_roc_finished(mvm);
+}
+
 /*
  * Handles a FW notification for an event that is known to the driver.
  *
@@ -297,8 +319,7 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
                switch (te_data->vif->type) {
                case NL80211_IFTYPE_P2P_DEVICE:
                        ieee80211_remain_on_channel_expired(mvm->hw);
-                       set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
-                       iwl_mvm_roc_finished(mvm);
+                       iwl_mvm_p2p_roc_finished(mvm);
                        break;
                case NL80211_IFTYPE_STATION:
                        /*
@@ -674,8 +695,7 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
                        /* Session protection is still ongoing. Cancel it */
                        iwl_mvm_cancel_session_protection(mvm, mvmvif, id);
                        if (iftype == NL80211_IFTYPE_P2P_DEVICE) {
-                               set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
-                               iwl_mvm_roc_finished(mvm);
+                               iwl_mvm_p2p_roc_finished(mvm);
                        }
                }
                return false;
@@ -842,8 +862,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm,
                /* End TE, notify mac80211 */
                mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID;
                ieee80211_remain_on_channel_expired(mvm->hw);
-               set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
-               iwl_mvm_roc_finished(mvm);
+               iwl_mvm_p2p_roc_finished(mvm);
        } else if (le32_to_cpu(notif->start)) {
                if (WARN_ON(mvmvif->time_event_data.id !=
                                le32_to_cpu(notif->conf_id)))
@@ -1004,14 +1023,13 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
                if (vif->type == NL80211_IFTYPE_P2P_DEVICE) {
                        iwl_mvm_cancel_session_protection(mvm, mvmvif,
                                                          mvmvif->time_event_data.id);
-                       set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+                       iwl_mvm_p2p_roc_finished(mvm);
                } else {
                        iwl_mvm_remove_aux_roc_te(mvm, mvmvif,
                                                  &mvmvif->time_event_data);
+                       iwl_mvm_roc_finished(mvm);
                }
 
-               iwl_mvm_roc_finished(mvm);
-
                return;
        }
 
@@ -1025,12 +1043,11 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 
        if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
                iwl_mvm_remove_time_event(mvm, mvmvif, te_data);
-               set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status);
+               iwl_mvm_p2p_roc_finished(mvm);
        } else {
                iwl_mvm_remove_aux_roc_te(mvm, mvmvif, te_data);
+               iwl_mvm_roc_finished(mvm);
        }
-
-       iwl_mvm_roc_finished(mvm);
 }
 
 void iwl_mvm_remove_csa_period(struct iwl_mvm *mvm,
index 0b8a0cd..8dc1b8e 100644 (file)
@@ -1093,22 +1093,22 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
                      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
                      IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY,
                      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-                     iwl_cfg_bz_a0_hr_b0, iwl_ax201_name),
+                     iwl_cfg_bz_a0_hr_b0, iwl_bz_name),
        _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
                      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
                      IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
                      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-                     iwl_cfg_bz_a0_gf_a0, iwl_ax211_name),
+                     iwl_cfg_bz_a0_gf_a0, iwl_bz_name),
        _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
                      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
                      IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
                      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB,
-                     iwl_cfg_bz_a0_gf4_a0, iwl_ax211_name),
+                     iwl_cfg_bz_a0_gf4_a0, iwl_bz_name),
        _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
                      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
                      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
                      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-                     iwl_cfg_bz_a0_mr_a0, iwl_ax211_name),
+                     iwl_cfg_bz_a0_mr_a0, iwl_bz_name),
 
 /* SoF with JF2 */
        _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
index cc550f6..a43e56c 100644 (file)
@@ -42,6 +42,7 @@ struct iwl_host_cmd;
  * struct iwl_rx_mem_buffer
  * @page_dma: bus address of rxb page
  * @page: driver's pointer to the rxb page
+ * @list: list entry for the membuffer
  * @invalid: rxb is in driver ownership - not owned by HW
  * @vid: index of this rxb in the global table
  * @offset: indicates which offset of the page (in bytes)
@@ -50,10 +51,10 @@ struct iwl_host_cmd;
 struct iwl_rx_mem_buffer {
        dma_addr_t page_dma;
        struct page *page;
-       u16 vid;
-       bool invalid;
        struct list_head list;
        u32 offset;
+       u16 vid;
+       bool invalid;
 };
 
 /**
@@ -253,6 +254,13 @@ struct cont_rec {
 };
 #endif
 
+enum iwl_pcie_fw_reset_state {
+       FW_RESET_IDLE,
+       FW_RESET_REQUESTED,
+       FW_RESET_OK,
+       FW_RESET_ERROR,
+};
+
 /**
  * struct iwl_trans_pcie - PCIe transport specific data
  * @rxq: all the RX queue data
@@ -404,7 +412,7 @@ struct iwl_trans_pcie {
        dma_addr_t base_rb_stts_dma;
 
        bool fw_reset_handshake;
-       bool fw_reset_done;
+       enum iwl_pcie_fw_reset_state fw_reset_state;
        wait_queue_head_t fw_reset_waitq;
 
        char rf_name[32];
@@ -670,19 +678,19 @@ static inline const char *queue_name(struct device *dev,
                          IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0;
 
                if (i == 0)
-                       return DRV_NAME ": shared IRQ";
+                       return DRV_NAME ":shared_IRQ";
 
                return devm_kasprintf(dev, GFP_KERNEL,
-                                     DRV_NAME ": queue %d", i + vec);
+                                     DRV_NAME ":queue_%d", i + vec);
        }
        if (i == 0)
-               return DRV_NAME ": default queue";
+               return DRV_NAME ":default_queue";
 
        if (i == trans_p->alloc_vecs - 1)
-               return DRV_NAME ": exception";
+               return DRV_NAME ":exception";
 
        return devm_kasprintf(dev, GFP_KERNEL,
-                             DRV_NAME  ": queue %d", i);
+                             DRV_NAME  ":queue_%d", i);
 }
 
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
index 4f6f4b2..8e45eb3 100644 (file)
@@ -487,6 +487,9 @@ void iwl_pcie_free_rbs_pool(struct iwl_trans *trans)
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int i;
 
+       if (!trans_pcie->rx_pool)
+               return;
+
        for (i = 0; i < RX_POOL_SIZE(trans_pcie->num_rx_bufs); i++) {
                if (!trans_pcie->rx_pool[i].page)
                        continue;
@@ -1062,7 +1065,7 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
        INIT_LIST_HEAD(&rba->rbd_empty);
        spin_unlock_bh(&rba->lock);
 
-       /* free all first - we might be reconfigured for a different size */
+       /* free all first - we overwrite everything here */
        iwl_pcie_free_rbs_pool(trans);
 
        for (i = 0; i < RX_QUEUE_SIZE; i++)
@@ -1653,7 +1656,7 @@ static void iwl_pcie_irq_handle_error(struct iwl_trans *trans)
 
        /* The STATUS_FW_ERROR bit is set in this function. This must happen
         * before we wake up the command caller, to ensure a proper cleanup. */
-       iwl_trans_fw_error(trans);
+       iwl_trans_fw_error(trans, false);
 
        clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
        wake_up(&trans->wait_command_queue);
@@ -2225,7 +2228,13 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
                        "Microcode SW error detected. Restarting 0x%X.\n",
                        inta_fh);
                isr_stats->sw++;
-               iwl_pcie_irq_handle_error(trans);
+               /* during FW reset flow report errors from there */
+               if (trans_pcie->fw_reset_state == FW_RESET_REQUESTED) {
+                       trans_pcie->fw_reset_state = FW_RESET_ERROR;
+                       wake_up(&trans_pcie->fw_reset_waitq);
+               } else {
+                       iwl_pcie_irq_handle_error(trans);
+               }
        }
 
        /* After checking FH register check HW register */
@@ -2293,7 +2302,7 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 
        if (inta_hw & MSIX_HW_INT_CAUSES_REG_RESET_DONE) {
                IWL_DEBUG_ISR(trans, "Reset flow completed\n");
-               trans_pcie->fw_reset_done = true;
+               trans_pcie->fw_reset_state = FW_RESET_OK;
                wake_up(&trans_pcie->fw_reset_waitq);
        }
 
index a340093..bf0c32a 100644 (file)
@@ -87,7 +87,12 @@ static void iwl_pcie_gen2_apm_stop(struct iwl_trans *trans, bool op_mode_leave)
         * Clear "initialization complete" bit to move adapter from
         * D0A* (powered-up Active) --> D0U* (Uninitialized) state.
         */
-       iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+               iwl_clear_bit(trans, CSR_GP_CNTRL,
+                             CSR_GP_CNTRL_REG_FLAG_MAC_INIT);
+       else
+               iwl_clear_bit(trans, CSR_GP_CNTRL,
+                             CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
 }
 
 static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
@@ -95,7 +100,7 @@ static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int ret;
 
-       trans_pcie->fw_reset_done = false;
+       trans_pcie->fw_reset_state = FW_RESET_REQUESTED;
 
        if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
                iwl_write_umac_prph(trans, UREG_NIC_SET_NMI_DRIVER,
@@ -106,10 +111,15 @@ static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
 
        /* wait 200ms */
        ret = wait_event_timeout(trans_pcie->fw_reset_waitq,
-                                trans_pcie->fw_reset_done, FW_RESET_TIMEOUT);
-       if (!ret)
+                                trans_pcie->fw_reset_state != FW_RESET_REQUESTED,
+                                FW_RESET_TIMEOUT);
+       if (!ret || trans_pcie->fw_reset_state == FW_RESET_ERROR) {
                IWL_INFO(trans,
                         "firmware didn't ACK the reset - continue anyway\n");
+               iwl_trans_fw_error(trans, true);
+       }
+
+       trans_pcie->fw_reset_state = FW_RESET_IDLE;
 }
 
 void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
@@ -121,9 +131,21 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
        if (trans_pcie->is_down)
                return;
 
-       if (trans_pcie->fw_reset_handshake &&
-           trans->state >= IWL_TRANS_FW_STARTED)
-               iwl_trans_pcie_fw_reset_handshake(trans);
+       if (trans->state >= IWL_TRANS_FW_STARTED) {
+               if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+                       iwl_set_bit(trans, CSR_GP_CNTRL,
+                                   CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ);
+                       iwl_poll_bit(trans, CSR_GP_CNTRL,
+                                    CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+                                    CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+                                    5000);
+                       msleep(100);
+                       iwl_set_bit(trans, CSR_GP_CNTRL,
+                                   CSR_GP_CNTRL_REG_FLAG_SW_RESET);
+               } else if (trans_pcie->fw_reset_handshake) {
+                       iwl_trans_pcie_fw_reset_handshake(trans);
+               }
+       }
 
        trans_pcie->is_down = true;
 
@@ -154,9 +176,17 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
                iwl_pcie_ctxt_info_free(trans);
 
        /* Make sure (redundant) we've released our request to stay awake */
-       iwl_clear_bit(trans, CSR_GP_CNTRL,
-                     CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+               iwl_clear_bit(trans, CSR_GP_CNTRL,
+                             CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ);
+       else
+               iwl_clear_bit(trans, CSR_GP_CNTRL,
+                             CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
 
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+               iwl_set_bit(trans, CSR_GP_CNTRL,
+                           CSR_GP_CNTRL_REG_FLAG_SW_RESET);
+       }
        /* Stop the device, and put it in low power state */
        iwl_pcie_gen2_apm_stop(trans, false);
 
@@ -436,7 +466,10 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans,
 
        iwl_pcie_set_ltr(trans);
 
-       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ)
+               iwl_set_bit(trans, CSR_GP_CNTRL,
+                           CSR_GP_CNTRL_REG_FLAG_ROM_START);
+       else if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
                iwl_write_umac_prph(trans, UREG_CPU_INIT_RUN, 1);
        else
                iwl_write_prph(trans, UREG_CPU_INIT_RUN, 1);
index bee6b45..f252680 100644 (file)
@@ -449,11 +449,23 @@ void iwl_pcie_apm_stop_master(struct iwl_trans *trans)
        int ret;
 
        /* stop device's busmaster DMA activity */
-       iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER);
 
-       ret = iwl_poll_bit(trans, CSR_RESET,
-                          CSR_RESET_REG_FLAG_MASTER_DISABLED,
-                          CSR_RESET_REG_FLAG_MASTER_DISABLED, 100);
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+               iwl_set_bit(trans, CSR_GP_CNTRL,
+                           CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_REQ);
+
+               ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
+                                  CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+                                  CSR_GP_CNTRL_REG_FLAG_BUS_MASTER_DISABLE_STATUS,
+                                  100);
+       } else {
+               iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_STOP_MASTER);
+
+               ret = iwl_poll_bit(trans, CSR_RESET,
+                                  CSR_RESET_REG_FLAG_MASTER_DISABLED,
+                                  CSR_RESET_REG_FLAG_MASTER_DISABLED, 100);
+       }
+
        if (ret < 0)
                IWL_WARN(trans, "Master Disable Timed Out, 100 usec\n");
 
@@ -1866,6 +1878,9 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans,
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
+       /* free all first - we might be reconfigured for a different size */
+       iwl_pcie_free_rbs_pool(trans);
+
        trans->txqs.cmd.q_id = trans_cfg->cmd_queue;
        trans->txqs.cmd.fifo = trans_cfg->cmd_fifo;
        trans->txqs.cmd.wdg_timeout = trans_cfg->cmd_q_wdg_timeout;
@@ -1992,15 +2007,24 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
 {
        int ret;
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+       u32 write = CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ;
+       u32 mask = CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
+                  CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP;
+       u32 poll = CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN;
 
        spin_lock(&trans_pcie->reg_lock);
 
        if (trans_pcie->cmd_hold_nic_awake)
                goto out;
 
+       if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) {
+               write = CSR_GP_CNTRL_REG_FLAG_BZ_MAC_ACCESS_REQ;
+               mask = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+               poll = CSR_GP_CNTRL_REG_FLAG_MAC_STATUS;
+       }
+
        /* this bit wakes up the NIC */
-       __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
-                                CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+       __iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL, write);
        if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_8000)
                udelay(2);
 
@@ -2024,10 +2048,7 @@ bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
         * 5000 series and later (including 1000 series) have non-volatile SRAM,
         * and do not save/restore SRAM when power cycling.
         */
-       ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-                          CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
-                          (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
-                           CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
+       ret = iwl_poll_bit(trans, CSR_GP_CNTRL, poll, mask, 15000);
        if (unlikely(ret < 0)) {
                u32 cntrl = iwl_read32(trans, CSR_GP_CNTRL);
 
@@ -2947,8 +2968,8 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans,
                struct iwl_rx_mem_buffer *rxb = rxq->queue[i];
                struct iwl_fw_error_dump_rb *rb;
 
-               dma_unmap_page(trans->dev, rxb->page_dma, max_len,
-                              DMA_FROM_DEVICE);
+               dma_sync_single_for_cpu(trans->dev, rxb->page_dma,
+                                       max_len, DMA_FROM_DEVICE);
 
                rb_len += sizeof(**data) + sizeof(*rb) + max_len;
 
@@ -2957,10 +2978,6 @@ static u32 iwl_trans_pcie_dump_rbs(struct iwl_trans *trans,
                rb = (void *)(*data)->data;
                rb->index = cpu_to_le32(i);
                memcpy(rb->data, page_address(rxb->page), max_len);
-               /* remap the page for the free benefit */
-               rxb->page_dma = dma_map_page(trans->dev, rxb->page,
-                                            rxb->offset, max_len,
-                                            DMA_FROM_DEVICE);
 
                *data = iwl_fw_error_next_data(*data);
        }
@@ -3489,15 +3506,9 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
        pci_set_master(pdev);
 
        addr_size = trans->txqs.tfd.addr_size;
-       ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));
-       if (!ret)
-               ret = pci_set_consistent_dma_mask(pdev,
-                                                 DMA_BIT_MASK(addr_size));
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_size));
        if (ret) {
-               ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-               if (!ret)
-                       ret = pci_set_consistent_dma_mask(pdev,
-                                                         DMA_BIT_MASK(32));
+               ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
                /* both attempts failed: */
                if (ret) {
                        dev_err(&pdev->dev, "No suitable DMA available\n");
index c52d9b5..bd6bf70 100644 (file)
@@ -16,24 +16,4 @@ source "drivers/net/wireless/intersil/hostap/Kconfig"
 source "drivers/net/wireless/intersil/orinoco/Kconfig"
 source "drivers/net/wireless/intersil/p54/Kconfig"
 
-config PRISM54
-       tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus (DEPRECATED)'
-       depends on PCI
-       select WIRELESS_EXT
-       select WEXT_SPY
-       select WEXT_PRIV
-       select FW_LOADER
-       help
-         This enables support for FullMAC PCI/Cardbus prism54 devices. This
-         driver is now deprecated in favor for the SoftMAC driver, p54pci.
-         p54pci supports FullMAC PCI/Cardbus devices as well.
-
-         For more information refer to the p54 wiki:
-
-         http://wireless.wiki.kernel.org/en/users/Drivers/p54
-
-         Note: You need a motherboard with DMA support to use any of these cards
-
-         When built as module you get the module prism54
-
 endif # WLAN_VENDOR_INTERSIL
index aa630e9..65281d1 100644 (file)
@@ -2,4 +2,3 @@
 obj-$(CONFIG_HOSTAP)           += hostap/
 obj-$(CONFIG_HERMES)           += orinoco/
 obj-$(CONFIG_P54_COMMON)       += p54/
-obj-$(CONFIG_PRISM54)          += prism54/
index c4b81ff..c17ab6d 100644 (file)
@@ -93,6 +93,7 @@ extern const struct iw_handler_def hostap_iw_handler_def;
 extern const struct ethtool_ops prism2_ethtool_ops;
 
 int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
-
+int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                         void __user *data, int cmd);
 
 #endif /* HOSTAP_H */
index 49766b2..0a376f1 100644 (file)
@@ -3941,7 +3941,8 @@ const struct iw_handler_def hostap_iw_handler_def =
        .get_wireless_stats = hostap_get_wireless_stats,
 };
 
-
+/* Private ioctls (iwpriv) that have not yet been converted
+ * into new wireless extensions API */
 int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct iwreq *wrq = (struct iwreq *) ifr;
@@ -3953,9 +3954,6 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        local = iface->local;
 
        switch (cmd) {
-               /* Private ioctls (iwpriv) that have not yet been converted
-                * into new wireless extensions API */
-
        case PRISM2_IOCTL_INQUIRE:
                if (!capable(CAP_NET_ADMIN)) ret = -EPERM;
                else ret = prism2_ioctl_priv_inquire(dev, (int *) wrq->u.name);
@@ -4009,11 +4007,31 @@ int hostap_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                                               wrq->u.ap_addr.sa_data);
                break;
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+
+       return ret;
+}
 
+/* Private ioctls that are not used with iwpriv;
+ * in SIOCDEVPRIVATE range */
+int hostap_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                         void __user *data, int cmd)
+{
+       struct iwreq *wrq = (struct iwreq *)ifr;
+       struct hostap_interface *iface;
+       local_info_t *local;
+       int ret = 0;
 
-               /* Private ioctls that are not used with iwpriv;
-                * in SIOCDEVPRIVATE range */
+       iface = netdev_priv(dev);
+       local = iface->local;
+
+       if (in_compat_syscall()) /* not implemented yet */
+               return -EOPNOTSUPP;
 
+       switch (cmd) {
 #ifdef PRISM2_DOWNLOAD_SUPPORT
        case PRISM2_IOCTL_DOWNLOAD:
                if (!capable(CAP_NET_ADMIN)) ret = -EPERM;
index de97b33..54f67b6 100644 (file)
@@ -797,6 +797,7 @@ static const struct net_device_ops hostap_netdev_ops = {
        .ndo_open               = prism2_open,
        .ndo_stop               = prism2_close,
        .ndo_do_ioctl           = hostap_ioctl,
+       .ndo_siocdevprivate     = hostap_siocdevprivate,
        .ndo_set_mac_address    = prism2_set_mac_address,
        .ndo_set_rx_mode        = hostap_set_multicast_list,
        .ndo_tx_timeout         = prism2_tx_timeout,
@@ -809,6 +810,7 @@ static const struct net_device_ops hostap_mgmt_netdev_ops = {
        .ndo_open               = prism2_open,
        .ndo_stop               = prism2_close,
        .ndo_do_ioctl           = hostap_ioctl,
+       .ndo_siocdevprivate     = hostap_siocdevprivate,
        .ndo_set_mac_address    = prism2_set_mac_address,
        .ndo_set_rx_mode        = hostap_set_multicast_list,
        .ndo_tx_timeout         = prism2_tx_timeout,
@@ -821,6 +823,7 @@ static const struct net_device_ops hostap_master_ops = {
        .ndo_open               = prism2_open,
        .ndo_stop               = prism2_close,
        .ndo_do_ioctl           = hostap_ioctl,
+       .ndo_siocdevprivate     = hostap_siocdevprivate,
        .ndo_set_mac_address    = prism2_set_mac_address,
        .ndo_set_rx_mode        = hostap_set_multicast_list,
        .ndo_tx_timeout         = prism2_tx_timeout,
diff --git a/drivers/net/wireless/intersil/prism54/Makefile b/drivers/net/wireless/intersil/prism54/Makefile
deleted file mode 100644 (file)
index 4f5572d..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-# $Id: Makefile.k26,v 1.7 2004/01/30 16:24:00 ajfa Exp $
-
-prism54-objs := islpci_eth.o islpci_mgt.o \
-                isl_38xx.o isl_ioctl.o islpci_dev.o \
-               islpci_hotplug.o oid_mgt.o
-
-obj-$(CONFIG_PRISM54) += prism54.o
-
diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.c b/drivers/net/wireless/intersil/prism54/isl_38xx.c
deleted file mode 100644 (file)
index ae964de..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2003-2004 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>_
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/ktime.h>
-
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-
-/******************************************************************************
-    Device Interface & Control functions
-******************************************************************************/
-
-/**
- * isl38xx_disable_interrupts - disable all interrupts
- * @device: pci memory base address
- *
- *  Instructs the device to disable all interrupt reporting by asserting
- *  the IRQ line. New events may still show up in the interrupt identification
- *  register located at offset %ISL38XX_INT_IDENT_REG.
- */
-void
-isl38xx_disable_interrupts(void __iomem *device)
-{
-       isl38xx_w32_flush(device, 0x00000000, ISL38XX_INT_EN_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_handle_sleep_request(isl38xx_control_block *control_block,
-                            int *powerstate, void __iomem *device_base)
-{
-       /* device requests to go into sleep mode
-        * check whether the transmit queues for data and management are empty */
-       if (isl38xx_in_queue(control_block, ISL38XX_CB_TX_DATA_LQ))
-               /* data tx queue not empty */
-               return;
-
-       if (isl38xx_in_queue(control_block, ISL38XX_CB_TX_MGMTQ))
-               /* management tx queue not empty */
-               return;
-
-       /* check also whether received frames are pending */
-       if (isl38xx_in_queue(control_block, ISL38XX_CB_RX_DATA_LQ))
-               /* data rx queue not empty */
-               return;
-
-       if (isl38xx_in_queue(control_block, ISL38XX_CB_RX_MGMTQ))
-               /* management rx queue not empty */
-               return;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_TRACING, "Device going to sleep mode\n");
-#endif
-
-       /* all queues are empty, allow the device to go into sleep mode */
-       *powerstate = ISL38XX_PSM_POWERSAVE_STATE;
-
-       /* assert the Sleep interrupt in the Device Interrupt Register */
-       isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_SLEEP,
-                         ISL38XX_DEV_INT_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_handle_wakeup(isl38xx_control_block *control_block,
-                     int *powerstate, void __iomem *device_base)
-{
-       /* device is in active state, update the powerstate flag */
-       *powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
-       /* now check whether there are frames pending for the card */
-       if (!isl38xx_in_queue(control_block, ISL38XX_CB_TX_DATA_LQ)
-           && !isl38xx_in_queue(control_block, ISL38XX_CB_TX_MGMTQ))
-               return;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_ANYTHING, "Wake up handler trigger the device\n");
-#endif
-
-       /* either data or management transmit queue has a frame pending
-        * trigger the device by setting the Update bit in the Device Int reg */
-       isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_UPDATE,
-                         ISL38XX_DEV_INT_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-void
-isl38xx_trigger_device(int asleep, void __iomem *device_base)
-{
-       u32 reg;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       u32 counter = 0;
-       struct timespec64 current_ts64;
-       DEBUG(SHOW_FUNCTION_CALLS, "isl38xx trigger device\n");
-#endif
-
-       /* check whether the device is in power save mode */
-       if (asleep) {
-               /* device is in powersave, trigger the device for wakeup */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               ktime_get_real_ts64(&current_ts64);
-               DEBUG(SHOW_TRACING, "%lld.%09ld Device wakeup triggered\n",
-                     (s64)current_ts64.tv_sec, current_ts64.tv_nsec);
-
-               DEBUG(SHOW_TRACING, "%lld.%09ld Device register read %08x\n",
-                     (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
-                     readl(device_base + ISL38XX_CTRL_STAT_REG));
-#endif
-
-               reg = readl(device_base + ISL38XX_INT_IDENT_REG);
-               if (reg == 0xabadface) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       ktime_get_real_ts64(&current_ts64);
-                       DEBUG(SHOW_TRACING,
-                             "%lld.%09ld Device register abadface\n",
-                             (s64)current_ts64.tv_sec, current_ts64.tv_nsec);
-#endif
-                       /* read the Device Status Register until Sleepmode bit is set */
-                       while (reg = readl(device_base + ISL38XX_CTRL_STAT_REG),
-                              (reg & ISL38XX_CTRL_STAT_SLEEPMODE) == 0) {
-                               udelay(ISL38XX_WRITEIO_DELAY);
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                               counter++;
-#endif
-                       }
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING,
-                             "%lld.%09ld Device register read %08x\n",
-                             (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
-                             readl(device_base + ISL38XX_CTRL_STAT_REG));
-                       ktime_get_real_ts64(&current_ts64);
-                       DEBUG(SHOW_TRACING,
-                             "%lld.%09ld Device asleep counter %i\n",
-                             (s64)current_ts64.tv_sec, current_ts64.tv_nsec,
-                             counter);
-#endif
-               }
-               /* assert the Wakeup interrupt in the Device Interrupt Register */
-               isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_WAKEUP,
-                                 ISL38XX_DEV_INT_REG);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               udelay(ISL38XX_WRITEIO_DELAY);
-
-               /* perform another read on the Device Status Register */
-               reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
-               ktime_get_real_ts64(&current_ts64);
-               DEBUG(SHOW_TRACING, "%lld.%00ld Device register read %08x\n",
-                     (s64)current_ts64.tv_sec, current_ts64.tv_nsec, reg);
-#endif
-       } else {
-               /* device is (still) awake  */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_TRACING, "Device is in active state\n");
-#endif
-               /* trigger the device by setting the Update bit in the Device Int reg */
-
-               isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_UPDATE,
-                                 ISL38XX_DEV_INT_REG);
-       }
-}
-
-void
-isl38xx_interface_reset(void __iomem *device_base, dma_addr_t host_address)
-{
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "isl38xx_interface_reset\n");
-#endif
-
-       /* load the address of the control block in the device */
-       isl38xx_w32_flush(device_base, host_address, ISL38XX_CTRL_BLK_BASE_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* set the reset bit in the Device Interrupt Register */
-       isl38xx_w32_flush(device_base, ISL38XX_DEV_INT_RESET, ISL38XX_DEV_INT_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* enable the interrupt for detecting initialization */
-
-       /* Note: Do not enable other interrupts here. We want the
-        * device to have come up first 100% before allowing any other
-        * interrupts. */
-       isl38xx_w32_flush(device_base, ISL38XX_INT_IDENT_INIT, ISL38XX_INT_EN_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);  /* allow complete full reset */
-}
-
-void
-isl38xx_enable_common_interrupts(void __iomem *device_base)
-{
-       u32 reg;
-
-       reg = ISL38XX_INT_IDENT_UPDATE | ISL38XX_INT_IDENT_SLEEP |
-             ISL38XX_INT_IDENT_WAKEUP;
-       isl38xx_w32_flush(device_base, reg, ISL38XX_INT_EN_REG);
-       udelay(ISL38XX_WRITEIO_DELAY);
-}
-
-int
-isl38xx_in_queue(isl38xx_control_block *cb, int queue)
-{
-       const s32 delta = (le32_to_cpu(cb->driver_curr_frag[queue]) -
-                          le32_to_cpu(cb->device_curr_frag[queue]));
-
-       /* determine the amount of fragments in the queue depending on the type
-        * of the queue, either transmit or receive */
-
-       BUG_ON(delta < 0);      /* driver ptr must be ahead of device ptr */
-
-       switch (queue) {
-               /* send queues */
-       case ISL38XX_CB_TX_MGMTQ:
-               BUG_ON(delta > ISL38XX_CB_MGMT_QSIZE);
-               fallthrough;
-
-       case ISL38XX_CB_TX_DATA_LQ:
-       case ISL38XX_CB_TX_DATA_HQ:
-               BUG_ON(delta > ISL38XX_CB_TX_QSIZE);
-               return delta;
-
-               /* receive queues */
-       case ISL38XX_CB_RX_MGMTQ:
-               BUG_ON(delta > ISL38XX_CB_MGMT_QSIZE);
-               return ISL38XX_CB_MGMT_QSIZE - delta;
-
-       case ISL38XX_CB_RX_DATA_LQ:
-       case ISL38XX_CB_RX_DATA_HQ:
-               BUG_ON(delta > ISL38XX_CB_RX_QSIZE);
-               return ISL38XX_CB_RX_QSIZE - delta;
-       }
-       BUG();
-       return 0;
-}
diff --git a/drivers/net/wireless/intersil/prism54/isl_38xx.h b/drivers/net/wireless/intersil/prism54/isl_38xx.h
deleted file mode 100644 (file)
index 69218b8..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- */
-
-#ifndef _ISL_38XX_H
-#define _ISL_38XX_H
-
-#include <asm/io.h>
-#include <asm/byteorder.h>
-
-#define ISL38XX_CB_RX_QSIZE                     8
-#define ISL38XX_CB_TX_QSIZE                     32
-
-/* ISL38XX Access Point Specific definitions */
-#define ISL38XX_MAX_WDS_LINKS                   8
-
-/* ISL38xx Client Specific definitions */
-#define ISL38XX_PSM_ACTIVE_STATE                0
-#define ISL38XX_PSM_POWERSAVE_STATE             1
-
-/* ISL38XX Host Interface Definitions */
-#define ISL38XX_PCI_MEM_SIZE                    0x02000
-#define ISL38XX_MEMORY_WINDOW_SIZE              0x01000
-#define ISL38XX_DEV_FIRMWARE_ADDRES             0x20000
-#define ISL38XX_WRITEIO_DELAY                   10     /* in us */
-#define ISL38XX_RESET_DELAY                     50     /* in ms */
-#define ISL38XX_WAIT_CYCLE                      10     /* in 10ms */
-#define ISL38XX_MAX_WAIT_CYCLES                 10
-
-/* PCI Memory Area */
-#define ISL38XX_HARDWARE_REG                    0x0000
-#define ISL38XX_CARDBUS_CIS                     0x0800
-#define ISL38XX_DIRECT_MEM_WIN                  0x1000
-
-/* Hardware registers */
-#define ISL38XX_DEV_INT_REG                     0x0000
-#define ISL38XX_INT_IDENT_REG                   0x0010
-#define ISL38XX_INT_ACK_REG                     0x0014
-#define ISL38XX_INT_EN_REG                      0x0018
-#define ISL38XX_GEN_PURP_COM_REG_1              0x0020
-#define ISL38XX_GEN_PURP_COM_REG_2              0x0024
-#define ISL38XX_CTRL_BLK_BASE_REG               ISL38XX_GEN_PURP_COM_REG_1
-#define ISL38XX_DIR_MEM_BASE_REG                0x0030
-#define ISL38XX_CTRL_STAT_REG                   0x0078
-
-/* High end mobos queue up pci writes, the following
- * is used to "read" from after a write to force flush */
-#define ISL38XX_PCI_POSTING_FLUSH              ISL38XX_INT_EN_REG
-
-/**
- * isl38xx_w32_flush - PCI iomem write helper
- * @base: (host) memory base address of the device
- * @val: 32bit value (host order) to write
- * @offset: byte offset into @base to write value to
- *
- *  This helper takes care of writing a 32bit datum to the
- *  specified offset into the device's pci memory space, and making sure
- *  the pci memory buffers get flushed by performing one harmless read
- *  from the %ISL38XX_PCI_POSTING_FLUSH offset.
- */
-static inline void
-isl38xx_w32_flush(void __iomem *base, u32 val, unsigned long offset)
-{
-       writel(val, base + offset);
-       (void) readl(base + ISL38XX_PCI_POSTING_FLUSH);
-}
-
-/* Device Interrupt register bits */
-#define ISL38XX_DEV_INT_RESET                   0x0001
-#define ISL38XX_DEV_INT_UPDATE                  0x0002
-#define ISL38XX_DEV_INT_WAKEUP                  0x0008
-#define ISL38XX_DEV_INT_SLEEP                   0x0010
-
-/* Interrupt Identification/Acknowledge/Enable register bits */
-#define ISL38XX_INT_IDENT_UPDATE                0x0002
-#define ISL38XX_INT_IDENT_INIT                  0x0004
-#define ISL38XX_INT_IDENT_WAKEUP                0x0008
-#define ISL38XX_INT_IDENT_SLEEP                 0x0010
-#define ISL38XX_INT_SOURCES                     0x001E
-
-/* Control/Status register bits */
-/* Looks like there are other meaningful bits
-    0x20004400 seen in normal operation,
-    0x200044db at 'timeout waiting for mgmt response'
-*/
-#define ISL38XX_CTRL_STAT_SLEEPMODE             0x00000200
-#define        ISL38XX_CTRL_STAT_CLKRUN                0x00800000
-#define ISL38XX_CTRL_STAT_RESET                 0x10000000
-#define ISL38XX_CTRL_STAT_RAMBOOT               0x20000000
-#define ISL38XX_CTRL_STAT_STARTHALTED           0x40000000
-#define ISL38XX_CTRL_STAT_HOST_OVERRIDE         0x80000000
-
-/* Control Block definitions */
-#define ISL38XX_CB_RX_DATA_LQ                   0
-#define ISL38XX_CB_TX_DATA_LQ                   1
-#define ISL38XX_CB_RX_DATA_HQ                   2
-#define ISL38XX_CB_TX_DATA_HQ                   3
-#define ISL38XX_CB_RX_MGMTQ                     4
-#define ISL38XX_CB_TX_MGMTQ                     5
-#define ISL38XX_CB_QCOUNT                       6
-#define ISL38XX_CB_MGMT_QSIZE                   4
-#define ISL38XX_MIN_QTHRESHOLD                  4      /* fragments */
-
-/* Memory Manager definitions */
-#define MGMT_FRAME_SIZE                         1500   /* >= size struct obj_bsslist */
-#define MGMT_TX_FRAME_COUNT                     24     /* max 4 + spare 4 + 8 init */
-#define MGMT_RX_FRAME_COUNT                     24     /* 4*4 + spare 8 */
-#define MGMT_FRAME_COUNT                        (MGMT_TX_FRAME_COUNT + MGMT_RX_FRAME_COUNT)
-#define CONTROL_BLOCK_SIZE                      1024   /* should be enough */
-#define PSM_FRAME_SIZE                          1536
-#define PSM_MINIMAL_STATION_COUNT               64
-#define PSM_FRAME_COUNT                         PSM_MINIMAL_STATION_COUNT
-#define PSM_BUFFER_SIZE                         PSM_FRAME_SIZE * PSM_FRAME_COUNT
-#define MAX_TRAP_RX_QUEUE                       4
-#define HOST_MEM_BLOCK                          CONTROL_BLOCK_SIZE + PSM_BUFFER_SIZE
-
-/* Fragment package definitions */
-#define FRAGMENT_FLAG_MF                        0x0001
-#define MAX_FRAGMENT_SIZE                       1536
-
-/* In monitor mode frames have a header. I don't know exactly how big those
- * frame can be but I've never seen any frame bigger than 1584... :
- */
-#define MAX_FRAGMENT_SIZE_RX                   1600
-
-typedef struct {
-       __le32 address;         /* physical address on host */
-       __le16 size;            /* packet size */
-       __le16 flags;           /* set of bit-wise flags */
-} isl38xx_fragment;
-
-struct isl38xx_cb {
-       __le32 driver_curr_frag[ISL38XX_CB_QCOUNT];
-       __le32 device_curr_frag[ISL38XX_CB_QCOUNT];
-       isl38xx_fragment rx_data_low[ISL38XX_CB_RX_QSIZE];
-       isl38xx_fragment tx_data_low[ISL38XX_CB_TX_QSIZE];
-       isl38xx_fragment rx_data_high[ISL38XX_CB_RX_QSIZE];
-       isl38xx_fragment tx_data_high[ISL38XX_CB_TX_QSIZE];
-       isl38xx_fragment rx_data_mgmt[ISL38XX_CB_MGMT_QSIZE];
-       isl38xx_fragment tx_data_mgmt[ISL38XX_CB_MGMT_QSIZE];
-};
-
-typedef struct isl38xx_cb isl38xx_control_block;
-
-/* determine number of entries currently in queue */
-int isl38xx_in_queue(isl38xx_control_block *cb, int queue);
-
-void isl38xx_disable_interrupts(void __iomem *);
-void isl38xx_enable_common_interrupts(void __iomem *);
-
-void isl38xx_handle_sleep_request(isl38xx_control_block *, int *,
-                                 void __iomem *);
-void isl38xx_handle_wakeup(isl38xx_control_block *, int *, void __iomem *);
-void isl38xx_trigger_device(int, void __iomem *);
-void isl38xx_interface_reset(void __iomem *, dma_addr_t);
-
-#endif                         /* _ISL_38XX_H */
diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.c b/drivers/net/wireless/intersil/prism54/isl_ioctl.c
deleted file mode 100644 (file)
index 5e5ceaf..0000000
+++ /dev/null
@@ -1,2909 +0,0 @@
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *            (C) 2003,2004 Aurelien Alleaume <slts@free.fr>
- *            (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *            (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/etherdevice.h>
-
-#include <linux/uaccess.h>
-
-#include "prismcompat.h"
-#include "isl_ioctl.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h"           /* additional types and defs for isl38xx fw */
-#include "oid_mgt.h"
-
-#include <net/iw_handler.h>    /* New driver API */
-
-#define KEY_SIZE_WEP104 13     /* 104/128-bit WEP keys */
-#define KEY_SIZE_WEP40  5      /* 40/64-bit WEP keys */
-/* KEY_SIZE_TKIP should match isl_oid.h, struct obj_key.key[] size */
-#define KEY_SIZE_TKIP   32     /* TKIP keys */
-
-static void prism54_wpa_bss_ie_add(islpci_private *priv, u8 *bssid,
-                               u8 *wpa_ie, size_t wpa_ie_len);
-static size_t prism54_wpa_bss_ie_get(islpci_private *priv, u8 *bssid, u8 *wpa_ie);
-static int prism54_set_wpa(struct net_device *, struct iw_request_info *,
-                               __u32 *, char *);
-
-/* In 500 kbps */
-static const unsigned char scan_rate_list[] = { 2, 4, 11, 22,
-                                               12, 18, 24, 36,
-                                               48, 72, 96, 108 };
-
-/**
- * prism54_mib_mode_helper - MIB change mode helper function
- * @priv: the &struct islpci_private object to modify
- * @iw_mode: new mode (%IW_MODE_*)
- *
- *  This is a helper function, hence it does not lock. Make sure
- *  caller deals with locking *if* necessary. This function sets the
- *  mode-dependent mib values and does the mapping of the Linux
- *  Wireless API modes to Device firmware modes. It also checks for
- *  correct valid Linux wireless modes.
- */
-static int
-prism54_mib_mode_helper(islpci_private *priv, u32 iw_mode)
-{
-       u32 config = INL_CONFIG_MANUALRUN;
-       u32 mode, bsstype;
-
-       /* For now, just catch early the Repeater and Secondary modes here */
-       if (iw_mode == IW_MODE_REPEAT || iw_mode == IW_MODE_SECOND) {
-               printk(KERN_DEBUG
-                      "%s(): Sorry, Repeater mode and Secondary mode "
-                      "are not yet supported by this driver.\n", __func__);
-               return -EINVAL;
-       }
-
-       priv->iw_mode = iw_mode;
-
-       switch (iw_mode) {
-       case IW_MODE_AUTO:
-               mode = INL_MODE_CLIENT;
-               bsstype = DOT11_BSSTYPE_ANY;
-               break;
-       case IW_MODE_ADHOC:
-               mode = INL_MODE_CLIENT;
-               bsstype = DOT11_BSSTYPE_IBSS;
-               break;
-       case IW_MODE_INFRA:
-               mode = INL_MODE_CLIENT;
-               bsstype = DOT11_BSSTYPE_INFRA;
-               break;
-       case IW_MODE_MASTER:
-               mode = INL_MODE_AP;
-               bsstype = DOT11_BSSTYPE_INFRA;
-               break;
-       case IW_MODE_MONITOR:
-               mode = INL_MODE_PROMISCUOUS;
-               bsstype = DOT11_BSSTYPE_ANY;
-               config |= INL_CONFIG_RXANNEX;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (init_wds)
-               config |= INL_CONFIG_WDS;
-       mgt_set(priv, DOT11_OID_BSSTYPE, &bsstype);
-       mgt_set(priv, OID_INL_CONFIG, &config);
-       mgt_set(priv, OID_INL_MODE, &mode);
-
-       return 0;
-}
-
-/*
- * prism54_mib_init - fill MIB cache with defaults
- *
- *  this function initializes the struct given as @mib with defaults,
- *  of which many are retrieved from the global module parameter
- *  variables.
- */
-void
-prism54_mib_init(islpci_private *priv)
-{
-       u32 channel, authen, wep, filter, dot1x, mlme, conformance, power, mode;
-       struct obj_buffer psm_buffer = {
-               .size = PSM_BUFFER_SIZE,
-               .addr = priv->device_psm_buffer
-       };
-
-       channel = CARD_DEFAULT_CHANNEL;
-       authen = CARD_DEFAULT_AUTHEN;
-       wep = CARD_DEFAULT_WEP;
-       filter = CARD_DEFAULT_FILTER; /* (0) Do not filter un-encrypted data */
-       dot1x = CARD_DEFAULT_DOT1X;
-       mlme = CARD_DEFAULT_MLME_MODE;
-       conformance = CARD_DEFAULT_CONFORMANCE;
-       power = 127;
-       mode = CARD_DEFAULT_IW_MODE;
-
-       mgt_set(priv, DOT11_OID_CHANNEL, &channel);
-       mgt_set(priv, DOT11_OID_AUTHENABLE, &authen);
-       mgt_set(priv, DOT11_OID_PRIVACYINVOKED, &wep);
-       mgt_set(priv, DOT11_OID_PSMBUFFER, &psm_buffer);
-       mgt_set(priv, DOT11_OID_EXUNENCRYPTED, &filter);
-       mgt_set(priv, DOT11_OID_DOT1XENABLE, &dot1x);
-       mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlme);
-       mgt_set(priv, OID_INL_DOT11D_CONFORMANCE, &conformance);
-       mgt_set(priv, OID_INL_OUTPUTPOWER, &power);
-
-       /* This sets all of the mode-dependent values */
-       prism54_mib_mode_helper(priv, mode);
-}
-
-/* this will be executed outside of atomic context thanks to
- * schedule_work(), thus we can as well use sleeping semaphore
- * locking */
-void
-prism54_update_stats(struct work_struct *work)
-{
-       islpci_private *priv = container_of(work, islpci_private, stats_work);
-       char *data;
-       struct obj_bss bss, *bss2;
-       union oid_res_t r;
-
-       mutex_lock(&priv->stats_lock);
-
-/* Noise floor.
- * I'm not sure if the unit is dBm.
- * Note : If we are not connected, this value seems to be irrelevant. */
-
-       mgt_get_request(priv, DOT11_OID_NOISEFLOOR, 0, NULL, &r);
-       priv->local_iwstatistics.qual.noise = r.u;
-
-/* Get the rssi of the link. To do this we need to retrieve a bss. */
-
-       /* First get the MAC address of the AP we are associated with. */
-       mgt_get_request(priv, DOT11_OID_BSSID, 0, NULL, &r);
-       data = r.ptr;
-
-       /* copy this MAC to the bss */
-       memcpy(bss.address, data, ETH_ALEN);
-       kfree(data);
-
-       /* now ask for the corresponding bss */
-       mgt_get_request(priv, DOT11_OID_BSSFIND, 0, (void *) &bss, &r);
-       bss2 = r.ptr;
-       /* report the rssi and use it to calculate
-        *  link quality through a signal-noise
-        *  ratio */
-       priv->local_iwstatistics.qual.level = bss2->rssi;
-       priv->local_iwstatistics.qual.qual =
-           bss2->rssi - priv->iwstatistics.qual.noise;
-
-       kfree(bss2);
-
-       /* report that the stats are new */
-       priv->local_iwstatistics.qual.updated = 0x7;
-
-/* Rx : unable to decrypt the MPDU */
-       mgt_get_request(priv, DOT11_OID_PRIVRXFAILED, 0, NULL, &r);
-       priv->local_iwstatistics.discard.code = r.u;
-
-/* Tx : Max MAC retries num reached */
-       mgt_get_request(priv, DOT11_OID_MPDUTXFAILED, 0, NULL, &r);
-       priv->local_iwstatistics.discard.retries = r.u;
-
-       mutex_unlock(&priv->stats_lock);
-}
-
-struct iw_statistics *
-prism54_get_wireless_stats(struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       /* If the stats are being updated return old data */
-       if (mutex_trylock(&priv->stats_lock)) {
-               memcpy(&priv->iwstatistics, &priv->local_iwstatistics,
-                      sizeof (struct iw_statistics));
-               /* They won't be marked updated for the next time */
-               priv->local_iwstatistics.qual.updated = 0;
-               mutex_unlock(&priv->stats_lock);
-       } else
-               priv->iwstatistics.qual.updated = 0;
-
-       /* Update our wireless stats, but do not schedule to often
-        * (max 1 HZ) */
-       if ((priv->stats_timestamp == 0) ||
-           time_after(jiffies, priv->stats_timestamp + 1 * HZ)) {
-               schedule_work(&priv->stats_work);
-               priv->stats_timestamp = jiffies;
-       }
-
-       return &priv->iwstatistics;
-}
-
-static int
-prism54_commit(struct net_device *ndev, struct iw_request_info *info,
-              char *cwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       /* simply re-set the last set SSID, this should commit most stuff */
-
-       /* Commit in Monitor mode is not necessary, also setting essid
-        * in Monitor mode does not make sense and isn't allowed for this
-        * device's firmware */
-       if (priv->iw_mode != IW_MODE_MONITOR)
-               return mgt_set_request(priv, DOT11_OID_SSID, 0, NULL);
-       return 0;
-}
-
-static int
-prism54_get_name(struct net_device *ndev, struct iw_request_info *info,
-                char *cwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       char *capabilities;
-       union oid_res_t r;
-       int rvalue;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT) {
-               strncpy(cwrq, "NOT READY!", IFNAMSIZ);
-               return 0;
-       }
-       rvalue = mgt_get_request(priv, OID_INL_PHYCAPABILITIES, 0, NULL, &r);
-
-       switch (r.u) {
-       case INL_PHYCAP_5000MHZ:
-               capabilities = "IEEE 802.11a/b/g";
-               break;
-       case INL_PHYCAP_FAA:
-               capabilities = "IEEE 802.11b/g - FAA Support";
-               break;
-       case INL_PHYCAP_2400MHZ:
-       default:
-               capabilities = "IEEE 802.11b/g";        /* Default */
-               break;
-       }
-       strncpy(cwrq, capabilities, IFNAMSIZ);
-       return rvalue;
-}
-
-static int
-prism54_set_freq(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_freq *fwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int rvalue;
-       u32 c;
-
-       if (fwrq->m < 1000)
-               /* we have a channel number */
-               c = fwrq->m;
-       else
-               c = (fwrq->e == 1) ? channel_of_freq(fwrq->m / 100000) : 0;
-
-       rvalue = c ? mgt_set_request(priv, DOT11_OID_CHANNEL, 0, &c) : -EINVAL;
-
-       /* Call commit handler */
-       return (rvalue ? rvalue : -EINPROGRESS);
-}
-
-static int
-prism54_get_freq(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_freq *fwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, DOT11_OID_CHANNEL, 0, NULL, &r);
-       fwrq->i = r.u;
-       rvalue |= mgt_get_request(priv, DOT11_OID_FREQUENCY, 0, NULL, &r);
-       fwrq->m = r.u;
-       fwrq->e = 3;
-
-       return rvalue;
-}
-
-static int
-prism54_set_mode(struct net_device *ndev, struct iw_request_info *info,
-                __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       u32 mlmeautolevel = CARD_DEFAULT_MLME_MODE;
-
-       /* Let's see if the user passed a valid Linux Wireless mode */
-       if (*uwrq > IW_MODE_MONITOR || *uwrq < IW_MODE_AUTO) {
-               printk(KERN_DEBUG
-                      "%s: %s() You passed a non-valid init_mode.\n",
-                      priv->ndev->name, __func__);
-               return -EINVAL;
-       }
-
-       down_write(&priv->mib_sem);
-
-       if (prism54_mib_mode_helper(priv, *uwrq)) {
-               up_write(&priv->mib_sem);
-               return -EOPNOTSUPP;
-       }
-
-       /* the ACL code needs an intermediate mlmeautolevel. The wpa stuff an
-        * extended one.
-        */
-       if ((*uwrq == IW_MODE_MASTER) && (priv->acl.policy != MAC_POLICY_OPEN))
-               mlmeautolevel = DOT11_MLME_INTERMEDIATE;
-       if (priv->wpa)
-               mlmeautolevel = DOT11_MLME_EXTENDED;
-
-       mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlmeautolevel);
-
-       if (mgt_commit(priv)) {
-               up_write(&priv->mib_sem);
-               return -EIO;
-       }
-       priv->ndev->type = (priv->iw_mode == IW_MODE_MONITOR)
-           ? priv->monitor_type : ARPHRD_ETHER;
-       up_write(&priv->mib_sem);
-
-       return 0;
-}
-
-/* Use mib cache */
-static int
-prism54_get_mode(struct net_device *ndev, struct iw_request_info *info,
-                __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       BUG_ON((priv->iw_mode < IW_MODE_AUTO) || (priv->iw_mode >
-                                                 IW_MODE_MONITOR));
-       *uwrq = priv->iw_mode;
-
-       return 0;
-}
-
-/* we use DOT11_OID_EDTHRESHOLD. From what I guess the card will not try to
- * emit data if (sensitivity > rssi - noise) (in dBm).
- * prism54_set_sens does not seem to work.
- */
-
-static int
-prism54_set_sens(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       u32 sens;
-
-       /* by default  the card sets this to 20. */
-       sens = vwrq->disabled ? 20 : vwrq->value;
-
-       return mgt_set_request(priv, DOT11_OID_EDTHRESHOLD, 0, &sens);
-}
-
-static int
-prism54_get_sens(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, DOT11_OID_EDTHRESHOLD, 0, NULL, &r);
-
-       vwrq->value = r.u;
-       vwrq->disabled = (vwrq->value == 0);
-       vwrq->fixed = 1;
-
-       return rvalue;
-}
-
-static int
-prism54_get_range(struct net_device *ndev, struct iw_request_info *info,
-                 struct iw_point *dwrq, char *extra)
-{
-       struct iw_range *range = (struct iw_range *) extra;
-       islpci_private *priv = netdev_priv(ndev);
-       u8 *data;
-       int i, m, rvalue;
-       struct obj_frequencies *freq;
-       union oid_res_t r;
-
-       memset(range, 0, sizeof (struct iw_range));
-       dwrq->length = sizeof (struct iw_range);
-
-       /* set the wireless extension version number */
-       range->we_version_source = SUPPORTED_WIRELESS_EXT;
-       range->we_version_compiled = WIRELESS_EXT;
-
-       /* Now the encoding capabilities */
-       range->num_encoding_sizes = 3;
-       /* 64(40) bits WEP */
-       range->encoding_size[0] = 5;
-       /* 128(104) bits WEP */
-       range->encoding_size[1] = 13;
-       /* 256 bits for WPA-PSK */
-       range->encoding_size[2] = 32;
-       /* 4 keys are allowed */
-       range->max_encoding_tokens = 4;
-
-       /* we don't know the quality range... */
-       range->max_qual.level = 0;
-       range->max_qual.noise = 0;
-       range->max_qual.qual = 0;
-       /* these value describe an average quality. Needs more tweaking... */
-       range->avg_qual.level = -80;    /* -80 dBm */
-       range->avg_qual.noise = 0;      /* don't know what to put here */
-       range->avg_qual.qual = 0;
-
-       range->sensitivity = 200;
-
-       /* retry limit capabilities */
-       range->retry_capa = IW_RETRY_LIMIT | IW_RETRY_LIFETIME;
-       range->retry_flags = IW_RETRY_LIMIT;
-       range->r_time_flags = IW_RETRY_LIFETIME;
-
-       /* I don't know the range. Put stupid things here */
-       range->min_retry = 1;
-       range->max_retry = 65535;
-       range->min_r_time = 1024;
-       range->max_r_time = 65535 * 1024;
-
-       /* txpower is supported in dBm's */
-       range->txpower_capa = IW_TXPOW_DBM;
-
-       /* Event capability (kernel + driver) */
-       range->event_capa[0] = (IW_EVENT_CAPA_K_0 |
-       IW_EVENT_CAPA_MASK(SIOCGIWTHRSPY) |
-       IW_EVENT_CAPA_MASK(SIOCGIWAP));
-       range->event_capa[1] = IW_EVENT_CAPA_K_1;
-       range->event_capa[4] = IW_EVENT_CAPA_MASK(IWEVCUSTOM);
-
-       range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
-               IW_ENC_CAPA_CIPHER_TKIP;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       /* Request the device for the supported frequencies
-        * not really relevant since some devices will report the 5 GHz band
-        * frequencies even if they don't support them.
-        */
-       rvalue =
-           mgt_get_request(priv, DOT11_OID_SUPPORTEDFREQUENCIES, 0, NULL, &r);
-       freq = r.ptr;
-
-       range->num_channels = freq->nr;
-       range->num_frequency = freq->nr;
-
-       m = min(IW_MAX_FREQUENCIES, (int) freq->nr);
-       for (i = 0; i < m; i++) {
-               range->freq[i].m = freq->mhz[i];
-               range->freq[i].e = 6;
-               range->freq[i].i = channel_of_freq(freq->mhz[i]);
-       }
-       kfree(freq);
-
-       rvalue |= mgt_get_request(priv, DOT11_OID_SUPPORTEDRATES, 0, NULL, &r);
-       data = r.ptr;
-
-       /* We got an array of char. It is NULL terminated. */
-       i = 0;
-       while ((i < IW_MAX_BITRATES) && (*data != 0)) {
-               /*       the result must be in bps. The card gives us 500Kbps */
-               range->bitrate[i] = *data * 500000;
-               i++;
-               data++;
-       }
-       range->num_bitrates = i;
-       kfree(r.ptr);
-
-       return rvalue;
-}
-
-/* Set AP address*/
-
-static int
-prism54_set_wap(struct net_device *ndev, struct iw_request_info *info,
-               struct sockaddr *awrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       char bssid[6];
-       int rvalue;
-
-       if (awrq->sa_family != ARPHRD_ETHER)
-               return -EINVAL;
-
-       /* prepare the structure for the set object */
-       memcpy(&bssid[0], awrq->sa_data, ETH_ALEN);
-
-       /* set the bssid -- does this make sense when in AP mode? */
-       rvalue = mgt_set_request(priv, DOT11_OID_BSSID, 0, &bssid);
-
-       return (rvalue ? rvalue : -EINPROGRESS);        /* Call commit handler */
-}
-
-/* get AP address*/
-
-static int
-prism54_get_wap(struct net_device *ndev, struct iw_request_info *info,
-               struct sockaddr *awrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, DOT11_OID_BSSID, 0, NULL, &r);
-       memcpy(awrq->sa_data, r.ptr, ETH_ALEN);
-       awrq->sa_family = ARPHRD_ETHER;
-       kfree(r.ptr);
-
-       return rvalue;
-}
-
-static int
-prism54_set_scan(struct net_device *dev, struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       /* hehe the device does this automagicaly */
-       return 0;
-}
-
-/* a little helper that will translate our data into a card independent
- * format that the Wireless Tools will understand. This was inspired by
- * the "Aironet driver for 4500 and 4800 series cards" (GPL)
- */
-
-static char *
-prism54_translate_bss(struct net_device *ndev, struct iw_request_info *info,
-                     char *current_ev, char *end_buf, struct obj_bss *bss,
-                     char noise)
-{
-       struct iw_event iwe;    /* Temporary buffer */
-       short cap;
-       islpci_private *priv = netdev_priv(ndev);
-       u8 wpa_ie[MAX_WPA_IE_LEN];
-       size_t wpa_ie_len;
-
-       /* The first entry must be the MAC address */
-       memcpy(iwe.u.ap_addr.sa_data, bss->address, ETH_ALEN);
-       iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
-       iwe.cmd = SIOCGIWAP;
-       current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-                                         &iwe, IW_EV_ADDR_LEN);
-
-       /* The following entries will be displayed in the same order we give them */
-
-       /* The ESSID. */
-       iwe.u.data.length = bss->ssid.length;
-       iwe.u.data.flags = 1;
-       iwe.cmd = SIOCGIWESSID;
-       current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-                                         &iwe, bss->ssid.octets);
-
-       /* Capabilities */
-#define CAP_ESS 0x01
-#define CAP_IBSS 0x02
-#define CAP_CRYPT 0x10
-
-       /* Mode */
-       cap = bss->capinfo;
-       iwe.u.mode = 0;
-       if (cap & CAP_ESS)
-               iwe.u.mode = IW_MODE_MASTER;
-       else if (cap & CAP_IBSS)
-               iwe.u.mode = IW_MODE_ADHOC;
-       iwe.cmd = SIOCGIWMODE;
-       if (iwe.u.mode)
-               current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-                                                 &iwe, IW_EV_UINT_LEN);
-
-       /* Encryption capability */
-       if (cap & CAP_CRYPT)
-               iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
-       else
-               iwe.u.data.flags = IW_ENCODE_DISABLED;
-       iwe.u.data.length = 0;
-       iwe.cmd = SIOCGIWENCODE;
-       current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-                                         &iwe, NULL);
-
-       /* Add frequency. (short) bss->channel is the frequency in MHz */
-       iwe.u.freq.m = bss->channel;
-       iwe.u.freq.e = 6;
-       iwe.cmd = SIOCGIWFREQ;
-       current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-                                         &iwe, IW_EV_FREQ_LEN);
-
-       /* Add quality statistics */
-       iwe.u.qual.level = bss->rssi;
-       iwe.u.qual.noise = noise;
-       /* do a simple SNR for quality */
-       iwe.u.qual.qual = bss->rssi - noise;
-       iwe.cmd = IWEVQUAL;
-       current_ev = iwe_stream_add_event(info, current_ev, end_buf,
-                                         &iwe, IW_EV_QUAL_LEN);
-
-       /* Add WPA/RSN Information Element, if any */
-       wpa_ie_len = prism54_wpa_bss_ie_get(priv, bss->address, wpa_ie);
-       if (wpa_ie_len > 0) {
-               iwe.cmd = IWEVGENIE;
-               iwe.u.data.length = min_t(size_t, wpa_ie_len, MAX_WPA_IE_LEN);
-               current_ev = iwe_stream_add_point(info, current_ev, end_buf,
-                                                 &iwe, wpa_ie);
-       }
-       /* Do the bitrates */
-       {
-               char *current_val = current_ev + iwe_stream_lcp_len(info);
-               int i;
-               int mask;
-
-               iwe.cmd = SIOCGIWRATE;
-               /* Those two flags are ignored... */
-               iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
-
-               /* Parse the bitmask */
-               mask = 0x1;
-               for(i = 0; i < sizeof(scan_rate_list); i++) {
-                       if(bss->rates & mask) {
-                               iwe.u.bitrate.value = (scan_rate_list[i] * 500000);
-                               current_val = iwe_stream_add_value(
-                                       info, current_ev, current_val,
-                                       end_buf, &iwe, IW_EV_PARAM_LEN);
-                       }
-                       mask <<= 1;
-               }
-               /* Check if we added any event */
-               if ((current_val - current_ev) > iwe_stream_lcp_len(info))
-                       current_ev = current_val;
-       }
-
-       return current_ev;
-}
-
-static int
-prism54_get_scan(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int i, rvalue;
-       struct obj_bsslist *bsslist;
-       u32 noise = 0;
-       char *current_ev = extra;
-       union oid_res_t r;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT) {
-               /* device is not ready, fail gently */
-               dwrq->length = 0;
-               return 0;
-       }
-
-       /* first get the noise value. We will use it to report the link quality */
-       rvalue = mgt_get_request(priv, DOT11_OID_NOISEFLOOR, 0, NULL, &r);
-       noise = r.u;
-
-       /* Ask the device for a list of known bss.
-       * The old API, using SIOCGIWAPLIST, had a hard limit of IW_MAX_AP=64.
-       * The new API, using SIOCGIWSCAN, is only limited by the buffer size.
-       * WE-14->WE-16, the buffer is limited to IW_SCAN_MAX_DATA bytes.
-       * Starting with WE-17, the buffer can be as big as needed.
-       * But the device won't repport anything if you change the value
-       * of IWMAX_BSS=24. */
-
-       rvalue |= mgt_get_request(priv, DOT11_OID_BSSLIST, 0, NULL, &r);
-       bsslist = r.ptr;
-
-       /* ok now, scan the list and translate its info */
-       for (i = 0; i < (int) bsslist->nr; i++) {
-               current_ev = prism54_translate_bss(ndev, info, current_ev,
-                                                  extra + dwrq->length,
-                                                  &(bsslist->bsslist[i]),
-                                                  noise);
-
-               /* Check if there is space for one more entry */
-               if((extra + dwrq->length - current_ev) <= IW_EV_ADDR_LEN) {
-                       /* Ask user space to try again with a bigger buffer */
-                       rvalue = -E2BIG;
-                       break;
-               }
-       }
-
-       kfree(bsslist);
-       dwrq->length = (current_ev - extra);
-       dwrq->flags = 0;        /* todo */
-
-       return rvalue;
-}
-
-static int
-prism54_set_essid(struct net_device *ndev, struct iw_request_info *info,
-                 struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct obj_ssid essid;
-
-       memset(essid.octets, 0, 33);
-
-       /* Check if we were asked for `any' */
-       if (dwrq->flags && dwrq->length) {
-               if (dwrq->length > 32)
-                       return -E2BIG;
-               essid.length = dwrq->length;
-               memcpy(essid.octets, extra, dwrq->length);
-       } else
-               essid.length = 0;
-
-       if (priv->iw_mode != IW_MODE_MONITOR)
-               return mgt_set_request(priv, DOT11_OID_SSID, 0, &essid);
-
-       /* If in monitor mode, just save to mib */
-       mgt_set(priv, DOT11_OID_SSID, &essid);
-       return 0;
-
-}
-
-static int
-prism54_get_essid(struct net_device *ndev, struct iw_request_info *info,
-                 struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct obj_ssid *essid;
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, DOT11_OID_SSID, 0, NULL, &r);
-       essid = r.ptr;
-
-       if (essid->length) {
-               dwrq->flags = 1;        /* set ESSID to ON for Wireless Extensions */
-               /* if it is too big, trunk it */
-               dwrq->length = min((u8)IW_ESSID_MAX_SIZE, essid->length);
-       } else {
-               dwrq->flags = 0;
-               dwrq->length = 0;
-       }
-       essid->octets[dwrq->length] = '\0';
-       memcpy(extra, essid->octets, dwrq->length);
-       kfree(essid);
-
-       return rvalue;
-}
-
-/* Provides no functionality, just completes the ioctl. In essence this is a
- * just a cosmetic ioctl.
- */
-static int
-prism54_set_nick(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       if (dwrq->length > IW_ESSID_MAX_SIZE)
-               return -E2BIG;
-
-       down_write(&priv->mib_sem);
-       memset(priv->nickname, 0, sizeof (priv->nickname));
-       memcpy(priv->nickname, extra, dwrq->length);
-       up_write(&priv->mib_sem);
-
-       return 0;
-}
-
-static int
-prism54_get_nick(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       dwrq->length = 0;
-
-       down_read(&priv->mib_sem);
-       dwrq->length = strlen(priv->nickname);
-       memcpy(extra, priv->nickname, dwrq->length);
-       up_read(&priv->mib_sem);
-
-       return 0;
-}
-
-/* Set the allowed Bitrates */
-
-static int
-prism54_set_rate(struct net_device *ndev,
-                struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-
-       islpci_private *priv = netdev_priv(ndev);
-       u32 rate, profile;
-       char *data;
-       int ret, i;
-       union oid_res_t r;
-
-       if (vwrq->value == -1) {
-               /* auto mode. No limit. */
-               profile = 1;
-               return mgt_set_request(priv, DOT11_OID_PROFILES, 0, &profile);
-       }
-
-       ret = mgt_get_request(priv, DOT11_OID_SUPPORTEDRATES, 0, NULL, &r);
-       if (ret) {
-               kfree(r.ptr);
-               return ret;
-       }
-
-       rate = (u32) (vwrq->value / 500000);
-       data = r.ptr;
-       i = 0;
-
-       while (data[i]) {
-               if (rate && (data[i] == rate)) {
-                       break;
-               }
-               if (vwrq->value == i) {
-                       break;
-               }
-               data[i] |= 0x80;
-               i++;
-       }
-
-       if (!data[i]) {
-               kfree(r.ptr);
-               return -EINVAL;
-       }
-
-       data[i] |= 0x80;
-       data[i + 1] = 0;
-
-       /* Now, check if we want a fixed or auto value */
-       if (vwrq->fixed) {
-               data[0] = data[i];
-               data[1] = 0;
-       }
-
-/*
-       i = 0;
-       printk("prism54 rate: ");
-       while(data[i]) {
-               printk("%u ", data[i]);
-               i++;
-       }
-       printk("0\n");
-*/
-       profile = -1;
-       ret = mgt_set_request(priv, DOT11_OID_PROFILES, 0, &profile);
-       ret |= mgt_set_request(priv, DOT11_OID_EXTENDEDRATES, 0, data);
-       ret |= mgt_set_request(priv, DOT11_OID_RATES, 0, data);
-
-       kfree(r.ptr);
-
-       return ret;
-}
-
-/* Get the current bit rate */
-static int
-prism54_get_rate(struct net_device *ndev,
-                struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int rvalue;
-       char *data;
-       union oid_res_t r;
-
-       /* Get the current bit rate */
-       if ((rvalue = mgt_get_request(priv, GEN_OID_LINKSTATE, 0, NULL, &r)))
-               return rvalue;
-       vwrq->value = r.u * 500000;
-
-       /* request the device for the enabled rates */
-       rvalue = mgt_get_request(priv, DOT11_OID_RATES, 0, NULL, &r);
-       if (rvalue) {
-               kfree(r.ptr);
-               return rvalue;
-       }
-       data = r.ptr;
-       vwrq->fixed = (data[0] != 0) && (data[1] == 0);
-       kfree(r.ptr);
-
-       return 0;
-}
-
-static int
-prism54_set_rts(struct net_device *ndev, struct iw_request_info *info,
-               struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       return mgt_set_request(priv, DOT11_OID_RTSTHRESH, 0, &vwrq->value);
-}
-
-static int
-prism54_get_rts(struct net_device *ndev, struct iw_request_info *info,
-               struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       /* get the rts threshold */
-       rvalue = mgt_get_request(priv, DOT11_OID_RTSTHRESH, 0, NULL, &r);
-       vwrq->value = r.u;
-
-       return rvalue;
-}
-
-static int
-prism54_set_frag(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       return mgt_set_request(priv, DOT11_OID_FRAGTHRESH, 0, &vwrq->value);
-}
-
-static int
-prism54_get_frag(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, DOT11_OID_FRAGTHRESH, 0, NULL, &r);
-       vwrq->value = r.u;
-
-       return rvalue;
-}
-
-/* Here we have (min,max) = max retries for (small frames, big frames). Where
- * big frame <=>  bigger than the rts threshold
- * small frame <=>  smaller than the rts threshold
- * This is not really the behavior expected by the wireless tool but it seems
- * to be a common behavior in other drivers.
- */
-
-static int
-prism54_set_retry(struct net_device *ndev, struct iw_request_info *info,
-                 struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       u32 slimit = 0, llimit = 0;     /* short and long limit */
-       u32 lifetime = 0;
-       int rvalue = 0;
-
-       if (vwrq->disabled)
-               /* we cannot disable this feature */
-               return -EINVAL;
-
-       if (vwrq->flags & IW_RETRY_LIMIT) {
-               if (vwrq->flags & IW_RETRY_SHORT)
-                       slimit = vwrq->value;
-               else if (vwrq->flags & IW_RETRY_LONG)
-                       llimit = vwrq->value;
-               else {
-                       /* we are asked to set both */
-                       slimit = vwrq->value;
-                       llimit = vwrq->value;
-               }
-       }
-       if (vwrq->flags & IW_RETRY_LIFETIME)
-               /* Wireless tools use us unit while the device uses 1024 us unit */
-               lifetime = vwrq->value / 1024;
-
-       /* now set what is requested */
-       if (slimit)
-               rvalue =
-                   mgt_set_request(priv, DOT11_OID_SHORTRETRIES, 0, &slimit);
-       if (llimit)
-               rvalue |=
-                   mgt_set_request(priv, DOT11_OID_LONGRETRIES, 0, &llimit);
-       if (lifetime)
-               rvalue |=
-                   mgt_set_request(priv, DOT11_OID_MAXTXLIFETIME, 0,
-                                   &lifetime);
-       return rvalue;
-}
-
-static int
-prism54_get_retry(struct net_device *ndev, struct iw_request_info *info,
-                 struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue = 0;
-       vwrq->disabled = 0;     /* It cannot be disabled */
-
-       if ((vwrq->flags & IW_RETRY_TYPE) == IW_RETRY_LIFETIME) {
-               /* we are asked for the life time */
-               rvalue =
-                   mgt_get_request(priv, DOT11_OID_MAXTXLIFETIME, 0, NULL, &r);
-               vwrq->value = r.u * 1024;
-               vwrq->flags = IW_RETRY_LIFETIME;
-       } else if ((vwrq->flags & IW_RETRY_LONG)) {
-               /* we are asked for the long retry limit */
-               rvalue |=
-                   mgt_get_request(priv, DOT11_OID_LONGRETRIES, 0, NULL, &r);
-               vwrq->value = r.u;
-               vwrq->flags = IW_RETRY_LIMIT | IW_RETRY_LONG;
-       } else {
-               /* default. get the  short retry limit */
-               rvalue |=
-                   mgt_get_request(priv, DOT11_OID_SHORTRETRIES, 0, NULL, &r);
-               vwrq->value = r.u;
-               vwrq->flags = IW_RETRY_LIMIT | IW_RETRY_SHORT;
-       }
-
-       return rvalue;
-}
-
-static int
-prism54_set_encode(struct net_device *ndev, struct iw_request_info *info,
-                  struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int rvalue = 0, force = 0;
-       int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0;
-       union oid_res_t r;
-
-       /* with the new API, it's impossible to get a NULL pointer.
-        * New version of iwconfig set the IW_ENCODE_NOKEY flag
-        * when no key is given, but older versions don't. */
-
-       if (dwrq->length > 0) {
-               /* we have a key to set */
-               int index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
-               int current_index;
-               struct obj_key key = { DOT11_PRIV_WEP, 0, "" };
-
-               /* get the current key index */
-               rvalue = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
-               current_index = r.u;
-               /* Verify that the key is not marked as invalid */
-               if (!(dwrq->flags & IW_ENCODE_NOKEY)) {
-                       if (dwrq->length > KEY_SIZE_TKIP) {
-                               /* User-provided key data too big */
-                               return -EINVAL;
-                       }
-                       if (dwrq->length > KEY_SIZE_WEP104) {
-                               /* WPA-PSK TKIP */
-                               key.type = DOT11_PRIV_TKIP;
-                               key.length = KEY_SIZE_TKIP;
-                       } else if (dwrq->length > KEY_SIZE_WEP40) {
-                               /* WEP 104/128 */
-                               key.length = KEY_SIZE_WEP104;
-                       } else {
-                               /* WEP 40/64 */
-                               key.length = KEY_SIZE_WEP40;
-                       }
-                       memset(key.key, 0, sizeof (key.key));
-                       memcpy(key.key, extra, dwrq->length);
-
-                       if ((index < 0) || (index > 3))
-                               /* no index provided use the current one */
-                               index = current_index;
-
-                       /* now send the key to the card  */
-                       rvalue |=
-                           mgt_set_request(priv, DOT11_OID_DEFKEYX, index,
-                                           &key);
-               }
-               /*
-                * If a valid key is set, encryption should be enabled
-                * (user may turn it off later).
-                * This is also how "iwconfig ethX key on" works
-                */
-               if ((index == current_index) && (key.length > 0))
-                       force = 1;
-       } else {
-               int index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
-               if ((index >= 0) && (index <= 3)) {
-                       /* we want to set the key index */
-                       rvalue |=
-                           mgt_set_request(priv, DOT11_OID_DEFKEYID, 0,
-                                           &index);
-               } else {
-                       if (!(dwrq->flags & IW_ENCODE_MODE)) {
-                               /* we cannot do anything. Complain. */
-                               return -EINVAL;
-                       }
-               }
-       }
-       /* now read the flags */
-       if (dwrq->flags & IW_ENCODE_DISABLED) {
-               /* Encoding disabled,
-                * authen = DOT11_AUTH_OS;
-                * invoke = 0;
-                * exunencrypt = 0; */
-       }
-       if (dwrq->flags & IW_ENCODE_OPEN)
-               /* Encode but accept non-encoded packets. No auth */
-               invoke = 1;
-       if ((dwrq->flags & IW_ENCODE_RESTRICTED) || force) {
-               /* Refuse non-encoded packets. Auth */
-               authen = DOT11_AUTH_BOTH;
-               invoke = 1;
-               exunencrypt = 1;
-       }
-       /* do the change if requested  */
-       if ((dwrq->flags & IW_ENCODE_MODE) || force) {
-               rvalue |=
-                   mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
-               rvalue |=
-                   mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &invoke);
-               rvalue |=
-                   mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0,
-                                   &exunencrypt);
-       }
-       return rvalue;
-}
-
-static int
-prism54_get_encode(struct net_device *ndev, struct iw_request_info *info,
-                  struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct obj_key *key;
-       u32 devindex, index = (dwrq->flags & IW_ENCODE_INDEX) - 1;
-       u32 authen = 0, invoke = 0, exunencrypt = 0;
-       int rvalue;
-       union oid_res_t r;
-
-       /* first get the flags */
-       rvalue = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
-       authen = r.u;
-       rvalue |= mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
-       invoke = r.u;
-       rvalue |= mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
-       exunencrypt = r.u;
-
-       if (invoke && (authen == DOT11_AUTH_BOTH) && exunencrypt)
-               dwrq->flags = IW_ENCODE_RESTRICTED;
-       else if ((authen == DOT11_AUTH_OS) && !exunencrypt) {
-               if (invoke)
-                       dwrq->flags = IW_ENCODE_OPEN;
-               else
-                       dwrq->flags = IW_ENCODE_DISABLED;
-       } else
-               /* The card should not work in this state */
-               dwrq->flags = 0;
-
-       /* get the current device key index */
-       rvalue |= mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
-       devindex = r.u;
-       /* Now get the key, return it */
-       if (index == -1 || index > 3)
-               /* no index provided, use the current one */
-               index = devindex;
-       rvalue |= mgt_get_request(priv, DOT11_OID_DEFKEYX, index, NULL, &r);
-       key = r.ptr;
-       dwrq->length = key->length;
-       memcpy(extra, key->key, dwrq->length);
-       kfree(key);
-       /* return the used key index */
-       dwrq->flags |= devindex + 1;
-
-       return rvalue;
-}
-
-static int
-prism54_get_txpower(struct net_device *ndev, struct iw_request_info *info,
-                   struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       union oid_res_t r;
-       int rvalue;
-
-       rvalue = mgt_get_request(priv, OID_INL_OUTPUTPOWER, 0, NULL, &r);
-       /* intersil firmware operates in 0.25 dBm (1/4 dBm) */
-       vwrq->value = (s32) r.u / 4;
-       vwrq->fixed = 1;
-       /* radio is not turned of
-        * btw: how is possible to turn off only the radio
-        */
-       vwrq->disabled = 0;
-
-       return rvalue;
-}
-
-static int
-prism54_set_txpower(struct net_device *ndev, struct iw_request_info *info,
-                   struct iw_param *vwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       s32 u = vwrq->value;
-
-       /* intersil firmware operates in 0.25 dBm (1/4) */
-       u *= 4;
-       if (vwrq->disabled) {
-               /* don't know how to disable radio */
-               printk(KERN_DEBUG
-                      "%s: %s() disabling radio is not yet supported.\n",
-                      priv->ndev->name, __func__);
-               return -ENOTSUPP;
-       } else if (vwrq->fixed)
-               /* currently only fixed value is supported */
-               return mgt_set_request(priv, OID_INL_OUTPUTPOWER, 0, &u);
-       else {
-               printk(KERN_DEBUG
-                      "%s: %s() auto power will be implemented later.\n",
-                      priv->ndev->name, __func__);
-               return -ENOTSUPP;
-       }
-}
-
-static int prism54_set_genie(struct net_device *ndev,
-                            struct iw_request_info *info,
-                            struct iw_point *data, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int alen, ret = 0;
-       struct obj_attachment *attach;
-
-       if (data->length > MAX_WPA_IE_LEN ||
-           (data->length && extra == NULL))
-               return -EINVAL;
-
-       memcpy(priv->wpa_ie, extra, data->length);
-       priv->wpa_ie_len = data->length;
-
-       alen = sizeof(*attach) + priv->wpa_ie_len;
-       attach = kzalloc(alen, GFP_KERNEL);
-       if (attach == NULL)
-               return -ENOMEM;
-
-#define WLAN_FC_TYPE_MGMT 0
-#define WLAN_FC_STYPE_ASSOC_REQ 0
-#define WLAN_FC_STYPE_REASSOC_REQ 2
-
-       /* Note: endianness is covered by mgt_set_varlen */
-       attach->type = (WLAN_FC_TYPE_MGMT << 2) |
-               (WLAN_FC_STYPE_ASSOC_REQ << 4);
-       attach->id = -1;
-       attach->size = priv->wpa_ie_len;
-       memcpy(attach->data, extra, priv->wpa_ie_len);
-
-       ret = mgt_set_varlen(priv, DOT11_OID_ATTACHMENT, attach,
-               priv->wpa_ie_len);
-       if (ret == 0) {
-               attach->type = (WLAN_FC_TYPE_MGMT << 2) |
-                       (WLAN_FC_STYPE_REASSOC_REQ << 4);
-
-               ret = mgt_set_varlen(priv, DOT11_OID_ATTACHMENT, attach,
-                       priv->wpa_ie_len);
-               if (ret == 0)
-                       printk(KERN_DEBUG "%s: WPA IE Attachment was set\n",
-                               ndev->name);
-       }
-
-       kfree(attach);
-       return ret;
-}
-
-
-static int prism54_get_genie(struct net_device *ndev,
-                            struct iw_request_info *info,
-                            struct iw_point *data, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int len = priv->wpa_ie_len;
-
-       if (len <= 0) {
-               data->length = 0;
-               return 0;
-       }
-
-       if (data->length < len)
-               return -E2BIG;
-
-       data->length = len;
-       memcpy(extra, priv->wpa_ie, len);
-
-       return 0;
-}
-
-static int prism54_set_auth(struct net_device *ndev,
-                              struct iw_request_info *info,
-                              union iwreq_data *wrqu, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct iw_param *param = &wrqu->param;
-       u32 mlmelevel = 0, authen = 0, dot1x = 0;
-       u32 exunencrypt = 0, privinvoked = 0, wpa = 0;
-       u32 old_wpa;
-       int ret = 0;
-       union oid_res_t r;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       /* first get the flags */
-       down_write(&priv->mib_sem);
-       wpa = old_wpa = priv->wpa;
-       up_write(&priv->mib_sem);
-       ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
-       authen = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
-       privinvoked = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
-       exunencrypt = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_DOT1XENABLE, 0, NULL, &r);
-       dot1x = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, NULL, &r);
-       mlmelevel = r.u;
-
-       if (ret < 0)
-               goto out;
-
-       switch (param->flags & IW_AUTH_INDEX) {
-       case IW_AUTH_CIPHER_PAIRWISE:
-       case IW_AUTH_CIPHER_GROUP:
-       case IW_AUTH_KEY_MGMT:
-               break;
-
-       case IW_AUTH_WPA_ENABLED:
-               /* Do the same thing as IW_AUTH_WPA_VERSION */
-               if (param->value) {
-                       wpa = 1;
-                       privinvoked = 1; /* For privacy invoked */
-                       exunencrypt = 1; /* Filter out all unencrypted frames */
-                       dot1x = 0x01; /* To enable eap filter */
-                       mlmelevel = DOT11_MLME_EXTENDED;
-                       authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
-               } else {
-                       wpa = 0;
-                       privinvoked = 0;
-                       exunencrypt = 0; /* Do not filter un-encrypted data */
-                       dot1x = 0;
-                       mlmelevel = DOT11_MLME_AUTO;
-               }
-               break;
-
-       case IW_AUTH_WPA_VERSION:
-               if (param->value & IW_AUTH_WPA_VERSION_DISABLED) {
-                       wpa = 0;
-                       privinvoked = 0;
-                       exunencrypt = 0; /* Do not filter un-encrypted data */
-                       dot1x = 0;
-                       mlmelevel = DOT11_MLME_AUTO;
-               } else {
-                       if (param->value & IW_AUTH_WPA_VERSION_WPA)
-                               wpa = 1;
-                       else if (param->value & IW_AUTH_WPA_VERSION_WPA2)
-                               wpa = 2;
-                       privinvoked = 1; /* For privacy invoked */
-                       exunencrypt = 1; /* Filter out all unencrypted frames */
-                       dot1x = 0x01; /* To enable eap filter */
-                       mlmelevel = DOT11_MLME_EXTENDED;
-                       authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
-               }
-               break;
-
-       case IW_AUTH_RX_UNENCRYPTED_EAPOL:
-               /* dot1x should be the opposite of RX_UNENCRYPTED_EAPOL;
-                * turn off dot1x when allowing receipt of unencrypted EAPOL
-                * frames, turn on dot1x when receipt should be disallowed
-                */
-               dot1x = param->value ? 0 : 0x01;
-               break;
-
-       case IW_AUTH_PRIVACY_INVOKED:
-               privinvoked = param->value ? 1 : 0;
-               break;
-
-       case IW_AUTH_DROP_UNENCRYPTED:
-               exunencrypt = param->value ? 1 : 0;
-               break;
-
-       case IW_AUTH_80211_AUTH_ALG:
-               if (param->value & IW_AUTH_ALG_SHARED_KEY) {
-                       /* Only WEP uses _SK and _BOTH */
-                       if (wpa > 0) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
-                       authen = DOT11_AUTH_SK;
-               } else if (param->value & IW_AUTH_ALG_OPEN_SYSTEM) {
-                       authen = DOT11_AUTH_OS;
-               } else {
-                       ret = -EINVAL;
-                       goto out;
-               }
-               break;
-
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       /* Set all the values */
-       down_write(&priv->mib_sem);
-       priv->wpa = wpa;
-       up_write(&priv->mib_sem);
-       mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
-       mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &privinvoked);
-       mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0, &exunencrypt);
-       mgt_set_request(priv, DOT11_OID_DOT1XENABLE, 0, &dot1x);
-       mgt_set_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, &mlmelevel);
-
-out:
-       return ret;
-}
-
-static int prism54_get_auth(struct net_device *ndev,
-                           struct iw_request_info *info,
-                           union iwreq_data *wrqu, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct iw_param *param = &wrqu->param;
-       u32 wpa = 0;
-       int ret = 0;
-       union oid_res_t r;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       /* first get the flags */
-       down_write(&priv->mib_sem);
-       wpa = priv->wpa;
-       up_write(&priv->mib_sem);
-
-       switch (param->flags & IW_AUTH_INDEX) {
-       case IW_AUTH_CIPHER_PAIRWISE:
-       case IW_AUTH_CIPHER_GROUP:
-       case IW_AUTH_KEY_MGMT:
-               /*
-                * wpa_supplicant will control these internally
-                */
-               ret = -EOPNOTSUPP;
-               break;
-
-       case IW_AUTH_WPA_VERSION:
-               switch (wpa) {
-               case 1:
-                       param->value = IW_AUTH_WPA_VERSION_WPA;
-                       break;
-               case 2:
-                       param->value = IW_AUTH_WPA_VERSION_WPA2;
-                       break;
-               case 0:
-               default:
-                       param->value = IW_AUTH_WPA_VERSION_DISABLED;
-                       break;
-               }
-               break;
-
-       case IW_AUTH_DROP_UNENCRYPTED:
-               ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
-               if (ret >= 0)
-                       param->value = r.u > 0 ? 1 : 0;
-               break;
-
-       case IW_AUTH_80211_AUTH_ALG:
-               ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
-               if (ret >= 0) {
-                       switch (r.u) {
-                       case DOT11_AUTH_OS:
-                               param->value = IW_AUTH_ALG_OPEN_SYSTEM;
-                               break;
-                       case DOT11_AUTH_BOTH:
-                       case DOT11_AUTH_SK:
-                               param->value = IW_AUTH_ALG_SHARED_KEY;
-                               break;
-                       case DOT11_AUTH_NONE:
-                       default:
-                               param->value = 0;
-                               break;
-                       }
-               }
-               break;
-
-       case IW_AUTH_WPA_ENABLED:
-               param->value = wpa > 0 ? 1 : 0;
-               break;
-
-       case IW_AUTH_RX_UNENCRYPTED_EAPOL:
-               ret = mgt_get_request(priv, DOT11_OID_DOT1XENABLE, 0, NULL, &r);
-               if (ret >= 0)
-                       param->value = r.u > 0 ? 1 : 0;
-               break;
-
-       case IW_AUTH_PRIVACY_INVOKED:
-               ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
-               if (ret >= 0)
-                       param->value = r.u > 0 ? 1 : 0;
-               break;
-
-       default:
-               return -EOPNOTSUPP;
-       }
-       return ret;
-}
-
-static int prism54_set_encodeext(struct net_device *ndev,
-                                struct iw_request_info *info,
-                                union iwreq_data *wrqu,
-                                char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct iw_point *encoding = &wrqu->encoding;
-       struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
-       int idx, alg = ext->alg, set_key = 1;
-       union oid_res_t r;
-       int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0;
-       int ret = 0;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       /* Determine and validate the key index */
-       idx = (encoding->flags & IW_ENCODE_INDEX) - 1;
-       if (idx) {
-               if (idx < 0 || idx > 3)
-                       return -EINVAL;
-       } else {
-               ret = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
-               if (ret < 0)
-                       goto out;
-               idx = r.u;
-       }
-
-       if (encoding->flags & IW_ENCODE_DISABLED)
-               alg = IW_ENCODE_ALG_NONE;
-
-       if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) {
-               /* Only set transmit key index here, actual
-                * key is set below if needed.
-                */
-               ret = mgt_set_request(priv, DOT11_OID_DEFKEYID, 0, &idx);
-               set_key = ext->key_len > 0 ? 1 : 0;
-       }
-
-       if (set_key) {
-               struct obj_key key = { DOT11_PRIV_WEP, 0, "" };
-               switch (alg) {
-               case IW_ENCODE_ALG_NONE:
-                       break;
-               case IW_ENCODE_ALG_WEP:
-                       if (ext->key_len > KEY_SIZE_WEP104) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
-                       if (ext->key_len > KEY_SIZE_WEP40)
-                               key.length = KEY_SIZE_WEP104;
-                       else
-                               key.length = KEY_SIZE_WEP40;
-                       break;
-               case IW_ENCODE_ALG_TKIP:
-                       if (ext->key_len > KEY_SIZE_TKIP) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
-                       key.type = DOT11_PRIV_TKIP;
-                       key.length = KEY_SIZE_TKIP;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               if (key.length) {
-                       memset(key.key, 0, sizeof(key.key));
-                       memcpy(key.key, ext->key, ext->key_len);
-                       ret = mgt_set_request(priv, DOT11_OID_DEFKEYX, idx,
-                                           &key);
-                       if (ret < 0)
-                               goto out;
-               }
-       }
-
-       /* Read the flags */
-       if (encoding->flags & IW_ENCODE_DISABLED) {
-               /* Encoding disabled,
-                * authen = DOT11_AUTH_OS;
-                * invoke = 0;
-                * exunencrypt = 0; */
-       }
-       if (encoding->flags & IW_ENCODE_OPEN) {
-               /* Encode but accept non-encoded packets. No auth */
-               invoke = 1;
-       }
-       if (encoding->flags & IW_ENCODE_RESTRICTED) {
-               /* Refuse non-encoded packets. Auth */
-               authen = DOT11_AUTH_BOTH;
-               invoke = 1;
-               exunencrypt = 1;
-       }
-
-       /* do the change if requested  */
-       if (encoding->flags & IW_ENCODE_MODE) {
-               ret = mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0,
-                                     &authen);
-               ret = mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0,
-                                     &invoke);
-               ret = mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0,
-                                     &exunencrypt);
-       }
-
-out:
-       return ret;
-}
-
-
-static int prism54_get_encodeext(struct net_device *ndev,
-                                struct iw_request_info *info,
-                                union iwreq_data *wrqu,
-                                char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct iw_point *encoding = &wrqu->encoding;
-       struct iw_encode_ext *ext = (struct iw_encode_ext *)extra;
-       int idx, max_key_len;
-       union oid_res_t r;
-       int authen = DOT11_AUTH_OS, invoke = 0, exunencrypt = 0, wpa = 0;
-       int ret = 0;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       /* first get the flags */
-       ret = mgt_get_request(priv, DOT11_OID_AUTHENABLE, 0, NULL, &r);
-       authen = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_PRIVACYINVOKED, 0, NULL, &r);
-       invoke = r.u;
-       ret = mgt_get_request(priv, DOT11_OID_EXUNENCRYPTED, 0, NULL, &r);
-       exunencrypt = r.u;
-       if (ret < 0)
-               goto out;
-
-       max_key_len = encoding->length - sizeof(*ext);
-       if (max_key_len < 0)
-               return -EINVAL;
-
-       idx = (encoding->flags & IW_ENCODE_INDEX) - 1;
-       if (idx) {
-               if (idx < 0 || idx > 3)
-                       return -EINVAL;
-       } else {
-               ret = mgt_get_request(priv, DOT11_OID_DEFKEYID, 0, NULL, &r);
-               if (ret < 0)
-                       goto out;
-               idx = r.u;
-       }
-
-       encoding->flags = idx + 1;
-       memset(ext, 0, sizeof(*ext));
-
-       switch (authen) {
-       case DOT11_AUTH_BOTH:
-       case DOT11_AUTH_SK:
-               wrqu->encoding.flags |= IW_ENCODE_RESTRICTED;
-               fallthrough;
-       case DOT11_AUTH_OS:
-       default:
-               wrqu->encoding.flags |= IW_ENCODE_OPEN;
-               break;
-       }
-
-       down_write(&priv->mib_sem);
-       wpa = priv->wpa;
-       up_write(&priv->mib_sem);
-
-       if (authen == DOT11_AUTH_OS && !exunencrypt && !invoke && !wpa) {
-               /* No encryption */
-               ext->alg = IW_ENCODE_ALG_NONE;
-               ext->key_len = 0;
-               wrqu->encoding.flags |= IW_ENCODE_DISABLED;
-       } else {
-               struct obj_key *key;
-
-               ret = mgt_get_request(priv, DOT11_OID_DEFKEYX, idx, NULL, &r);
-               if (ret < 0)
-                       goto out;
-               key = r.ptr;
-               if (max_key_len < key->length) {
-                       ret = -E2BIG;
-                       goto out;
-               }
-               memcpy(ext->key, key->key, key->length);
-               ext->key_len = key->length;
-
-               switch (key->type) {
-               case DOT11_PRIV_TKIP:
-                       ext->alg = IW_ENCODE_ALG_TKIP;
-                       break;
-               default:
-               case DOT11_PRIV_WEP:
-                       ext->alg = IW_ENCODE_ALG_WEP;
-                       break;
-               }
-               wrqu->encoding.flags |= IW_ENCODE_ENABLED;
-       }
-
-out:
-       return ret;
-}
-
-
-static int
-prism54_reset(struct net_device *ndev, struct iw_request_info *info,
-             __u32 * uwrq, char *extra)
-{
-       islpci_reset(netdev_priv(ndev), 0);
-
-       return 0;
-}
-
-static int
-prism54_get_oid(struct net_device *ndev, struct iw_request_info *info,
-               struct iw_point *dwrq, char *extra)
-{
-       union oid_res_t r;
-       int rvalue;
-       enum oid_num_t n = dwrq->flags;
-
-       rvalue = mgt_get_request(netdev_priv(ndev), n, 0, NULL, &r);
-       dwrq->length = mgt_response_to_str(n, &r, extra);
-       if ((isl_oid[n].flags & OID_FLAG_TYPE) != OID_TYPE_U32)
-               kfree(r.ptr);
-       return rvalue;
-}
-
-static int
-prism54_set_u32(struct net_device *ndev, struct iw_request_info *info,
-               __u32 * uwrq, char *extra)
-{
-       u32 oid = uwrq[0], u = uwrq[1];
-
-       return mgt_set_request(netdev_priv(ndev), oid, 0, &u);
-}
-
-static int
-prism54_set_raw(struct net_device *ndev, struct iw_request_info *info,
-               struct iw_point *dwrq, char *extra)
-{
-       u32 oid = dwrq->flags;
-
-       return mgt_set_request(netdev_priv(ndev), oid, 0, extra);
-}
-
-void
-prism54_acl_init(struct islpci_acl *acl)
-{
-       mutex_init(&acl->lock);
-       INIT_LIST_HEAD(&acl->mac_list);
-       acl->size = 0;
-       acl->policy = MAC_POLICY_OPEN;
-}
-
-static void
-prism54_clear_mac(struct islpci_acl *acl)
-{
-       struct list_head *ptr, *next;
-       struct mac_entry *entry;
-
-       mutex_lock(&acl->lock);
-
-       if (acl->size == 0) {
-               mutex_unlock(&acl->lock);
-               return;
-       }
-
-       for (ptr = acl->mac_list.next, next = ptr->next;
-            ptr != &acl->mac_list; ptr = next, next = ptr->next) {
-               entry = list_entry(ptr, struct mac_entry, _list);
-               list_del(ptr);
-               kfree(entry);
-       }
-       acl->size = 0;
-       mutex_unlock(&acl->lock);
-}
-
-void
-prism54_acl_clean(struct islpci_acl *acl)
-{
-       prism54_clear_mac(acl);
-}
-
-static int
-prism54_add_mac(struct net_device *ndev, struct iw_request_info *info,
-               struct sockaddr *awrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_acl *acl = &priv->acl;
-       struct mac_entry *entry;
-       struct sockaddr *addr = (struct sockaddr *) extra;
-
-       if (addr->sa_family != ARPHRD_ETHER)
-               return -EOPNOTSUPP;
-
-       entry = kmalloc(sizeof (struct mac_entry), GFP_KERNEL);
-       if (entry == NULL)
-               return -ENOMEM;
-
-       memcpy(entry->addr, addr->sa_data, ETH_ALEN);
-
-       if (mutex_lock_interruptible(&acl->lock)) {
-               kfree(entry);
-               return -ERESTARTSYS;
-       }
-       list_add_tail(&entry->_list, &acl->mac_list);
-       acl->size++;
-       mutex_unlock(&acl->lock);
-
-       return 0;
-}
-
-static int
-prism54_del_mac(struct net_device *ndev, struct iw_request_info *info,
-               struct sockaddr *awrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_acl *acl = &priv->acl;
-       struct mac_entry *entry;
-       struct sockaddr *addr = (struct sockaddr *) extra;
-
-       if (addr->sa_family != ARPHRD_ETHER)
-               return -EOPNOTSUPP;
-
-       if (mutex_lock_interruptible(&acl->lock))
-               return -ERESTARTSYS;
-       list_for_each_entry(entry, &acl->mac_list, _list) {
-               if (ether_addr_equal(entry->addr, addr->sa_data)) {
-                       list_del(&entry->_list);
-                       acl->size--;
-                       kfree(entry);
-                       mutex_unlock(&acl->lock);
-                       return 0;
-               }
-       }
-       mutex_unlock(&acl->lock);
-       return -EINVAL;
-}
-
-static int
-prism54_get_mac(struct net_device *ndev, struct iw_request_info *info,
-               struct iw_point *dwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_acl *acl = &priv->acl;
-       struct mac_entry *entry;
-       struct sockaddr *dst = (struct sockaddr *) extra;
-
-       dwrq->length = 0;
-
-       if (mutex_lock_interruptible(&acl->lock))
-               return -ERESTARTSYS;
-
-       list_for_each_entry(entry, &acl->mac_list, _list) {
-               memcpy(dst->sa_data, entry->addr, ETH_ALEN);
-               dst->sa_family = ARPHRD_ETHER;
-               dwrq->length++;
-               dst++;
-       }
-       mutex_unlock(&acl->lock);
-       return 0;
-}
-
-/* Setting policy also clears the MAC acl, even if we don't change the default
- * policy
- */
-
-static int
-prism54_set_policy(struct net_device *ndev, struct iw_request_info *info,
-                  __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_acl *acl = &priv->acl;
-       u32 mlmeautolevel;
-
-       prism54_clear_mac(acl);
-
-       if ((*uwrq < MAC_POLICY_OPEN) || (*uwrq > MAC_POLICY_REJECT))
-               return -EINVAL;
-
-       down_write(&priv->mib_sem);
-
-       acl->policy = *uwrq;
-
-       /* the ACL code needs an intermediate mlmeautolevel */
-       if ((priv->iw_mode == IW_MODE_MASTER) &&
-           (acl->policy != MAC_POLICY_OPEN))
-               mlmeautolevel = DOT11_MLME_INTERMEDIATE;
-       else
-               mlmeautolevel = CARD_DEFAULT_MLME_MODE;
-       if (priv->wpa)
-               mlmeautolevel = DOT11_MLME_EXTENDED;
-       mgt_set(priv, DOT11_OID_MLMEAUTOLEVEL, &mlmeautolevel);
-       /* restart the card with our new policy */
-       if (mgt_commit(priv)) {
-               up_write(&priv->mib_sem);
-               return -EIO;
-       }
-       up_write(&priv->mib_sem);
-
-       return 0;
-}
-
-static int
-prism54_get_policy(struct net_device *ndev, struct iw_request_info *info,
-                  __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_acl *acl = &priv->acl;
-
-       *uwrq = acl->policy;
-
-       return 0;
-}
-
-/* Return 1 only if client should be accepted. */
-
-static int
-prism54_mac_accept(struct islpci_acl *acl, char *mac)
-{
-       struct mac_entry *entry;
-       int res = 0;
-
-       if (mutex_lock_interruptible(&acl->lock))
-               return -ERESTARTSYS;
-
-       if (acl->policy == MAC_POLICY_OPEN) {
-               mutex_unlock(&acl->lock);
-               return 1;
-       }
-
-       list_for_each_entry(entry, &acl->mac_list, _list) {
-               if (memcmp(entry->addr, mac, ETH_ALEN) == 0) {
-                       res = 1;
-                       break;
-               }
-       }
-       res = (acl->policy == MAC_POLICY_ACCEPT) ? !res : res;
-       mutex_unlock(&acl->lock);
-
-       return res;
-}
-
-static int
-prism54_kick_all(struct net_device *ndev, struct iw_request_info *info,
-                struct iw_point *dwrq, char *extra)
-{
-       struct obj_mlme *mlme;
-       int rvalue;
-
-       mlme = kmalloc(sizeof (struct obj_mlme), GFP_KERNEL);
-       if (mlme == NULL)
-               return -ENOMEM;
-
-       /* Tell the card to kick every client */
-       mlme->id = 0;
-       rvalue =
-           mgt_set_request(netdev_priv(ndev), DOT11_OID_DISASSOCIATE, 0, mlme);
-       kfree(mlme);
-
-       return rvalue;
-}
-
-static int
-prism54_kick_mac(struct net_device *ndev, struct iw_request_info *info,
-                struct sockaddr *awrq, char *extra)
-{
-       struct obj_mlme *mlme;
-       struct sockaddr *addr = (struct sockaddr *) extra;
-       int rvalue;
-
-       if (addr->sa_family != ARPHRD_ETHER)
-               return -EOPNOTSUPP;
-
-       mlme = kmalloc(sizeof (struct obj_mlme), GFP_KERNEL);
-       if (mlme == NULL)
-               return -ENOMEM;
-
-       /* Tell the card to only kick the corresponding bastard */
-       memcpy(mlme->address, addr->sa_data, ETH_ALEN);
-       mlme->id = -1;
-       rvalue =
-           mgt_set_request(netdev_priv(ndev), DOT11_OID_DISASSOCIATE, 0, mlme);
-
-       kfree(mlme);
-
-       return rvalue;
-}
-
-/* Translate a TRAP oid into a wireless event. Called in islpci_mgt_receive. */
-
-static void
-format_event(islpci_private *priv, char *dest, const char *str,
-            const struct obj_mlme *mlme, u16 *length, int error)
-{
-       int n = snprintf(dest, IW_CUSTOM_MAX,
-                        "%s %s %pM %s (%2.2X)",
-                        str,
-                        ((priv->iw_mode == IW_MODE_MASTER) ? "from" : "to"),
-                        mlme->address,
-                        (error ? (mlme->code ? " : REJECTED " : " : ACCEPTED ")
-                         : ""), mlme->code);
-       WARN_ON(n >= IW_CUSTOM_MAX);
-       *length = n;
-}
-
-static void
-send_formatted_event(islpci_private *priv, const char *str,
-                    const struct obj_mlme *mlme, int error)
-{
-       union iwreq_data wrqu;
-       char *memptr;
-
-       memptr = kmalloc(IW_CUSTOM_MAX, GFP_KERNEL);
-       if (!memptr)
-               return;
-       wrqu.data.pointer = memptr;
-       wrqu.data.length = 0;
-       format_event(priv, memptr, str, mlme, &wrqu.data.length,
-                    error);
-       wireless_send_event(priv->ndev, IWEVCUSTOM, &wrqu, memptr);
-       kfree(memptr);
-}
-
-static void
-send_simple_event(islpci_private *priv, const char *str)
-{
-       union iwreq_data wrqu;
-       char *memptr;
-       int n = strlen(str);
-
-       memptr = kmalloc(IW_CUSTOM_MAX, GFP_KERNEL);
-       if (!memptr)
-               return;
-       BUG_ON(n >= IW_CUSTOM_MAX);
-       wrqu.data.pointer = memptr;
-       wrqu.data.length = n;
-       strcpy(memptr, str);
-       wireless_send_event(priv->ndev, IWEVCUSTOM, &wrqu, memptr);
-       kfree(memptr);
-}
-
-static void
-link_changed(struct net_device *ndev, u32 bitrate)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       if (bitrate) {
-               netif_carrier_on(ndev);
-               if (priv->iw_mode == IW_MODE_INFRA) {
-                       union iwreq_data uwrq;
-                       prism54_get_wap(ndev, NULL, (struct sockaddr *) &uwrq,
-                                       NULL);
-                       wireless_send_event(ndev, SIOCGIWAP, &uwrq, NULL);
-               } else
-                       send_simple_event(netdev_priv(ndev),
-                                         "Link established");
-       } else {
-               netif_carrier_off(ndev);
-               send_simple_event(netdev_priv(ndev), "Link lost");
-       }
-}
-
-/* Beacon/ProbeResp payload header */
-struct ieee80211_beacon_phdr {
-       u8 timestamp[8];
-       u16 beacon_int;
-       u16 capab_info;
-} __packed;
-
-#define WLAN_EID_GENERIC 0xdd
-static u8 wpa_oid[4] = { 0x00, 0x50, 0xf2, 1 };
-
-static void
-prism54_wpa_bss_ie_add(islpci_private *priv, u8 *bssid,
-                      u8 *wpa_ie, size_t wpa_ie_len)
-{
-       struct list_head *ptr;
-       struct islpci_bss_wpa_ie *bss = NULL;
-
-       if (wpa_ie_len > MAX_WPA_IE_LEN)
-               wpa_ie_len = MAX_WPA_IE_LEN;
-
-       mutex_lock(&priv->wpa_lock);
-
-       /* try to use existing entry */
-       list_for_each(ptr, &priv->bss_wpa_list) {
-               bss = list_entry(ptr, struct islpci_bss_wpa_ie, list);
-               if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0) {
-                       list_move(&bss->list, &priv->bss_wpa_list);
-                       break;
-               }
-               bss = NULL;
-       }
-
-       if (bss == NULL) {
-               /* add a new BSS entry; if max number of entries is already
-                * reached, replace the least recently updated */
-               if (priv->num_bss_wpa >= MAX_BSS_WPA_IE_COUNT) {
-                       bss = list_entry(priv->bss_wpa_list.prev,
-                                        struct islpci_bss_wpa_ie, list);
-                       list_del(&bss->list);
-               } else {
-                       bss = kzalloc(sizeof (*bss), GFP_ATOMIC);
-                       if (bss != NULL)
-                               priv->num_bss_wpa++;
-               }
-               if (bss != NULL) {
-                       memcpy(bss->bssid, bssid, ETH_ALEN);
-                       list_add(&bss->list, &priv->bss_wpa_list);
-               }
-       }
-
-       if (bss != NULL) {
-               memcpy(bss->wpa_ie, wpa_ie, wpa_ie_len);
-               bss->wpa_ie_len = wpa_ie_len;
-               bss->last_update = jiffies;
-       } else {
-               printk(KERN_DEBUG "Failed to add BSS WPA entry for "
-                      "%pM\n", bssid);
-       }
-
-       /* expire old entries from WPA list */
-       while (priv->num_bss_wpa > 0) {
-               bss = list_entry(priv->bss_wpa_list.prev,
-                                struct islpci_bss_wpa_ie, list);
-               if (!time_after(jiffies, bss->last_update + 60 * HZ))
-                       break;
-
-               list_del(&bss->list);
-               priv->num_bss_wpa--;
-               kfree(bss);
-       }
-
-       mutex_unlock(&priv->wpa_lock);
-}
-
-static size_t
-prism54_wpa_bss_ie_get(islpci_private *priv, u8 *bssid, u8 *wpa_ie)
-{
-       struct list_head *ptr;
-       struct islpci_bss_wpa_ie *bss = NULL;
-       size_t len = 0;
-
-       mutex_lock(&priv->wpa_lock);
-
-       list_for_each(ptr, &priv->bss_wpa_list) {
-               bss = list_entry(ptr, struct islpci_bss_wpa_ie, list);
-               if (memcmp(bss->bssid, bssid, ETH_ALEN) == 0)
-                       break;
-               bss = NULL;
-       }
-       if (bss) {
-               len = bss->wpa_ie_len;
-               memcpy(wpa_ie, bss->wpa_ie, len);
-       }
-       mutex_unlock(&priv->wpa_lock);
-
-       return len;
-}
-
-void
-prism54_wpa_bss_ie_init(islpci_private *priv)
-{
-       INIT_LIST_HEAD(&priv->bss_wpa_list);
-       mutex_init(&priv->wpa_lock);
-}
-
-void
-prism54_wpa_bss_ie_clean(islpci_private *priv)
-{
-       struct islpci_bss_wpa_ie *bss, *n;
-
-       list_for_each_entry_safe(bss, n, &priv->bss_wpa_list, list) {
-               kfree(bss);
-       }
-}
-
-static void
-prism54_process_bss_data(islpci_private *priv, u32 oid, u8 *addr,
-                        u8 *payload, size_t len)
-{
-       struct ieee80211_beacon_phdr *hdr;
-       u8 *pos, *end;
-
-       if (!priv->wpa)
-               return;
-
-       hdr = (struct ieee80211_beacon_phdr *) payload;
-       pos = (u8 *) (hdr + 1);
-       end = payload + len;
-       while (pos < end) {
-               if (pos + 2 + pos[1] > end) {
-                       printk(KERN_DEBUG "Parsing Beacon/ProbeResp failed "
-                              "for %pM\n", addr);
-                       return;
-               }
-               if (pos[0] == WLAN_EID_GENERIC && pos[1] >= 4 &&
-                   memcmp(pos + 2, wpa_oid, 4) == 0) {
-                       prism54_wpa_bss_ie_add(priv, addr, pos, pos[1] + 2);
-                       return;
-               }
-               pos += 2 + pos[1];
-       }
-}
-
-static void
-handle_request(islpci_private *priv, struct obj_mlme *mlme, enum oid_num_t oid)
-{
-       if (((mlme->state == DOT11_STATE_AUTHING) ||
-            (mlme->state == DOT11_STATE_ASSOCING))
-           && mgt_mlme_answer(priv)) {
-               /* Someone is requesting auth and we must respond. Just send back
-                * the trap with error code set accordingly.
-                */
-               mlme->code = prism54_mac_accept(&priv->acl,
-                                               mlme->address) ? 0 : 1;
-               mgt_set_request(priv, oid, 0, mlme);
-       }
-}
-
-static int
-prism54_process_trap_helper(islpci_private *priv, enum oid_num_t oid,
-                           char *data)
-{
-       struct obj_mlme *mlme = (struct obj_mlme *) data;
-       struct obj_mlmeex *mlmeex = (struct obj_mlmeex *) data;
-       struct obj_mlmeex *confirm;
-       u8 wpa_ie[MAX_WPA_IE_LEN];
-       int wpa_ie_len;
-       size_t len = 0; /* u16, better? */
-       u8 *payload = NULL, *pos = NULL;
-       int ret;
-
-       /* I think all trapable objects are listed here.
-        * Some oids have a EX version. The difference is that they are emitted
-        * in DOT11_MLME_EXTENDED mode (set with DOT11_OID_MLMEAUTOLEVEL)
-        * with more info.
-        * The few events already defined by the wireless tools are not really
-        * suited. We use the more flexible custom event facility.
-        */
-
-       if (oid >= DOT11_OID_BEACON) {
-               len = mlmeex->size;
-               payload = pos = mlmeex->data;
-       }
-
-       /* I fear prism54_process_bss_data won't work with big endian data */
-       if ((oid == DOT11_OID_BEACON) || (oid == DOT11_OID_PROBE))
-               prism54_process_bss_data(priv, oid, mlmeex->address,
-                                        payload, len);
-
-       mgt_le_to_cpu(isl_oid[oid].flags & OID_FLAG_TYPE, (void *) mlme);
-
-       switch (oid) {
-
-       case GEN_OID_LINKSTATE:
-               link_changed(priv->ndev, (u32) *data);
-               break;
-
-       case DOT11_OID_MICFAILURE:
-               send_simple_event(priv, "Mic failure");
-               break;
-
-       case DOT11_OID_DEAUTHENTICATE:
-               send_formatted_event(priv, "DeAuthenticate request", mlme, 0);
-               break;
-
-       case DOT11_OID_AUTHENTICATE:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "Authenticate request", mlme, 1);
-               break;
-
-       case DOT11_OID_DISASSOCIATE:
-               send_formatted_event(priv, "Disassociate request", mlme, 0);
-               break;
-
-       case DOT11_OID_ASSOCIATE:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "Associate request", mlme, 1);
-               break;
-
-       case DOT11_OID_REASSOCIATE:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "ReAssociate request", mlme, 1);
-               break;
-
-       case DOT11_OID_BEACON:
-               send_formatted_event(priv,
-                                    "Received a beacon from an unknown AP",
-                                    mlme, 0);
-               break;
-
-       case DOT11_OID_PROBE:
-               /* we received a probe from a client. */
-               send_formatted_event(priv, "Received a probe from client", mlme,
-                                    0);
-               break;
-
-               /* Note : "mlme" is actually a "struct obj_mlmeex *" here, but this
-                * is backward compatible layout-wise with "struct obj_mlme".
-                */
-
-       case DOT11_OID_DEAUTHENTICATEEX:
-               send_formatted_event(priv, "DeAuthenticate request", mlme, 0);
-               break;
-
-       case DOT11_OID_AUTHENTICATEEX:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "Authenticate request (ex)", mlme, 1);
-
-               if (priv->iw_mode != IW_MODE_MASTER
-                               && mlmeex->state != DOT11_STATE_AUTHING)
-                       break;
-
-               confirm = kmalloc(sizeof(struct obj_mlmeex) + 6, GFP_ATOMIC);
-
-               if (!confirm)
-                       break;
-
-               memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
-               printk(KERN_DEBUG "Authenticate from: address:\t%pM\n",
-                      mlmeex->address);
-               confirm->id = -1; /* or mlmeex->id ? */
-               confirm->state = 0; /* not used */
-               confirm->code = 0;
-               confirm->size = 6;
-               confirm->data[0] = 0x00;
-               confirm->data[1] = 0x00;
-               confirm->data[2] = 0x02;
-               confirm->data[3] = 0x00;
-               confirm->data[4] = 0x00;
-               confirm->data[5] = 0x00;
-
-               ret = mgt_set_varlen(priv, DOT11_OID_ASSOCIATEEX, confirm, 6);
-
-               kfree(confirm);
-               if (ret)
-                       return ret;
-               break;
-
-       case DOT11_OID_DISASSOCIATEEX:
-               send_formatted_event(priv, "Disassociate request (ex)", mlme, 0);
-               break;
-
-       case DOT11_OID_ASSOCIATEEX:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "Associate request (ex)", mlme, 1);
-
-               if (priv->iw_mode != IW_MODE_MASTER
-                               && mlmeex->state != DOT11_STATE_ASSOCING)
-                       break;
-
-               confirm = kmalloc(sizeof(struct obj_mlmeex), GFP_ATOMIC);
-
-               if (!confirm)
-                       break;
-
-               memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
-
-               confirm->id = ((struct obj_mlmeex *)mlme)->id;
-               confirm->state = 0; /* not used */
-               confirm->code = 0;
-
-               wpa_ie_len = prism54_wpa_bss_ie_get(priv, mlmeex->address, wpa_ie);
-
-               if (!wpa_ie_len) {
-                       printk(KERN_DEBUG "No WPA IE found from address:\t%pM\n",
-                              mlmeex->address);
-                       kfree(confirm);
-                       break;
-               }
-
-               confirm->size = wpa_ie_len;
-               memcpy(&confirm->data, wpa_ie, wpa_ie_len);
-
-               mgt_set_varlen(priv, oid, confirm, wpa_ie_len);
-
-               kfree(confirm);
-
-               break;
-
-       case DOT11_OID_REASSOCIATEEX:
-               handle_request(priv, mlme, oid);
-               send_formatted_event(priv, "Reassociate request (ex)", mlme, 1);
-
-               if (priv->iw_mode != IW_MODE_MASTER
-                               && mlmeex->state != DOT11_STATE_ASSOCING)
-                       break;
-
-               confirm = kmalloc(sizeof(struct obj_mlmeex), GFP_ATOMIC);
-
-               if (!confirm)
-                       break;
-
-               memcpy(&confirm->address, mlmeex->address, ETH_ALEN);
-
-               confirm->id = mlmeex->id;
-               confirm->state = 0; /* not used */
-               confirm->code = 0;
-
-               wpa_ie_len = prism54_wpa_bss_ie_get(priv, mlmeex->address, wpa_ie);
-
-               if (!wpa_ie_len) {
-                       printk(KERN_DEBUG "No WPA IE found from address:\t%pM\n",
-                              mlmeex->address);
-                       kfree(confirm);
-                       break;
-               }
-
-               confirm->size = wpa_ie_len;
-               memcpy(&confirm->data, wpa_ie, wpa_ie_len);
-
-               mgt_set_varlen(priv, oid, confirm, wpa_ie_len);
-
-               kfree(confirm);
-
-               break;
-
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-/*
- * Process a device trap.  This is called via schedule_work(), outside of
- * interrupt context, no locks held.
- */
-void
-prism54_process_trap(struct work_struct *work)
-{
-       struct islpci_mgmtframe *frame =
-               container_of(work, struct islpci_mgmtframe, ws);
-       struct net_device *ndev = frame->ndev;
-       enum oid_num_t n = mgt_oidtonum(frame->header->oid);
-
-       if (n != OID_NUM_LAST)
-               prism54_process_trap_helper(netdev_priv(ndev), n, frame->data);
-       islpci_mgt_release(frame);
-}
-
-int
-prism54_set_mac_address(struct net_device *ndev, void *addr)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       int ret;
-
-       if (ndev->addr_len != 6)
-               return -EINVAL;
-       ret = mgt_set_request(priv, GEN_OID_MACADDRESS, 0,
-                             &((struct sockaddr *) addr)->sa_data);
-       if (!ret)
-               memcpy(priv->ndev->dev_addr,
-                      &((struct sockaddr *) addr)->sa_data, ETH_ALEN);
-
-       return ret;
-}
-
-#define PRISM54_SET_WPA                        SIOCIWFIRSTPRIV+12
-
-static int
-prism54_set_wpa(struct net_device *ndev, struct iw_request_info *info,
-               __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       u32 mlme, authen, dot1x, filter, wep;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       wep = 1; /* For privacy invoked */
-       filter = 1; /* Filter out all unencrypted frames */
-       dot1x = 0x01; /* To enable eap filter */
-       mlme = DOT11_MLME_EXTENDED;
-       authen = DOT11_AUTH_OS; /* Only WEP uses _SK and _BOTH */
-
-       down_write(&priv->mib_sem);
-       priv->wpa = *uwrq;
-
-       switch (priv->wpa) {
-               default:
-               case 0: /* Clears/disables WPA and friends */
-                       wep = 0;
-                       filter = 0; /* Do not filter un-encrypted data */
-                       dot1x = 0;
-                       mlme = DOT11_MLME_AUTO;
-                       printk("%s: Disabling WPA\n", ndev->name);
-                       break;
-               case 2:
-               case 1: /* WPA */
-                       printk("%s: Enabling WPA\n", ndev->name);
-                       break;
-       }
-       up_write(&priv->mib_sem);
-
-       mgt_set_request(priv, DOT11_OID_AUTHENABLE, 0, &authen);
-       mgt_set_request(priv, DOT11_OID_PRIVACYINVOKED, 0, &wep);
-       mgt_set_request(priv, DOT11_OID_EXUNENCRYPTED, 0, &filter);
-       mgt_set_request(priv, DOT11_OID_DOT1XENABLE, 0, &dot1x);
-       mgt_set_request(priv, DOT11_OID_MLMEAUTOLEVEL, 0, &mlme);
-
-       return 0;
-}
-
-static int
-prism54_get_wpa(struct net_device *ndev, struct iw_request_info *info,
-               __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       *uwrq = priv->wpa;
-       return 0;
-}
-
-static int
-prism54_set_prismhdr(struct net_device *ndev, struct iw_request_info *info,
-                    __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       priv->monitor_type =
-           (*uwrq ? ARPHRD_IEEE80211_PRISM : ARPHRD_IEEE80211);
-       if (priv->iw_mode == IW_MODE_MONITOR)
-               priv->ndev->type = priv->monitor_type;
-
-       return 0;
-}
-
-static int
-prism54_get_prismhdr(struct net_device *ndev, struct iw_request_info *info,
-                    __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       *uwrq = (priv->monitor_type == ARPHRD_IEEE80211_PRISM);
-       return 0;
-}
-
-static int
-prism54_debug_oid(struct net_device *ndev, struct iw_request_info *info,
-                 __u32 * uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       priv->priv_oid = *uwrq;
-       printk("%s: oid 0x%08X\n", ndev->name, *uwrq);
-
-       return 0;
-}
-
-static int
-prism54_debug_get_oid(struct net_device *ndev, struct iw_request_info *info,
-                     struct iw_point *data, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_mgmtframe *response;
-       int ret = -EIO;
-
-       printk("%s: get_oid 0x%08X\n", ndev->name, priv->priv_oid);
-       data->length = 0;
-
-       if (islpci_get_state(priv) >= PRV_STATE_INIT) {
-               ret =
-                   islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
-                                          priv->priv_oid, extra, 256,
-                                          &response);
-               printk("%s: ret: %i\n", ndev->name, ret);
-               if (ret || !response
-                   || response->header->operation == PIMFOR_OP_ERROR) {
-                       if (response) {
-                               islpci_mgt_release(response);
-                       }
-                       printk("%s: EIO\n", ndev->name);
-                       ret = -EIO;
-               }
-               if (!ret) {
-                       data->length = response->header->length;
-                       memcpy(extra, response->data, data->length);
-                       islpci_mgt_release(response);
-                       printk("%s: len: %i\n", ndev->name, data->length);
-               }
-       }
-
-       return ret;
-}
-
-static int
-prism54_debug_set_oid(struct net_device *ndev, struct iw_request_info *info,
-                     struct iw_point *data, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       struct islpci_mgmtframe *response;
-       int ret = 0, response_op = PIMFOR_OP_ERROR;
-
-       printk("%s: set_oid 0x%08X\tlen: %d\n", ndev->name, priv->priv_oid,
-              data->length);
-
-       if (islpci_get_state(priv) >= PRV_STATE_INIT) {
-               ret =
-                   islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET,
-                                          priv->priv_oid, extra, data->length,
-                                          &response);
-               printk("%s: ret: %i\n", ndev->name, ret);
-               if (ret || !response
-                   || response->header->operation == PIMFOR_OP_ERROR) {
-                       if (response) {
-                               islpci_mgt_release(response);
-                       }
-                       printk("%s: EIO\n", ndev->name);
-                       ret = -EIO;
-               }
-               if (!ret) {
-                       response_op = response->header->operation;
-                       printk("%s: response_op: %i\n", ndev->name,
-                              response_op);
-                       islpci_mgt_release(response);
-               }
-       }
-
-       return (ret ? ret : -EINPROGRESS);
-}
-
-static int
-prism54_set_spy(struct net_device *ndev,
-               struct iw_request_info *info,
-               union iwreq_data *uwrq, char *extra)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       u32 u;
-       enum oid_num_t oid = OID_INL_CONFIG;
-
-       down_write(&priv->mib_sem);
-       mgt_get(priv, OID_INL_CONFIG, &u);
-
-       if ((uwrq->data.length == 0) && (priv->spy_data.spy_number > 0))
-               /* disable spy */
-               u &= ~INL_CONFIG_RXANNEX;
-       else if ((uwrq->data.length > 0) && (priv->spy_data.spy_number == 0))
-               /* enable spy */
-               u |= INL_CONFIG_RXANNEX;
-
-       mgt_set(priv, OID_INL_CONFIG, &u);
-       mgt_commit_list(priv, &oid, 1);
-       up_write(&priv->mib_sem);
-
-       return iw_handler_set_spy(ndev, info, uwrq, extra);
-}
-
-static const iw_handler prism54_handler[] = {
-       (iw_handler) prism54_commit,    /* SIOCSIWCOMMIT */
-       (iw_handler) prism54_get_name,  /* SIOCGIWNAME */
-       (iw_handler) NULL,      /* SIOCSIWNWID */
-       (iw_handler) NULL,      /* SIOCGIWNWID */
-       (iw_handler) prism54_set_freq,  /* SIOCSIWFREQ */
-       (iw_handler) prism54_get_freq,  /* SIOCGIWFREQ */
-       (iw_handler) prism54_set_mode,  /* SIOCSIWMODE */
-       (iw_handler) prism54_get_mode,  /* SIOCGIWMODE */
-       (iw_handler) prism54_set_sens,  /* SIOCSIWSENS */
-       (iw_handler) prism54_get_sens,  /* SIOCGIWSENS */
-       (iw_handler) NULL,      /* SIOCSIWRANGE */
-       (iw_handler) prism54_get_range, /* SIOCGIWRANGE */
-       (iw_handler) NULL,      /* SIOCSIWPRIV */
-       (iw_handler) NULL,      /* SIOCGIWPRIV */
-       (iw_handler) NULL,      /* SIOCSIWSTATS */
-       (iw_handler) NULL,      /* SIOCGIWSTATS */
-       prism54_set_spy,        /* SIOCSIWSPY */
-       iw_handler_get_spy,     /* SIOCGIWSPY */
-       iw_handler_set_thrspy,  /* SIOCSIWTHRSPY */
-       iw_handler_get_thrspy,  /* SIOCGIWTHRSPY */
-       (iw_handler) prism54_set_wap,   /* SIOCSIWAP */
-       (iw_handler) prism54_get_wap,   /* SIOCGIWAP */
-       (iw_handler) NULL,      /* -- hole -- */
-       (iw_handler) NULL,      /* SIOCGIWAPLIST deprecated */
-       (iw_handler) prism54_set_scan,  /* SIOCSIWSCAN */
-       (iw_handler) prism54_get_scan,  /* SIOCGIWSCAN */
-       (iw_handler) prism54_set_essid, /* SIOCSIWESSID */
-       (iw_handler) prism54_get_essid, /* SIOCGIWESSID */
-       (iw_handler) prism54_set_nick,  /* SIOCSIWNICKN */
-       (iw_handler) prism54_get_nick,  /* SIOCGIWNICKN */
-       (iw_handler) NULL,      /* -- hole -- */
-       (iw_handler) NULL,      /* -- hole -- */
-       (iw_handler) prism54_set_rate,  /* SIOCSIWRATE */
-       (iw_handler) prism54_get_rate,  /* SIOCGIWRATE */
-       (iw_handler) prism54_set_rts,   /* SIOCSIWRTS */
-       (iw_handler) prism54_get_rts,   /* SIOCGIWRTS */
-       (iw_handler) prism54_set_frag,  /* SIOCSIWFRAG */
-       (iw_handler) prism54_get_frag,  /* SIOCGIWFRAG */
-       (iw_handler) prism54_set_txpower,       /* SIOCSIWTXPOW */
-       (iw_handler) prism54_get_txpower,       /* SIOCGIWTXPOW */
-       (iw_handler) prism54_set_retry, /* SIOCSIWRETRY */
-       (iw_handler) prism54_get_retry, /* SIOCGIWRETRY */
-       (iw_handler) prism54_set_encode,        /* SIOCSIWENCODE */
-       (iw_handler) prism54_get_encode,        /* SIOCGIWENCODE */
-       (iw_handler) NULL,      /* SIOCSIWPOWER */
-       (iw_handler) NULL,      /* SIOCGIWPOWER */
-       NULL,                   /* -- hole -- */
-       NULL,                   /* -- hole -- */
-       (iw_handler) prism54_set_genie, /* SIOCSIWGENIE */
-       (iw_handler) prism54_get_genie, /* SIOCGIWGENIE */
-       (iw_handler) prism54_set_auth,  /* SIOCSIWAUTH */
-       (iw_handler) prism54_get_auth,  /* SIOCGIWAUTH */
-       (iw_handler) prism54_set_encodeext, /* SIOCSIWENCODEEXT */
-       (iw_handler) prism54_get_encodeext, /* SIOCGIWENCODEEXT */
-       NULL,                   /* SIOCSIWPMKSA */
-};
-
-/* The low order bit identify a SET (0) or a GET (1) ioctl.  */
-
-#define PRISM54_RESET          SIOCIWFIRSTPRIV
-#define PRISM54_GET_POLICY     SIOCIWFIRSTPRIV+1
-#define PRISM54_SET_POLICY     SIOCIWFIRSTPRIV+2
-#define PRISM54_GET_MAC                SIOCIWFIRSTPRIV+3
-#define PRISM54_ADD_MAC                SIOCIWFIRSTPRIV+4
-
-#define PRISM54_DEL_MAC                SIOCIWFIRSTPRIV+6
-
-#define PRISM54_KICK_MAC       SIOCIWFIRSTPRIV+8
-
-#define PRISM54_KICK_ALL       SIOCIWFIRSTPRIV+10
-
-#define PRISM54_GET_WPA                SIOCIWFIRSTPRIV+11
-#define PRISM54_SET_WPA                SIOCIWFIRSTPRIV+12
-
-#define PRISM54_DBG_OID                SIOCIWFIRSTPRIV+14
-#define PRISM54_DBG_GET_OID    SIOCIWFIRSTPRIV+15
-#define PRISM54_DBG_SET_OID    SIOCIWFIRSTPRIV+16
-
-#define PRISM54_GET_OID                SIOCIWFIRSTPRIV+17
-#define PRISM54_SET_OID_U32    SIOCIWFIRSTPRIV+18
-#define        PRISM54_SET_OID_STR     SIOCIWFIRSTPRIV+20
-#define        PRISM54_SET_OID_ADDR    SIOCIWFIRSTPRIV+22
-
-#define PRISM54_GET_PRISMHDR   SIOCIWFIRSTPRIV+23
-#define PRISM54_SET_PRISMHDR   SIOCIWFIRSTPRIV+24
-
-#define IWPRIV_SET_U32(n,x)    { n, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_SET_SSID(n,x)   { n, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_SET_ADDR(n,x)   { n, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0, "s_"x }
-#define IWPRIV_GET(n,x)        { n, 0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | PRIV_STR_SIZE, "g_"x }
-
-#define IWPRIV_U32(n,x)                IWPRIV_SET_U32(n,x), IWPRIV_GET(n,x)
-#define IWPRIV_SSID(n,x)       IWPRIV_SET_SSID(n,x), IWPRIV_GET(n,x)
-#define IWPRIV_ADDR(n,x)       IWPRIV_SET_ADDR(n,x), IWPRIV_GET(n,x)
-
-/* Note : limited to 128 private ioctls (wireless tools 26) */
-
-static const struct iw_priv_args prism54_private_args[] = {
-/*{ cmd, set_args, get_args, name } */
-       {PRISM54_RESET, 0, 0, "reset"},
-       {PRISM54_GET_PRISMHDR, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
-        "get_prismhdr"},
-       {PRISM54_SET_PRISMHDR, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
-        "set_prismhdr"},
-       {PRISM54_GET_POLICY, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
-        "getPolicy"},
-       {PRISM54_SET_POLICY, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
-        "setPolicy"},
-       {PRISM54_GET_MAC, 0, IW_PRIV_TYPE_ADDR | 64, "getMac"},
-       {PRISM54_ADD_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
-        "addMac"},
-       {PRISM54_DEL_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
-        "delMac"},
-       {PRISM54_KICK_MAC, IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0,
-        "kickMac"},
-       {PRISM54_KICK_ALL, 0, 0, "kickAll"},
-       {PRISM54_GET_WPA, 0, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1,
-        "get_wpa"},
-       {PRISM54_SET_WPA, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
-        "set_wpa"},
-       {PRISM54_DBG_OID, IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0,
-        "dbg_oid"},
-       {PRISM54_DBG_GET_OID, 0, IW_PRIV_TYPE_BYTE | 256, "dbg_get_oid"},
-       {PRISM54_DBG_SET_OID, IW_PRIV_TYPE_BYTE | 256, 0, "dbg_set_oid"},
-       /* --- sub-ioctls handlers --- */
-       {PRISM54_GET_OID,
-        0, IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | PRIV_STR_SIZE, ""},
-       {PRISM54_SET_OID_U32,
-        IW_PRIV_TYPE_INT | IW_PRIV_SIZE_FIXED | 1, 0, ""},
-       {PRISM54_SET_OID_STR,
-        IW_PRIV_TYPE_CHAR | IW_PRIV_SIZE_FIXED | 1, 0, ""},
-       {PRISM54_SET_OID_ADDR,
-        IW_PRIV_TYPE_ADDR | IW_PRIV_SIZE_FIXED | 1, 0, ""},
-       /* --- sub-ioctls definitions --- */
-       IWPRIV_ADDR(GEN_OID_MACADDRESS, "addr"),
-       IWPRIV_GET(GEN_OID_LINKSTATE, "linkstate"),
-       IWPRIV_U32(DOT11_OID_BSSTYPE, "bsstype"),
-       IWPRIV_ADDR(DOT11_OID_BSSID, "bssid"),
-       IWPRIV_U32(DOT11_OID_STATE, "state"),
-       IWPRIV_U32(DOT11_OID_AID, "aid"),
-
-       IWPRIV_SSID(DOT11_OID_SSIDOVERRIDE, "ssidoverride"),
-
-       IWPRIV_U32(DOT11_OID_MEDIUMLIMIT, "medlimit"),
-       IWPRIV_U32(DOT11_OID_BEACONPERIOD, "beacon"),
-       IWPRIV_U32(DOT11_OID_DTIMPERIOD, "dtimperiod"),
-
-       IWPRIV_U32(DOT11_OID_AUTHENABLE, "authenable"),
-       IWPRIV_U32(DOT11_OID_PRIVACYINVOKED, "privinvok"),
-       IWPRIV_U32(DOT11_OID_EXUNENCRYPTED, "exunencrypt"),
-
-       IWPRIV_U32(DOT11_OID_REKEYTHRESHOLD, "rekeythresh"),
-
-       IWPRIV_U32(DOT11_OID_MAXTXLIFETIME, "maxtxlife"),
-       IWPRIV_U32(DOT11_OID_MAXRXLIFETIME, "maxrxlife"),
-       IWPRIV_U32(DOT11_OID_ALOFT_FIXEDRATE, "fixedrate"),
-       IWPRIV_U32(DOT11_OID_MAXFRAMEBURST, "frameburst"),
-       IWPRIV_U32(DOT11_OID_PSM, "psm"),
-
-       IWPRIV_U32(DOT11_OID_BRIDGELOCAL, "bridge"),
-       IWPRIV_U32(DOT11_OID_CLIENTS, "clients"),
-       IWPRIV_U32(DOT11_OID_CLIENTSASSOCIATED, "clientassoc"),
-       IWPRIV_U32(DOT11_OID_DOT1XENABLE, "dot1xenable"),
-       IWPRIV_U32(DOT11_OID_ANTENNARX, "rxant"),
-       IWPRIV_U32(DOT11_OID_ANTENNATX, "txant"),
-       IWPRIV_U32(DOT11_OID_ANTENNADIVERSITY, "antdivers"),
-       IWPRIV_U32(DOT11_OID_EDTHRESHOLD, "edthresh"),
-       IWPRIV_U32(DOT11_OID_PREAMBLESETTINGS, "preamble"),
-       IWPRIV_GET(DOT11_OID_RATES, "rates"),
-       IWPRIV_U32(DOT11_OID_OUTPUTPOWER, ".11outpower"),
-       IWPRIV_GET(DOT11_OID_SUPPORTEDRATES, "supprates"),
-       IWPRIV_GET(DOT11_OID_SUPPORTEDFREQUENCIES, "suppfreq"),
-
-       IWPRIV_U32(DOT11_OID_NOISEFLOOR, "noisefloor"),
-       IWPRIV_GET(DOT11_OID_FREQUENCYACTIVITY, "freqactivity"),
-       IWPRIV_U32(DOT11_OID_NONERPPROTECTION, "nonerpprotec"),
-       IWPRIV_U32(DOT11_OID_PROFILES, "profile"),
-       IWPRIV_GET(DOT11_OID_EXTENDEDRATES, "extrates"),
-       IWPRIV_U32(DOT11_OID_MLMEAUTOLEVEL, "mlmelevel"),
-
-       IWPRIV_GET(DOT11_OID_BSSS, "bsss"),
-       IWPRIV_GET(DOT11_OID_BSSLIST, "bsslist"),
-       IWPRIV_U32(OID_INL_MODE, "mode"),
-       IWPRIV_U32(OID_INL_CONFIG, "config"),
-       IWPRIV_U32(OID_INL_DOT11D_CONFORMANCE, ".11dconform"),
-       IWPRIV_GET(OID_INL_PHYCAPABILITIES, "phycapa"),
-       IWPRIV_U32(OID_INL_OUTPUTPOWER, "outpower"),
-};
-
-static const iw_handler prism54_private_handler[] = {
-       (iw_handler) prism54_reset,
-       (iw_handler) prism54_get_policy,
-       (iw_handler) prism54_set_policy,
-       (iw_handler) prism54_get_mac,
-       (iw_handler) prism54_add_mac,
-       (iw_handler) NULL,
-       (iw_handler) prism54_del_mac,
-       (iw_handler) NULL,
-       (iw_handler) prism54_kick_mac,
-       (iw_handler) NULL,
-       (iw_handler) prism54_kick_all,
-       (iw_handler) prism54_get_wpa,
-       (iw_handler) prism54_set_wpa,
-       (iw_handler) NULL,
-       (iw_handler) prism54_debug_oid,
-       (iw_handler) prism54_debug_get_oid,
-       (iw_handler) prism54_debug_set_oid,
-       (iw_handler) prism54_get_oid,
-       (iw_handler) prism54_set_u32,
-       (iw_handler) NULL,
-       (iw_handler) prism54_set_raw,
-       (iw_handler) NULL,
-       (iw_handler) prism54_set_raw,
-       (iw_handler) prism54_get_prismhdr,
-       (iw_handler) prism54_set_prismhdr,
-};
-
-const struct iw_handler_def prism54_handler_def = {
-       .num_standard = ARRAY_SIZE(prism54_handler),
-       .num_private = ARRAY_SIZE(prism54_private_handler),
-       .num_private_args = ARRAY_SIZE(prism54_private_args),
-       .standard = (iw_handler *) prism54_handler,
-       .private = (iw_handler *) prism54_private_handler,
-       .private_args = (struct iw_priv_args *) prism54_private_args,
-       .get_wireless_stats = prism54_get_wireless_stats,
-};
diff --git a/drivers/net/wireless/intersil/prism54/isl_ioctl.h b/drivers/net/wireless/intersil/prism54/isl_ioctl.h
deleted file mode 100644 (file)
index 3f85fd7..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *            (C) 2003 Aurelien Alleaume <slts@free.fr>
- *            (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#ifndef _ISL_IOCTL_H
-#define _ISL_IOCTL_H
-
-#include "islpci_mgt.h"
-#include "islpci_dev.h"
-
-#include <net/iw_handler.h>    /* New driver API */
-
-#define SUPPORTED_WIRELESS_EXT                  19
-
-void prism54_mib_init(islpci_private *);
-
-struct iw_statistics *prism54_get_wireless_stats(struct net_device *);
-void prism54_update_stats(struct work_struct *);
-
-void prism54_acl_init(struct islpci_acl *);
-void prism54_acl_clean(struct islpci_acl *);
-
-void prism54_process_trap(struct work_struct *);
-
-void prism54_wpa_bss_ie_init(islpci_private *priv);
-void prism54_wpa_bss_ie_clean(islpci_private *priv);
-
-int prism54_set_mac_address(struct net_device *, void *);
-
-extern const struct iw_handler_def prism54_handler_def;
-
-#endif                         /* _ISL_IOCTL_H */
diff --git a/drivers/net/wireless/intersil/prism54/isl_oid.h b/drivers/net/wireless/intersil/prism54/isl_oid.h
deleted file mode 100644 (file)
index b889bb7..0000000
+++ /dev/null
@@ -1,492 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *  Copyright (C) 2004 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- *  Copyright (C) 2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#if !defined(_ISL_OID_H)
-#define _ISL_OID_H
-
-/*
- * MIB related constant and structure definitions for communicating
- * with the device firmware
- */
-
-struct obj_ssid {
-       u8 length;
-       char octets[33];
-} __packed;
-
-struct obj_key {
-       u8 type;                /* dot11_priv_t */
-       u8 length;
-       char key[32];
-} __packed;
-
-struct obj_mlme {
-       u8 address[6];
-       u16 id;
-       u16 state;
-       u16 code;
-} __packed;
-
-struct obj_mlmeex {
-       u8 address[6];
-       u16 id;
-       u16 state;
-       u16 code;
-       u16 size;
-       u8 data[];
-} __packed;
-
-struct obj_buffer {
-       u32 size;
-       u32 addr;               /* 32bit bus address */
-} __packed;
-
-struct obj_bss {
-       u8 address[6];
-       int:16;                 /* padding */
-
-       char state;
-       char reserved;
-       short age;
-
-       char quality;
-       char rssi;
-
-       struct obj_ssid ssid;
-       short channel;
-       char beacon_period;
-       char dtim_period;
-       short capinfo;
-       short rates;
-       short basic_rates;
-       int:16;                 /* padding */
-} __packed;
-
-struct obj_bsslist {
-       u32 nr;
-       struct obj_bss bsslist[];
-} __packed;
-
-struct obj_frequencies {
-       u16 nr;
-       u16 mhz[];
-} __packed;
-
-struct obj_attachment {
-       char type;
-       char reserved;
-       short id;
-       short size;
-       char data[];
-} __packed;
-
-/*
- * in case everything's ok, the inlined function below will be
- * optimized away by the compiler...
- */
-static inline void
-__bug_on_wrong_struct_sizes(void)
-{
-       BUILD_BUG_ON(sizeof (struct obj_ssid) != 34);
-       BUILD_BUG_ON(sizeof (struct obj_key) != 34);
-       BUILD_BUG_ON(sizeof (struct obj_mlme) != 12);
-       BUILD_BUG_ON(sizeof (struct obj_mlmeex) != 14);
-       BUILD_BUG_ON(sizeof (struct obj_buffer) != 8);
-       BUILD_BUG_ON(sizeof (struct obj_bss) != 60);
-       BUILD_BUG_ON(sizeof (struct obj_bsslist) != 4);
-       BUILD_BUG_ON(sizeof (struct obj_frequencies) != 2);
-}
-
-enum dot11_state_t {
-       DOT11_STATE_NONE = 0,
-       DOT11_STATE_AUTHING = 1,
-       DOT11_STATE_AUTH = 2,
-       DOT11_STATE_ASSOCING = 3,
-
-       DOT11_STATE_ASSOC = 5,
-       DOT11_STATE_IBSS = 6,
-       DOT11_STATE_WDS = 7
-};
-
-enum dot11_bsstype_t {
-       DOT11_BSSTYPE_NONE = 0,
-       DOT11_BSSTYPE_INFRA = 1,
-       DOT11_BSSTYPE_IBSS = 2,
-       DOT11_BSSTYPE_ANY = 3
-};
-
-enum dot11_auth_t {
-       DOT11_AUTH_NONE = 0,
-       DOT11_AUTH_OS = 1,
-       DOT11_AUTH_SK = 2,
-       DOT11_AUTH_BOTH = 3
-};
-
-enum dot11_mlme_t {
-       DOT11_MLME_AUTO = 0,
-       DOT11_MLME_INTERMEDIATE = 1,
-       DOT11_MLME_EXTENDED = 2
-};
-
-enum dot11_priv_t {
-       DOT11_PRIV_WEP = 0,
-       DOT11_PRIV_TKIP = 1
-};
-
-/* Prism "Nitro" / Frameburst / "Packet Frame Grouping"
- * Value is in microseconds. Represents the # microseconds
- * the firmware will take to group frames before sending out then out
- * together with a CSMA contention. Without this all frames are
- * sent with a CSMA contention.
- * Bibliography:
- * https://www.hpl.hp.com/personal/Jean_Tourrilhes/Papers/Packet.Frame.Grouping.html
- */
-enum dot11_maxframeburst_t {
-       /* Values for DOT11_OID_MAXFRAMEBURST */
-       DOT11_MAXFRAMEBURST_OFF = 0, /* Card firmware default */
-       DOT11_MAXFRAMEBURST_MIXED_SAFE = 650, /* 802.11 a,b,g safe */
-       DOT11_MAXFRAMEBURST_IDEAL = 1300, /* Theoretical ideal level */
-       DOT11_MAXFRAMEBURST_MAX = 5000, /* Use this as max,
-               * Note: firmware allows for greater values. This is a
-               * recommended max. I'll update this as I find
-               * out what the real MAX is. Also note that you don't necessarily
-               * get better results with a greater value here.
-               */
-};
-
-/* Support for 802.11 long and short frame preambles.
- * Long         preamble uses 128-bit sync field, 8-bit  CRC
- * Short preamble uses 56-bit  sync field, 16-bit CRC
- *
- * 802.11a -- not sure, both optionally ?
- * 802.11b supports long and optionally short
- * 802.11g supports both */
-enum dot11_preamblesettings_t {
-       DOT11_PREAMBLESETTING_LONG = 0,
-               /* Allows *only* long 802.11 preambles */
-       DOT11_PREAMBLESETTING_SHORT = 1,
-               /* Allows *only* short 802.11 preambles */
-       DOT11_PREAMBLESETTING_DYNAMIC = 2
-               /* AutomatiGically set */
-};
-
-/* Support for 802.11 slot timing (time between packets).
- *
- * Long uses 802.11a slot timing  (9 usec ?)
- * Short uses 802.11b slot timing (20 use ?) */
-enum dot11_slotsettings_t {
-       DOT11_SLOTSETTINGS_LONG = 0,
-               /* Allows *only* long 802.11b slot timing */
-       DOT11_SLOTSETTINGS_SHORT = 1,
-               /* Allows *only* long 802.11a slot timing */
-       DOT11_SLOTSETTINGS_DYNAMIC = 2
-               /* AutomatiGically set */
-};
-
-/* All you need to know, ERP is "Extended Rate PHY".
- * An Extended Rate PHY (ERP) STA or AP shall support three different
- * preamble and header formats:
- * Long  preamble (refer to above)
- * Short preamble (refer to above)
- * OFDM  preamble ( ? )
- *
- * I'm assuming here Protection tells the AP
- * to be careful, a STA which cannot handle the long pre-amble
- * has joined.
- */
-enum do11_nonerpstatus_t {
-       DOT11_ERPSTAT_NONEPRESENT = 0,
-       DOT11_ERPSTAT_USEPROTECTION = 1
-};
-
-/* (ERP is "Extended Rate PHY") Way to read NONERP is NON-ERP-*
- * The key here is DOT11 NON ERP NEVER protects against
- * NON ERP STA's. You *don't* want this unless
- * you know what you are doing. It means you will only
- * get Extended Rate capabilities */
-enum dot11_nonerpprotection_t {
-       DOT11_NONERP_NEVER = 0,
-       DOT11_NONERP_ALWAYS = 1,
-       DOT11_NONERP_DYNAMIC = 2
-};
-
-/* Preset OID configuration for 802.11 modes
- * Note: DOT11_OID_CW[MIN|MAX] hold the values of the
- * DCS MIN|MAX backoff used */
-enum dot11_profile_t { /* And set/allowed values */
-       /* Allowed values for DOT11_OID_PROFILES */
-       DOT11_PROFILE_B_ONLY = 0,
-               /* DOT11_OID_RATES: 1, 2, 5.5, 11Mbps
-                * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_DYNAMIC
-                * DOT11_OID_CWMIN: 31
-                * DOT11_OID_NONEPROTECTION: DOT11_NOERP_DYNAMIC
-                * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_LONG
-                */
-       DOT11_PROFILE_MIXED_G_WIFI = 1,
-               /* DOT11_OID_RATES: 1, 2, 5.5, 11, 6, 9, 12, 18, 24, 36, 48, 54Mbs
-                * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_DYNAMIC
-                * DOT11_OID_CWMIN: 15
-                * DOT11_OID_NONEPROTECTION: DOT11_NOERP_DYNAMIC
-                * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_DYNAMIC
-                */
-       DOT11_PROFILE_MIXED_LONG = 2, /* "Long range" */
-               /* Same as Profile MIXED_G_WIFI */
-       DOT11_PROFILE_G_ONLY = 3,
-               /* Same as Profile MIXED_G_WIFI */
-       DOT11_PROFILE_TEST = 4,
-               /* Same as Profile MIXED_G_WIFI except:
-                * DOT11_OID_PREAMBLESETTINGS: DOT11_PREAMBLESETTING_SHORT
-                * DOT11_OID_NONEPROTECTION: DOT11_NOERP_NEVER
-                * DOT11_OID_SLOTSETTINGS: DOT11_SLOTSETTINGS_SHORT
-                */
-       DOT11_PROFILE_B_WIFI = 5,
-               /* Same as Profile B_ONLY */
-       DOT11_PROFILE_A_ONLY = 6,
-               /* Same as Profile MIXED_G_WIFI except:
-                * DOT11_OID_RATES: 6, 9, 12, 18, 24, 36, 48, 54Mbs
-                */
-       DOT11_PROFILE_MIXED_SHORT = 7
-               /* Same as MIXED_G_WIFI */
-};
-
-
-/* The dot11d conformance level configures the 802.11d conformance levels.
- * The following conformance levels exist:*/
-enum oid_inl_conformance_t {
-       OID_INL_CONFORMANCE_NONE = 0,   /* Perform active scanning */
-       OID_INL_CONFORMANCE_STRICT = 1, /* Strictly adhere to 802.11d */
-       OID_INL_CONFORMANCE_FLEXIBLE = 2,       /* Use passed 802.11d info to
-               * determine channel AND/OR just make assumption that active
-               * channels are valid  channels */
-};
-
-enum oid_inl_mode_t {
-       INL_MODE_NONE = -1,
-       INL_MODE_PROMISCUOUS = 0,
-       INL_MODE_CLIENT = 1,
-       INL_MODE_AP = 2,
-       INL_MODE_SNIFFER = 3
-};
-
-enum oid_inl_config_t {
-       INL_CONFIG_NOTHING = 0x00,
-       INL_CONFIG_MANUALRUN = 0x01,
-       INL_CONFIG_FRAMETRAP = 0x02,
-       INL_CONFIG_RXANNEX = 0x04,
-       INL_CONFIG_TXANNEX = 0x08,
-       INL_CONFIG_WDS = 0x10
-};
-
-enum oid_inl_phycap_t {
-       INL_PHYCAP_2400MHZ = 1,
-       INL_PHYCAP_5000MHZ = 2,
-       INL_PHYCAP_FAA = 0x80000000,    /* Means card supports the FAA switch */
-};
-
-
-enum oid_num_t {
-       GEN_OID_MACADDRESS = 0,
-       GEN_OID_LINKSTATE,
-       GEN_OID_WATCHDOG,
-       GEN_OID_MIBOP,
-       GEN_OID_OPTIONS,
-       GEN_OID_LEDCONFIG,
-
-       /* 802.11 */
-       DOT11_OID_BSSTYPE,
-       DOT11_OID_BSSID,
-       DOT11_OID_SSID,
-       DOT11_OID_STATE,
-       DOT11_OID_AID,
-       DOT11_OID_COUNTRYSTRING,
-       DOT11_OID_SSIDOVERRIDE,
-
-       DOT11_OID_MEDIUMLIMIT,
-       DOT11_OID_BEACONPERIOD,
-       DOT11_OID_DTIMPERIOD,
-       DOT11_OID_ATIMWINDOW,
-       DOT11_OID_LISTENINTERVAL,
-       DOT11_OID_CFPPERIOD,
-       DOT11_OID_CFPDURATION,
-
-       DOT11_OID_AUTHENABLE,
-       DOT11_OID_PRIVACYINVOKED,
-       DOT11_OID_EXUNENCRYPTED,
-       DOT11_OID_DEFKEYID,
-       DOT11_OID_DEFKEYX,      /* DOT11_OID_DEFKEY1,...DOT11_OID_DEFKEY4 */
-       DOT11_OID_STAKEY,
-       DOT11_OID_REKEYTHRESHOLD,
-       DOT11_OID_STASC,
-
-       DOT11_OID_PRIVTXREJECTED,
-       DOT11_OID_PRIVRXPLAIN,
-       DOT11_OID_PRIVRXFAILED,
-       DOT11_OID_PRIVRXNOKEY,
-
-       DOT11_OID_RTSTHRESH,
-       DOT11_OID_FRAGTHRESH,
-       DOT11_OID_SHORTRETRIES,
-       DOT11_OID_LONGRETRIES,
-       DOT11_OID_MAXTXLIFETIME,
-       DOT11_OID_MAXRXLIFETIME,
-       DOT11_OID_AUTHRESPTIMEOUT,
-       DOT11_OID_ASSOCRESPTIMEOUT,
-
-       DOT11_OID_ALOFT_TABLE,
-       DOT11_OID_ALOFT_CTRL_TABLE,
-       DOT11_OID_ALOFT_RETREAT,
-       DOT11_OID_ALOFT_PROGRESS,
-       DOT11_OID_ALOFT_FIXEDRATE,
-       DOT11_OID_ALOFT_RSSIGRAPH,
-       DOT11_OID_ALOFT_CONFIG,
-
-       DOT11_OID_VDCFX,
-       DOT11_OID_MAXFRAMEBURST,
-
-       DOT11_OID_PSM,
-       DOT11_OID_CAMTIMEOUT,
-       DOT11_OID_RECEIVEDTIMS,
-       DOT11_OID_ROAMPREFERENCE,
-
-       DOT11_OID_BRIDGELOCAL,
-       DOT11_OID_CLIENTS,
-       DOT11_OID_CLIENTSASSOCIATED,
-       DOT11_OID_CLIENTX,      /* DOT11_OID_CLIENTX,...DOT11_OID_CLIENT2007 */
-
-       DOT11_OID_CLIENTFIND,
-       DOT11_OID_WDSLINKADD,
-       DOT11_OID_WDSLINKREMOVE,
-       DOT11_OID_EAPAUTHSTA,
-       DOT11_OID_EAPUNAUTHSTA,
-       DOT11_OID_DOT1XENABLE,
-       DOT11_OID_MICFAILURE,
-       DOT11_OID_REKEYINDICATE,
-
-       DOT11_OID_MPDUTXSUCCESSFUL,
-       DOT11_OID_MPDUTXONERETRY,
-       DOT11_OID_MPDUTXMULTIPLERETRIES,
-       DOT11_OID_MPDUTXFAILED,
-       DOT11_OID_MPDURXSUCCESSFUL,
-       DOT11_OID_MPDURXDUPS,
-       DOT11_OID_RTSSUCCESSFUL,
-       DOT11_OID_RTSFAILED,
-       DOT11_OID_ACKFAILED,
-       DOT11_OID_FRAMERECEIVES,
-       DOT11_OID_FRAMEERRORS,
-       DOT11_OID_FRAMEABORTS,
-       DOT11_OID_FRAMEABORTSPHY,
-
-       DOT11_OID_SLOTTIME,
-       DOT11_OID_CWMIN, /* MIN DCS backoff */
-       DOT11_OID_CWMAX, /* MAX DCS backoff */
-       DOT11_OID_ACKWINDOW,
-       DOT11_OID_ANTENNARX,
-       DOT11_OID_ANTENNATX,
-       DOT11_OID_ANTENNADIVERSITY,
-       DOT11_OID_CHANNEL,
-       DOT11_OID_EDTHRESHOLD,
-       DOT11_OID_PREAMBLESETTINGS,
-       DOT11_OID_RATES,
-       DOT11_OID_CCAMODESUPPORTED,
-       DOT11_OID_CCAMODE,
-       DOT11_OID_RSSIVECTOR,
-       DOT11_OID_OUTPUTPOWERTABLE,
-       DOT11_OID_OUTPUTPOWER,
-       DOT11_OID_SUPPORTEDRATES,
-       DOT11_OID_FREQUENCY,
-       DOT11_OID_SUPPORTEDFREQUENCIES,
-       DOT11_OID_NOISEFLOOR,
-       DOT11_OID_FREQUENCYACTIVITY,
-       DOT11_OID_IQCALIBRATIONTABLE,
-       DOT11_OID_NONERPPROTECTION,
-       DOT11_OID_SLOTSETTINGS,
-       DOT11_OID_NONERPTIMEOUT,
-       DOT11_OID_PROFILES,
-       DOT11_OID_EXTENDEDRATES,
-
-       DOT11_OID_DEAUTHENTICATE,
-       DOT11_OID_AUTHENTICATE,
-       DOT11_OID_DISASSOCIATE,
-       DOT11_OID_ASSOCIATE,
-       DOT11_OID_SCAN,
-       DOT11_OID_BEACON,
-       DOT11_OID_PROBE,
-       DOT11_OID_DEAUTHENTICATEEX,
-       DOT11_OID_AUTHENTICATEEX,
-       DOT11_OID_DISASSOCIATEEX,
-       DOT11_OID_ASSOCIATEEX,
-       DOT11_OID_REASSOCIATE,
-       DOT11_OID_REASSOCIATEEX,
-
-       DOT11_OID_NONERPSTATUS,
-
-       DOT11_OID_STATIMEOUT,
-       DOT11_OID_MLMEAUTOLEVEL,
-       DOT11_OID_BSSTIMEOUT,
-       DOT11_OID_ATTACHMENT,
-       DOT11_OID_PSMBUFFER,
-
-       DOT11_OID_BSSS,
-       DOT11_OID_BSSX,         /*DOT11_OID_BSS1,...,DOT11_OID_BSS64 */
-       DOT11_OID_BSSFIND,
-       DOT11_OID_BSSLIST,
-
-       OID_INL_TUNNEL,
-       OID_INL_MEMADDR,
-       OID_INL_MEMORY,
-       OID_INL_MODE,
-       OID_INL_COMPONENT_NR,
-       OID_INL_VERSION,
-       OID_INL_INTERFACE_ID,
-       OID_INL_COMPONENT_ID,
-       OID_INL_CONFIG,
-       OID_INL_DOT11D_CONFORMANCE,
-       OID_INL_PHYCAPABILITIES,
-       OID_INL_OUTPUTPOWER,
-
-       OID_NUM_LAST
-};
-
-#define OID_FLAG_CACHED                0x80
-#define OID_FLAG_TYPE          0x7f
-
-#define OID_TYPE_U32           0x01
-#define OID_TYPE_SSID          0x02
-#define OID_TYPE_KEY           0x03
-#define OID_TYPE_BUFFER                0x04
-#define OID_TYPE_BSS           0x05
-#define OID_TYPE_BSSLIST       0x06
-#define OID_TYPE_FREQUENCIES   0x07
-#define OID_TYPE_MLME          0x08
-#define OID_TYPE_MLMEEX                0x09
-#define OID_TYPE_ADDR          0x0A
-#define OID_TYPE_RAW           0x0B
-#define OID_TYPE_ATTACH                0x0C
-
-/* OID_TYPE_MLMEEX is special because of a variable size field when sending.
- * Not yet implemented (not used in driver anyway).
- */
-
-struct oid_t {
-       enum oid_num_t oid;
-       short range;            /* to define a range of oid */
-       short size;             /* max size of the associated data */
-       char flags;
-};
-
-union oid_res_t {
-       void *ptr;
-       u32 u;
-};
-
-#define        IWMAX_BITRATES  20
-#define        IWMAX_BSS       24
-#define IWMAX_FREQ     30
-#define PRIV_STR_SIZE  1024
-
-#endif                         /* !defined(_ISL_OID_H) */
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_dev.c b/drivers/net/wireless/intersil/prism54/islpci_dev.c
deleted file mode 100644 (file)
index 8eb6d5e..0000000
+++ /dev/null
@@ -1,951 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *  Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#include <linux/hardirq.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-
-#include <linux/netdevice.h>
-#include <linux/ethtool.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/if_arp.h>
-
-#include <asm/io.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "isl_ioctl.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-#include "islpci_eth.h"
-#include "oid_mgt.h"
-
-#define ISL3877_IMAGE_FILE     "isl3877"
-#define ISL3886_IMAGE_FILE     "isl3886"
-#define ISL3890_IMAGE_FILE     "isl3890"
-MODULE_FIRMWARE(ISL3877_IMAGE_FILE);
-MODULE_FIRMWARE(ISL3886_IMAGE_FILE);
-MODULE_FIRMWARE(ISL3890_IMAGE_FILE);
-
-static int prism54_bring_down(islpci_private *);
-static int islpci_alloc_memory(islpci_private *);
-
-/* Temporary dummy MAC address to use until firmware is loaded.
- * The idea there is that some tools (such as nameif) may query
- * the MAC address before the netdev is 'open'. By using a valid
- * OUI prefix, they can process the netdev properly.
- * Of course, this is not the final/real MAC address. It doesn't
- * matter, as you are suppose to be able to change it anytime via
- * ndev->set_mac_address. Jean II */
-static const unsigned char     dummy_mac[6] = { 0x00, 0x30, 0xB4, 0x00, 0x00, 0x00 };
-
-static int
-isl_upload_firmware(islpci_private *priv)
-{
-       u32 reg, rc;
-       void __iomem *device_base = priv->device_base;
-
-       /* clear the RAMBoot and the Reset bit */
-       reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
-       reg &= ~ISL38XX_CTRL_STAT_RESET;
-       reg &= ~ISL38XX_CTRL_STAT_RAMBOOT;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* set the Reset bit without reading the register ! */
-       reg |= ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* clear the Reset bit */
-       reg &= ~ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-
-       /* wait a while for the device to reboot */
-       mdelay(50);
-
-       {
-               const struct firmware *fw_entry = NULL;
-               long fw_len;
-               const u32 *fw_ptr;
-
-               rc = request_firmware(&fw_entry, priv->firmware, PRISM_FW_PDEV);
-               if (rc) {
-                       printk(KERN_ERR
-                              "%s: request_firmware() failed for '%s'\n",
-                              "prism54", priv->firmware);
-                       return rc;
-               }
-               /* prepare the Direct Memory Base register */
-               reg = ISL38XX_DEV_FIRMWARE_ADDRES;
-
-               fw_ptr = (u32 *) fw_entry->data;
-               fw_len = fw_entry->size;
-
-               if (fw_len % 4) {
-                       printk(KERN_ERR
-                              "%s: firmware '%s' size is not multiple of 32bit, aborting!\n",
-                              "prism54", priv->firmware);
-                       release_firmware(fw_entry);
-                       return -EILSEQ; /* Illegal byte sequence  */;
-               }
-
-               while (fw_len > 0) {
-                       long _fw_len =
-                           (fw_len >
-                            ISL38XX_MEMORY_WINDOW_SIZE) ?
-                           ISL38XX_MEMORY_WINDOW_SIZE : fw_len;
-                       u32 __iomem *dev_fw_ptr = device_base + ISL38XX_DIRECT_MEM_WIN;
-
-                       /* set the card's base address for writing the data */
-                       isl38xx_w32_flush(device_base, reg,
-                                         ISL38XX_DIR_MEM_BASE_REG);
-                       wmb();  /* be paranoid */
-
-                       /* increment the write address for next iteration */
-                       reg += _fw_len;
-                       fw_len -= _fw_len;
-
-                       /* write the data to the Direct Memory Window 32bit-wise */
-                       /* memcpy_toio() doesn't guarantee 32bit writes :-| */
-                       while (_fw_len > 0) {
-                               /* use non-swapping writel() */
-                               __raw_writel(*fw_ptr, dev_fw_ptr);
-                               fw_ptr++, dev_fw_ptr++;
-                               _fw_len -= 4;
-                       }
-
-                       /* flush PCI posting */
-                       (void) readl(device_base + ISL38XX_PCI_POSTING_FLUSH);
-                       wmb();  /* be paranoid again */
-
-                       BUG_ON(_fw_len != 0);
-               }
-
-               BUG_ON(fw_len != 0);
-
-               /* Firmware version is at offset 40 (also for "newmac") */
-               printk(KERN_DEBUG "%s: firmware version: %.8s\n",
-                      priv->ndev->name, fw_entry->data + 40);
-
-               release_firmware(fw_entry);
-       }
-
-       /* now reset the device
-        * clear the Reset & ClkRun bit, set the RAMBoot bit */
-       reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
-       reg &= ~ISL38XX_CTRL_STAT_CLKRUN;
-       reg &= ~ISL38XX_CTRL_STAT_RESET;
-       reg |= ISL38XX_CTRL_STAT_RAMBOOT;
-       isl38xx_w32_flush(device_base, reg, ISL38XX_CTRL_STAT_REG);
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* set the reset bit latches the host override and RAMBoot bits
-        * into the device for operation when the reset bit is reset */
-       reg |= ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       /* don't do flush PCI posting here! */
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* clear the reset bit should start the whole circus */
-       reg &= ~ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       /* don't do flush PCI posting here! */
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       return 0;
-}
-
-/******************************************************************************
-    Device Interrupt Handler
-******************************************************************************/
-
-irqreturn_t
-islpci_interrupt(int irq, void *config)
-{
-       u32 reg;
-       islpci_private *priv = config;
-       struct net_device *ndev = priv->ndev;
-       void __iomem *device = priv->device_base;
-       int powerstate = ISL38XX_PSM_POWERSAVE_STATE;
-
-       /* lock the interrupt handler */
-       spin_lock(&priv->slock);
-
-       /* received an interrupt request on a shared IRQ line
-        * first check whether the device is in sleep mode */
-       reg = readl(device + ISL38XX_CTRL_STAT_REG);
-       if (reg & ISL38XX_CTRL_STAT_SLEEPMODE)
-               /* device is in sleep mode, IRQ was generated by someone else */
-       {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_TRACING, "Assuming someone else called the IRQ\n");
-#endif
-               spin_unlock(&priv->slock);
-               return IRQ_NONE;
-       }
-
-
-       /* check whether there is any source of interrupt on the device */
-       reg = readl(device + ISL38XX_INT_IDENT_REG);
-
-       /* also check the contents of the Interrupt Enable Register, because this
-        * will filter out interrupt sources from other devices on the same irq ! */
-       reg &= readl(device + ISL38XX_INT_EN_REG);
-       reg &= ISL38XX_INT_SOURCES;
-
-       if (reg != 0) {
-               if (islpci_get_state(priv) != PRV_STATE_SLEEP)
-                       powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
-               /* reset the request bits in the Identification register */
-               isl38xx_w32_flush(device, reg, ISL38XX_INT_ACK_REG);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_FUNCTION_CALLS,
-                     "IRQ: Identification register 0x%p 0x%x\n", device, reg);
-#endif
-
-               /* check for each bit in the register separately */
-               if (reg & ISL38XX_INT_IDENT_UPDATE) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       /* Queue has been updated */
-                       DEBUG(SHOW_TRACING, "IRQ: Update flag\n");
-
-                       DEBUG(SHOW_QUEUE_INDEXES,
-                             "CB drv Qs: [%i][%i][%i][%i][%i][%i]\n",
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[0]),
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[1]),
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[2]),
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[3]),
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[4]),
-                             le32_to_cpu(priv->control_block->
-                                         driver_curr_frag[5])
-                           );
-
-                       DEBUG(SHOW_QUEUE_INDEXES,
-                             "CB dev Qs: [%i][%i][%i][%i][%i][%i]\n",
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[0]),
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[1]),
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[2]),
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[3]),
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[4]),
-                             le32_to_cpu(priv->control_block->
-                                         device_curr_frag[5])
-                           );
-#endif
-
-                       /* cleanup the data low transmit queue */
-                       islpci_eth_cleanup_transmit(priv, priv->control_block);
-
-                       /* device is in active state, update the
-                        * powerstate flag if necessary */
-                       powerstate = ISL38XX_PSM_ACTIVE_STATE;
-
-                       /* check all three queues in priority order
-                        * call the PIMFOR receive function until the
-                        * queue is empty */
-                       if (isl38xx_in_queue(priv->control_block,
-                                               ISL38XX_CB_RX_MGMTQ) != 0) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                               DEBUG(SHOW_TRACING,
-                                     "Received frame in Management Queue\n");
-#endif
-                               islpci_mgt_receive(ndev);
-
-                               islpci_mgt_cleanup_transmit(ndev);
-
-                               /* Refill slots in receive queue */
-                               islpci_mgmt_rx_fill(ndev);
-
-                               /* no need to trigger the device, next
-                                   islpci_mgt_transaction does it */
-                       }
-
-                       while (isl38xx_in_queue(priv->control_block,
-                                               ISL38XX_CB_RX_DATA_LQ) != 0) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                               DEBUG(SHOW_TRACING,
-                                     "Received frame in Data Low Queue\n");
-#endif
-                               islpci_eth_receive(priv);
-                       }
-
-                       /* check whether the data transmit queues were full */
-                       if (priv->data_low_tx_full) {
-                               /* check whether the transmit is not full anymore */
-                               if (ISL38XX_CB_TX_QSIZE -
-                                   isl38xx_in_queue(priv->control_block,
-                                                    ISL38XX_CB_TX_DATA_LQ) >=
-                                   ISL38XX_MIN_QTHRESHOLD) {
-                                       /* nope, the driver is ready for more network frames */
-                                       netif_wake_queue(priv->ndev);
-
-                                       /* reset the full flag */
-                                       priv->data_low_tx_full = 0;
-                               }
-                       }
-               }
-
-               if (reg & ISL38XX_INT_IDENT_INIT) {
-                       /* Device has been initialized */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING,
-                             "IRQ: Init flag, device initialized\n");
-#endif
-                       wake_up(&priv->reset_done);
-               }
-
-               if (reg & ISL38XX_INT_IDENT_SLEEP) {
-                       /* Device intends to move to powersave state */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "IRQ: Sleep flag\n");
-#endif
-                       isl38xx_handle_sleep_request(priv->control_block,
-                                                    &powerstate,
-                                                    priv->device_base);
-               }
-
-               if (reg & ISL38XX_INT_IDENT_WAKEUP) {
-                       /* Device has been woken up to active state */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "IRQ: Wakeup flag\n");
-#endif
-
-                       isl38xx_handle_wakeup(priv->control_block,
-                                             &powerstate, priv->device_base);
-               }
-       } else {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_TRACING, "Assuming someone else called the IRQ\n");
-#endif
-               spin_unlock(&priv->slock);
-               return IRQ_NONE;
-       }
-
-       /* sleep -> ready */
-       if (islpci_get_state(priv) == PRV_STATE_SLEEP
-           && powerstate == ISL38XX_PSM_ACTIVE_STATE)
-               islpci_set_state(priv, PRV_STATE_READY);
-
-       /* !sleep -> sleep */
-       if (islpci_get_state(priv) != PRV_STATE_SLEEP
-           && powerstate == ISL38XX_PSM_POWERSAVE_STATE)
-               islpci_set_state(priv, PRV_STATE_SLEEP);
-
-       /* unlock the interrupt handler */
-       spin_unlock(&priv->slock);
-
-       return IRQ_HANDLED;
-}
-
-/******************************************************************************
-    Network Interface Control & Statistical functions
-******************************************************************************/
-static int
-islpci_open(struct net_device *ndev)
-{
-       u32 rc;
-       islpci_private *priv = netdev_priv(ndev);
-
-       /* reset data structures, upload firmware and reset device */
-       rc = islpci_reset(priv,1);
-       if (rc) {
-               prism54_bring_down(priv);
-               return rc; /* Returns informative message */
-       }
-
-       netif_start_queue(ndev);
-
-       /* Turn off carrier if in STA or Ad-hoc mode. It will be turned on
-        * once the firmware receives a trap of being associated
-        * (GEN_OID_LINKSTATE). In other modes (AP or WDS or monitor) we
-        * should just leave the carrier on as its expected the firmware
-        * won't send us a trigger. */
-       if (priv->iw_mode == IW_MODE_INFRA || priv->iw_mode == IW_MODE_ADHOC)
-               netif_carrier_off(ndev);
-       else
-               netif_carrier_on(ndev);
-
-       return 0;
-}
-
-static int
-islpci_close(struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       printk(KERN_DEBUG "%s: islpci_close ()\n", ndev->name);
-
-       netif_stop_queue(ndev);
-
-       return prism54_bring_down(priv);
-}
-
-static int
-prism54_bring_down(islpci_private *priv)
-{
-       void __iomem *device_base = priv->device_base;
-       u32 reg;
-       /* we are going to shutdown the device */
-       islpci_set_state(priv, PRV_STATE_PREBOOT);
-
-       /* disable all device interrupts in case they weren't */
-       isl38xx_disable_interrupts(priv->device_base);
-
-       /* For safety reasons, we may want to ensure that no DMA transfer is
-        * currently in progress by emptying the TX and RX queues. */
-
-       /* wait until interrupts have finished executing on other CPUs */
-       synchronize_irq(priv->pdev->irq);
-
-       reg = readl(device_base + ISL38XX_CTRL_STAT_REG);
-       reg &= ~(ISL38XX_CTRL_STAT_RESET | ISL38XX_CTRL_STAT_RAMBOOT);
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       reg |= ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-       udelay(ISL38XX_WRITEIO_DELAY);
-
-       /* clear the Reset bit */
-       reg &= ~ISL38XX_CTRL_STAT_RESET;
-       writel(reg, device_base + ISL38XX_CTRL_STAT_REG);
-       wmb();
-
-       /* wait a while for the device to reset */
-       schedule_timeout_uninterruptible(msecs_to_jiffies(50));
-
-       return 0;
-}
-
-static int
-islpci_upload_fw(islpci_private *priv)
-{
-       islpci_state_t old_state;
-       u32 rc;
-
-       old_state = islpci_set_state(priv, PRV_STATE_BOOT);
-
-       printk(KERN_DEBUG "%s: uploading firmware...\n", priv->ndev->name);
-
-       rc = isl_upload_firmware(priv);
-       if (rc) {
-               /* error uploading the firmware */
-               printk(KERN_ERR "%s: could not upload firmware ('%s')\n",
-                      priv->ndev->name, priv->firmware);
-
-               islpci_set_state(priv, old_state);
-               return rc;
-       }
-
-       printk(KERN_DEBUG "%s: firmware upload complete\n",
-              priv->ndev->name);
-
-       islpci_set_state(priv, PRV_STATE_POSTBOOT);
-
-       return 0;
-}
-
-static int
-islpci_reset_if(islpci_private *priv)
-{
-       long remaining;
-       int result = -ETIME;
-       int count;
-
-       DEFINE_WAIT(wait);
-       prepare_to_wait(&priv->reset_done, &wait, TASK_UNINTERRUPTIBLE);
-
-       /* now the last step is to reset the interface */
-       isl38xx_interface_reset(priv->device_base, priv->device_host_address);
-       islpci_set_state(priv, PRV_STATE_PREINIT);
-
-        for(count = 0; count < 2 && result; count++) {
-               /* The software reset acknowledge needs about 220 msec here.
-                * Be conservative and wait for up to one second. */
-
-               remaining = schedule_timeout_uninterruptible(HZ);
-
-               if(remaining > 0) {
-                       result = 0;
-                       break;
-               }
-
-               /* If we're here it's because our IRQ hasn't yet gone through.
-                * Retry a bit more...
-                */
-               printk(KERN_ERR "%s: no 'reset complete' IRQ seen - retrying\n",
-                       priv->ndev->name);
-       }
-
-       finish_wait(&priv->reset_done, &wait);
-
-       if (result) {
-               printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
-               return result;
-       }
-
-       islpci_set_state(priv, PRV_STATE_INIT);
-
-       /* Now that the device is 100% up, let's allow
-        * for the other interrupts --
-        * NOTE: this is not *yet* true since we've only allowed the
-        * INIT interrupt on the IRQ line. We can perhaps poll
-        * the IRQ line until we know for sure the reset went through */
-       isl38xx_enable_common_interrupts(priv->device_base);
-
-       down_write(&priv->mib_sem);
-       result = mgt_commit(priv);
-       if (result) {
-               printk(KERN_ERR "%s: interface reset failure\n", priv->ndev->name);
-               up_write(&priv->mib_sem);
-               return result;
-       }
-       up_write(&priv->mib_sem);
-
-       islpci_set_state(priv, PRV_STATE_READY);
-
-       printk(KERN_DEBUG "%s: interface reset complete\n", priv->ndev->name);
-       return 0;
-}
-
-int
-islpci_reset(islpci_private *priv, int reload_firmware)
-{
-       isl38xx_control_block *cb =    /* volatile not needed */
-               (isl38xx_control_block *) priv->control_block;
-       unsigned counter;
-       int rc;
-
-       if (reload_firmware)
-               islpci_set_state(priv, PRV_STATE_PREBOOT);
-       else
-               islpci_set_state(priv, PRV_STATE_POSTBOOT);
-
-       printk(KERN_DEBUG "%s: resetting device...\n", priv->ndev->name);
-
-       /* disable all device interrupts in case they weren't */
-       isl38xx_disable_interrupts(priv->device_base);
-
-       /* flush all management queues */
-       priv->index_mgmt_tx = 0;
-       priv->index_mgmt_rx = 0;
-
-       /* clear the indexes in the frame pointer */
-       for (counter = 0; counter < ISL38XX_CB_QCOUNT; counter++) {
-               cb->driver_curr_frag[counter] = cpu_to_le32(0);
-               cb->device_curr_frag[counter] = cpu_to_le32(0);
-       }
-
-       /* reset the mgmt receive queue */
-       for (counter = 0; counter < ISL38XX_CB_MGMT_QSIZE; counter++) {
-               isl38xx_fragment *frag = &cb->rx_data_mgmt[counter];
-               frag->size = cpu_to_le16(MGMT_FRAME_SIZE);
-               frag->flags = 0;
-               frag->address = cpu_to_le32(priv->mgmt_rx[counter].pci_addr);
-       }
-
-       for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
-               cb->rx_data_low[counter].address =
-                   cpu_to_le32((u32) priv->pci_map_rx_address[counter]);
-       }
-
-       /* since the receive queues are filled with empty fragments, now we can
-        * set the corresponding indexes in the Control Block */
-       priv->control_block->driver_curr_frag[ISL38XX_CB_RX_DATA_LQ] =
-           cpu_to_le32(ISL38XX_CB_RX_QSIZE);
-       priv->control_block->driver_curr_frag[ISL38XX_CB_RX_MGMTQ] =
-           cpu_to_le32(ISL38XX_CB_MGMT_QSIZE);
-
-       /* reset the remaining real index registers and full flags */
-       priv->free_data_rx = 0;
-       priv->free_data_tx = 0;
-       priv->data_low_tx_full = 0;
-
-       if (reload_firmware) { /* Should we load the firmware ? */
-       /* now that the data structures are cleaned up, upload
-        * firmware and reset interface */
-               rc = islpci_upload_fw(priv);
-               if (rc) {
-                       printk(KERN_ERR "%s: islpci_reset: failure\n",
-                               priv->ndev->name);
-                       return rc;
-               }
-       }
-
-       /* finally reset interface */
-       rc = islpci_reset_if(priv);
-       if (rc)
-               printk(KERN_ERR "prism54: Your card/socket may be faulty, or IRQ line too busy :(\n");
-       return rc;
-}
-
-/******************************************************************************
-    Network device configuration functions
-******************************************************************************/
-static int
-islpci_alloc_memory(islpci_private *priv)
-{
-       int counter;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       printk(KERN_DEBUG "islpci_alloc_memory\n");
-#endif
-
-       /* remap the PCI device base address to accessible */
-       if (!(priv->device_base =
-             ioremap(pci_resource_start(priv->pdev, 0),
-                     ISL38XX_PCI_MEM_SIZE))) {
-               /* error in remapping the PCI device memory address range */
-               printk(KERN_ERR "PCI memory remapping failed\n");
-               return -1;
-       }
-
-       /* memory layout for consistent DMA region:
-        *
-        * Area 1: Control Block for the device interface
-        * Area 2: Power Save Mode Buffer for temporary frame storage. Be aware that
-        *         the number of supported stations in the AP determines the minimal
-        *         size of the buffer !
-        */
-
-       /* perform the allocation */
-       priv->driver_mem_address = dma_alloc_coherent(&priv->pdev->dev,
-                                                     HOST_MEM_BLOCK,
-                                                     &priv->device_host_address,
-                                                     GFP_KERNEL);
-
-       if (!priv->driver_mem_address) {
-               /* error allocating the block of PCI memory */
-               printk(KERN_ERR "%s: could not allocate DMA memory, aborting!",
-                      "prism54");
-               return -1;
-       }
-
-       /* assign the Control Block to the first address of the allocated area */
-       priv->control_block =
-           (isl38xx_control_block *) priv->driver_mem_address;
-
-       /* set the Power Save Buffer pointer directly behind the CB */
-       priv->device_psm_buffer =
-               priv->device_host_address + CONTROL_BLOCK_SIZE;
-
-       /* make sure all buffer pointers are initialized */
-       for (counter = 0; counter < ISL38XX_CB_QCOUNT; counter++) {
-               priv->control_block->driver_curr_frag[counter] = cpu_to_le32(0);
-               priv->control_block->device_curr_frag[counter] = cpu_to_le32(0);
-       }
-
-       priv->index_mgmt_rx = 0;
-       memset(priv->mgmt_rx, 0, sizeof(priv->mgmt_rx));
-       memset(priv->mgmt_tx, 0, sizeof(priv->mgmt_tx));
-
-       /* allocate rx queue for management frames */
-       if (islpci_mgmt_rx_fill(priv->ndev) < 0)
-               goto out_free;
-
-       /* now get the data rx skb's */
-       memset(priv->data_low_rx, 0, sizeof (priv->data_low_rx));
-       memset(priv->pci_map_rx_address, 0, sizeof (priv->pci_map_rx_address));
-
-       for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
-               struct sk_buff *skb;
-
-               /* allocate an sk_buff for received data frames storage
-                * each frame on receive size consists of 1 fragment
-                * include any required allignment operations */
-               if (!(skb = dev_alloc_skb(MAX_FRAGMENT_SIZE_RX + 2))) {
-                       /* error allocating an sk_buff structure elements */
-                       printk(KERN_ERR "Error allocating skb.\n");
-                       skb = NULL;
-                       goto out_free;
-               }
-               skb_reserve(skb, (4 - (long) skb->data) & 0x03);
-               /* add the new allocated sk_buff to the buffer array */
-               priv->data_low_rx[counter] = skb;
-
-               /* map the allocated skb data area to pci */
-               priv->pci_map_rx_address[counter] =
-                   dma_map_single(&priv->pdev->dev, (void *)skb->data,
-                                  MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
-               if (dma_mapping_error(&priv->pdev->dev, priv->pci_map_rx_address[counter])) {
-                       priv->pci_map_rx_address[counter] = 0;
-                       /* error mapping the buffer to device
-                          accessible memory address */
-                       printk(KERN_ERR "failed to map skb DMA'able\n");
-                       goto out_free;
-               }
-       }
-
-       prism54_acl_init(&priv->acl);
-       prism54_wpa_bss_ie_init(priv);
-       if (mgt_init(priv))
-               goto out_free;
-
-       return 0;
- out_free:
-       islpci_free_memory(priv);
-       return -1;
-}
-
-int
-islpci_free_memory(islpci_private *priv)
-{
-       int counter;
-
-       if (priv->device_base)
-               iounmap(priv->device_base);
-       priv->device_base = NULL;
-
-       /* free consistent DMA area... */
-       if (priv->driver_mem_address)
-               dma_free_coherent(&priv->pdev->dev, HOST_MEM_BLOCK,
-                                 priv->driver_mem_address,
-                                 priv->device_host_address);
-
-       /* clear some dangling pointers */
-       priv->driver_mem_address = NULL;
-       priv->device_host_address = 0;
-       priv->device_psm_buffer = 0;
-       priv->control_block = NULL;
-
-        /* clean up mgmt rx buffers */
-        for (counter = 0; counter < ISL38XX_CB_MGMT_QSIZE; counter++) {
-               struct islpci_membuf *buf = &priv->mgmt_rx[counter];
-               if (buf->pci_addr)
-                       dma_unmap_single(&priv->pdev->dev, buf->pci_addr,
-                                        buf->size, DMA_FROM_DEVICE);
-               buf->pci_addr = 0;
-               kfree(buf->mem);
-               buf->size = 0;
-               buf->mem = NULL;
-        }
-
-       /* clean up data rx buffers */
-       for (counter = 0; counter < ISL38XX_CB_RX_QSIZE; counter++) {
-               if (priv->pci_map_rx_address[counter])
-                       dma_unmap_single(&priv->pdev->dev,
-                                        priv->pci_map_rx_address[counter],
-                                        MAX_FRAGMENT_SIZE_RX + 2,
-                                        DMA_FROM_DEVICE);
-               priv->pci_map_rx_address[counter] = 0;
-
-               if (priv->data_low_rx[counter])
-                       dev_kfree_skb(priv->data_low_rx[counter]);
-               priv->data_low_rx[counter] = NULL;
-       }
-
-       /* Free the access control list and the WPA list */
-       prism54_acl_clean(&priv->acl);
-       prism54_wpa_bss_ie_clean(priv);
-       mgt_clean(priv);
-
-       return 0;
-}
-
-#if 0
-static void
-islpci_set_multicast_list(struct net_device *dev)
-{
-       /* put device into promisc mode and let network layer handle it */
-}
-#endif
-
-static void islpci_ethtool_get_drvinfo(struct net_device *dev,
-                                       struct ethtool_drvinfo *info)
-{
-       strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
-       strlcpy(info->version, DRV_VERSION, sizeof(info->version));
-}
-
-static const struct ethtool_ops islpci_ethtool_ops = {
-       .get_drvinfo = islpci_ethtool_get_drvinfo,
-};
-
-static const struct net_device_ops islpci_netdev_ops = {
-       .ndo_open               = islpci_open,
-       .ndo_stop               = islpci_close,
-       .ndo_start_xmit         = islpci_eth_transmit,
-       .ndo_tx_timeout         = islpci_eth_tx_timeout,
-       .ndo_set_mac_address    = prism54_set_mac_address,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static struct device_type wlan_type = {
-       .name   = "wlan",
-};
-
-struct net_device *
-islpci_setup(struct pci_dev *pdev)
-{
-       islpci_private *priv;
-       struct net_device *ndev = alloc_etherdev(sizeof (islpci_private));
-
-       if (!ndev)
-               return ndev;
-
-       pci_set_drvdata(pdev, ndev);
-       SET_NETDEV_DEV(ndev, &pdev->dev);
-       SET_NETDEV_DEVTYPE(ndev, &wlan_type);
-
-       /* setup the structure members */
-       ndev->base_addr = pci_resource_start(pdev, 0);
-       ndev->irq = pdev->irq;
-
-       /* initialize the function pointers */
-       ndev->netdev_ops = &islpci_netdev_ops;
-       ndev->wireless_handlers = &prism54_handler_def;
-       ndev->ethtool_ops = &islpci_ethtool_ops;
-
-       /* ndev->set_multicast_list = &islpci_set_multicast_list; */
-       ndev->addr_len = ETH_ALEN;
-       /* Get a non-zero dummy MAC address for nameif. Jean II */
-       memcpy(ndev->dev_addr, dummy_mac, ETH_ALEN);
-
-       ndev->watchdog_timeo = ISLPCI_TX_TIMEOUT;
-
-       /* allocate a private device structure to the network device  */
-       priv = netdev_priv(ndev);
-       priv->ndev = ndev;
-       priv->pdev = pdev;
-       priv->monitor_type = ARPHRD_IEEE80211;
-       priv->ndev->type = (priv->iw_mode == IW_MODE_MONITOR) ?
-               priv->monitor_type : ARPHRD_ETHER;
-
-       /* Add pointers to enable iwspy support. */
-       priv->wireless_data.spy_data = &priv->spy_data;
-       ndev->wireless_data = &priv->wireless_data;
-
-       /* save the start and end address of the PCI memory area */
-       ndev->mem_start = (unsigned long) priv->device_base;
-       ndev->mem_end = ndev->mem_start + ISL38XX_PCI_MEM_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_TRACING, "PCI Memory remapped to 0x%p\n", priv->device_base);
-#endif
-
-       init_waitqueue_head(&priv->reset_done);
-
-       /* init the queue read locks, process wait counter */
-       mutex_init(&priv->mgmt_lock);
-       priv->mgmt_received = NULL;
-       init_waitqueue_head(&priv->mgmt_wqueue);
-       mutex_init(&priv->stats_lock);
-       spin_lock_init(&priv->slock);
-
-       /* init state machine with off#1 state */
-       priv->state = PRV_STATE_OFF;
-       priv->state_off = 1;
-
-       /* initialize workqueue's */
-       INIT_WORK(&priv->stats_work, prism54_update_stats);
-       priv->stats_timestamp = 0;
-
-       INIT_WORK(&priv->reset_task, islpci_do_reset_and_wake);
-       priv->reset_task_pending = 0;
-
-       /* allocate various memory areas */
-       if (islpci_alloc_memory(priv))
-               goto do_free_netdev;
-
-       /* select the firmware file depending on the device id */
-       switch (pdev->device) {
-       case 0x3877:
-               strcpy(priv->firmware, ISL3877_IMAGE_FILE);
-               break;
-
-       case 0x3886:
-               strcpy(priv->firmware, ISL3886_IMAGE_FILE);
-               break;
-
-       default:
-               strcpy(priv->firmware, ISL3890_IMAGE_FILE);
-               break;
-       }
-
-       if (register_netdev(ndev)) {
-               DEBUG(SHOW_ERROR_MESSAGES,
-                     "ERROR: register_netdev() failed\n");
-               goto do_islpci_free_memory;
-       }
-
-       return ndev;
-
-      do_islpci_free_memory:
-       islpci_free_memory(priv);
-      do_free_netdev:
-       free_netdev(ndev);
-       priv = NULL;
-       return NULL;
-}
-
-islpci_state_t
-islpci_set_state(islpci_private *priv, islpci_state_t new_state)
-{
-       islpci_state_t old_state;
-
-       /* lock */
-       old_state = priv->state;
-
-       /* this means either a race condition or some serious error in
-        * the driver code */
-       switch (new_state) {
-       case PRV_STATE_OFF:
-               priv->state_off++;
-               fallthrough;
-       default:
-               priv->state = new_state;
-               break;
-
-       case PRV_STATE_PREBOOT:
-               /* there are actually many off-states, enumerated by
-                * state_off */
-               if (old_state == PRV_STATE_OFF)
-                       priv->state_off--;
-
-               /* only if hw_unavailable is zero now it means we either
-                * were in off#1 state, or came here from
-                * somewhere else */
-               if (!priv->state_off)
-                       priv->state = new_state;
-               break;
-       }
-#if 0
-       printk(KERN_DEBUG "%s: state transition %d -> %d (off#%d)\n",
-              priv->ndev->name, old_state, new_state, priv->state_off);
-#endif
-
-       /* invariants */
-       BUG_ON(priv->state_off < 0);
-       BUG_ON(priv->state_off && (priv->state != PRV_STATE_OFF));
-       BUG_ON(!priv->state_off && (priv->state == PRV_STATE_OFF));
-
-       /* unlock */
-       return old_state;
-}
diff --git a/drivers/net/wireless/intersil/prism54/islpci_dev.h b/drivers/net/wireless/intersil/prism54/islpci_dev.h
deleted file mode 100644 (file)
index 4753418..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- *  Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- *  Copyright (C) 2003 Aurelien Alleaume <slts@free.fr>
- */
-
-#ifndef _ISLPCI_DEV_H
-#define _ISLPCI_DEV_H
-
-#include <linux/irqreturn.h>
-#include <linux/netdevice.h>
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-
-#include "isl_38xx.h"
-#include "isl_oid.h"
-#include "islpci_mgt.h"
-
-/* some states might not be superflous and may be removed when
-   design is finalized (hvr) */
-typedef enum {
-       PRV_STATE_OFF = 0,      /* this means hw_unavailable is != 0 */
-       PRV_STATE_PREBOOT,      /* we are in a pre-boot state (empty RAM) */
-       PRV_STATE_BOOT,         /* boot state (fw upload, run fw) */
-       PRV_STATE_POSTBOOT,     /* after boot state, need reset now */
-       PRV_STATE_PREINIT,      /* pre-init state */
-       PRV_STATE_INIT,         /* init state (restore MIB backup to device) */
-       PRV_STATE_READY,        /* driver&device are in operational state */
-       PRV_STATE_SLEEP         /* device in sleep mode */
-} islpci_state_t;
-
-/* ACL using MAC address */
-struct mac_entry {
-   struct list_head _list;
-   char addr[ETH_ALEN];
-};
-
-struct islpci_acl {
-   enum { MAC_POLICY_OPEN=0, MAC_POLICY_ACCEPT=1, MAC_POLICY_REJECT=2 } policy;
-   struct list_head mac_list;  /* a list of mac_entry */
-   int size;   /* size of queue */
-   struct mutex lock;   /* accessed in ioctls and trap_work */
-};
-
-struct islpci_membuf {
-       int size;                   /* size of memory */
-       void *mem;                  /* address of memory as seen by CPU */
-       dma_addr_t pci_addr;        /* address of memory as seen by device */
-};
-
-#define MAX_BSS_WPA_IE_COUNT 64
-#define MAX_WPA_IE_LEN 64
-struct islpci_bss_wpa_ie {
-       struct list_head list;
-       unsigned long last_update;
-       u8 bssid[ETH_ALEN];
-       u8 wpa_ie[MAX_WPA_IE_LEN];
-       size_t wpa_ie_len;
-
-};
-
-typedef struct {
-       spinlock_t slock;       /* generic spinlock; */
-
-       u32 priv_oid;
-
-       /* our mib cache */
-       u32 iw_mode;
-        struct rw_semaphore mib_sem;
-       void **mib;
-       char nickname[IW_ESSID_MAX_SIZE+1];
-
-       /* Take care of the wireless stats */
-       struct work_struct stats_work;
-       struct mutex stats_lock;
-       /* remember when we last updated the stats */
-       unsigned long stats_timestamp;
-       /* The first is accessed under semaphore locking.
-        * The second is the clean one we return to iwconfig.
-        */
-       struct iw_statistics local_iwstatistics;
-       struct iw_statistics iwstatistics;
-
-       struct iw_spy_data spy_data; /* iwspy support */
-
-       struct iw_public_data wireless_data;
-
-       int monitor_type; /* ARPHRD_IEEE80211 or ARPHRD_IEEE80211_PRISM */
-
-       struct islpci_acl acl;
-
-       /* PCI bus allocation & configuration members */
-       struct pci_dev *pdev;   /* PCI structure information */
-       char firmware[33];
-
-       void __iomem *device_base;      /* ioremapped device base address */
-
-       /* consistent DMA region */
-       void *driver_mem_address;       /* base DMA address */
-       dma_addr_t device_host_address; /* base DMA address (bus address) */
-       dma_addr_t device_psm_buffer;   /* host memory for PSM buffering (bus address) */
-
-       /* our network_device structure  */
-       struct net_device *ndev;
-
-       /* device queue interface members */
-       struct isl38xx_cb *control_block;       /* device control block
-                                                          (== driver_mem_address!) */
-
-       /* Each queue has three indexes:
-        *   free/index_mgmt/data_rx/tx (called index, see below),
-        *   driver_curr_frag, and device_curr_frag (in the control block)
-        * All indexes are ever-increasing, but interpreted modulo the
-        * device queue size when used.
-        *   index <= device_curr_frag <= driver_curr_frag  at all times
-        * For rx queues, [index, device_curr_frag) contains fragments
-        * that the interrupt processing needs to handle (owned by driver).
-        * [device_curr_frag, driver_curr_frag) is the free space in the
-        * rx queue, waiting for data (owned by device).  The driver
-        * increments driver_curr_frag to indicate to the device that more
-        * buffers are available.
-        * If device_curr_frag == driver_curr_frag, no more rx buffers are
-        * available, and the rx DMA engine of the device is halted.
-        * For tx queues, [index, device_curr_frag) contains fragments
-        * where tx is done; they need to be freed (owned by driver).
-        * [device_curr_frag, driver_curr_frag) contains the frames
-        * that are being transferred (owned by device).  The driver
-        * increments driver_curr_frag to indicate that more tx work
-        * needs to be done.
-        */
-       u32 index_mgmt_rx;              /* real index mgmt rx queue */
-       u32 index_mgmt_tx;              /* read index mgmt tx queue */
-       u32 free_data_rx;       /* free pointer data rx queue */
-       u32 free_data_tx;       /* free pointer data tx queue */
-       u32 data_low_tx_full;   /* full detected flag */
-
-       /* frame memory buffers for the device queues */
-       struct islpci_membuf mgmt_tx[ISL38XX_CB_MGMT_QSIZE];
-       struct islpci_membuf mgmt_rx[ISL38XX_CB_MGMT_QSIZE];
-       struct sk_buff *data_low_tx[ISL38XX_CB_TX_QSIZE];
-       struct sk_buff *data_low_rx[ISL38XX_CB_RX_QSIZE];
-       dma_addr_t pci_map_tx_address[ISL38XX_CB_TX_QSIZE];
-       dma_addr_t pci_map_rx_address[ISL38XX_CB_RX_QSIZE];
-
-       /* wait for a reset interrupt */
-       wait_queue_head_t reset_done;
-
-       /* used by islpci_mgt_transaction */
-       struct mutex mgmt_lock; /* serialize access to mailbox and wqueue */
-       struct islpci_mgmtframe *mgmt_received;   /* mbox for incoming frame */
-       wait_queue_head_t mgmt_wqueue;            /* waitqueue for mbox */
-
-       /* state machine */
-       islpci_state_t state;
-       int state_off;          /* enumeration of off-state, if 0 then
-                                * we're not in any off-state */
-
-       /* WPA stuff */
-       int wpa; /* WPA mode enabled */
-       struct list_head bss_wpa_list;
-       int num_bss_wpa;
-       struct mutex wpa_lock;
-       u8 wpa_ie[MAX_WPA_IE_LEN];
-       size_t wpa_ie_len;
-
-       struct work_struct reset_task;
-       int reset_task_pending;
-} islpci_private;
-
-static inline islpci_state_t
-islpci_get_state(islpci_private *priv)
-{
-       /* lock */
-       return priv->state;
-       /* unlock */
-}
-
-islpci_state_t islpci_set_state(islpci_private *priv, islpci_state_t new_state);
-
-#define ISLPCI_TX_TIMEOUT               (2*HZ)
-
-irqreturn_t islpci_interrupt(int, void *);
-
-int prism54_post_setup(islpci_private *, int);
-int islpci_reset(islpci_private *, int);
-
-static inline void
-islpci_trigger(islpci_private *priv)
-{
-       isl38xx_trigger_device(islpci_get_state(priv) == PRV_STATE_SLEEP,
-                              priv->device_base);
-}
-
-int islpci_free_memory(islpci_private *);
-struct net_device *islpci_setup(struct pci_dev *);
-
-#define DRV_NAME       "prism54"
-#define DRV_VERSION    "1.2"
-
-#endif                         /* _ISLPCI_DEV_H */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c
deleted file mode 100644 (file)
index 74dd657..0000000
+++ /dev/null
@@ -1,489 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#include <linux/module.h>
-#include <linux/gfp.h>
-
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_arp.h>
-#include <asm/byteorder.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_eth.h"
-#include "islpci_mgt.h"
-#include "oid_mgt.h"
-
-/******************************************************************************
-    Network Interface functions
-******************************************************************************/
-void
-islpci_eth_cleanup_transmit(islpci_private *priv,
-                           isl38xx_control_block *control_block)
-{
-       struct sk_buff *skb;
-       u32 index;
-
-       /* compare the control block read pointer with the free pointer */
-       while (priv->free_data_tx !=
-              le32_to_cpu(control_block->
-                          device_curr_frag[ISL38XX_CB_TX_DATA_LQ])) {
-               /* read the index of the first fragment to be freed */
-               index = priv->free_data_tx % ISL38XX_CB_TX_QSIZE;
-
-               /* check for holes in the arrays caused by multi fragment frames
-                * searching for the last fragment of a frame */
-               if (priv->pci_map_tx_address[index]) {
-                       /* entry is the last fragment of a frame
-                        * free the skb structure and unmap pci memory */
-                       skb = priv->data_low_tx[index];
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING,
-                             "cleanup skb %p skb->data %p skb->len %u truesize %u\n",
-                             skb, skb->data, skb->len, skb->truesize);
-#endif
-
-                       dma_unmap_single(&priv->pdev->dev,
-                                        priv->pci_map_tx_address[index],
-                                        skb->len, DMA_TO_DEVICE);
-                       dev_kfree_skb_irq(skb);
-                       skb = NULL;
-               }
-               /* increment the free data low queue pointer */
-               priv->free_data_tx++;
-       }
-}
-
-netdev_tx_t
-islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       isl38xx_control_block *cb = priv->control_block;
-       u32 index;
-       dma_addr_t pci_map_address;
-       int frame_size;
-       isl38xx_fragment *fragment;
-       int offset;
-       struct sk_buff *newskb;
-       int newskb_offset;
-       unsigned long flags;
-       unsigned char wds_mac[6];
-       u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_eth_transmit\n");
-#endif
-
-       /* lock the driver code */
-       spin_lock_irqsave(&priv->slock, flags);
-
-       /* check whether the destination queue has enough fragments for the frame */
-       curr_frag = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_TX_DATA_LQ]);
-       if (unlikely(curr_frag - priv->free_data_tx >= ISL38XX_CB_TX_QSIZE)) {
-               printk(KERN_ERR "%s: transmit device queue full when awake\n",
-                      ndev->name);
-               netif_stop_queue(ndev);
-
-               /* trigger the device */
-               isl38xx_w32_flush(priv->device_base, ISL38XX_DEV_INT_UPDATE,
-                                 ISL38XX_DEV_INT_REG);
-               udelay(ISL38XX_WRITEIO_DELAY);
-               goto drop_free;
-       }
-       /* Check alignment and WDS frame formatting. The start of the packet should
-        * be aligned on a 4-byte boundary. If WDS is enabled add another 6 bytes
-        * and add WDS address information */
-       if (likely(((long) skb->data & 0x03) | init_wds)) {
-               /* get the number of bytes to add and re-align */
-               offset = (4 - (long) skb->data) & 0x03;
-               offset += init_wds ? 6 : 0;
-
-               /* check whether the current skb can be used  */
-               if (!skb_cloned(skb) && (skb_tailroom(skb) >= offset)) {
-                       unsigned char *src = skb->data;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "skb offset %i wds %i\n", offset,
-                             init_wds);
-#endif
-
-                       /* align the buffer on 4-byte boundary */
-                       skb_reserve(skb, (4 - (long) skb->data) & 0x03);
-                       if (init_wds) {
-                               /* wds requires an additional address field of 6 bytes */
-                               skb_put(skb, 6);
-#ifdef ISLPCI_ETH_DEBUG
-                               printk("islpci_eth_transmit:wds_mac\n");
-#endif
-                               memmove(skb->data + 6, src, skb->len);
-                               skb_copy_to_linear_data(skb, wds_mac, 6);
-                       } else {
-                               memmove(skb->data, src, skb->len);
-                       }
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "memmove %p %p %i\n", skb->data,
-                             src, skb->len);
-#endif
-               } else {
-                       newskb =
-                           dev_alloc_skb(init_wds ? skb->len + 6 : skb->len);
-                       if (unlikely(newskb == NULL)) {
-                               printk(KERN_ERR "%s: Cannot allocate skb\n",
-                                      ndev->name);
-                               goto drop_free;
-                       }
-                       newskb_offset = (4 - (long) newskb->data) & 0x03;
-
-                       /* Check if newskb->data is aligned */
-                       if (newskb_offset)
-                               skb_reserve(newskb, newskb_offset);
-
-                       skb_put(newskb, init_wds ? skb->len + 6 : skb->len);
-                       if (init_wds) {
-                               skb_copy_from_linear_data(skb,
-                                                         newskb->data + 6,
-                                                         skb->len);
-                               skb_copy_to_linear_data(newskb, wds_mac, 6);
-#ifdef ISLPCI_ETH_DEBUG
-                               printk("islpci_eth_transmit:wds_mac\n");
-#endif
-                       } else
-                               skb_copy_from_linear_data(skb, newskb->data,
-                                                         skb->len);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n",
-                             newskb->data, skb->data, skb->len, init_wds);
-#endif
-
-                       newskb->dev = skb->dev;
-                       dev_kfree_skb_irq(skb);
-                       skb = newskb;
-               }
-       }
-       /* display the buffer contents for debugging */
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_BUFFER_CONTENTS, "\ntx %p ", skb->data);
-       display_buffer((char *) skb->data, skb->len);
-#endif
-
-       /* map the skb buffer to pci memory for DMA operation */
-       pci_map_address = dma_map_single(&priv->pdev->dev, (void *)skb->data,
-                                        skb->len, DMA_TO_DEVICE);
-       if (dma_mapping_error(&priv->pdev->dev, pci_map_address)) {
-               printk(KERN_WARNING "%s: cannot map buffer to PCI\n",
-                      ndev->name);
-               goto drop_free;
-       }
-       /* Place the fragment in the control block structure. */
-       index = curr_frag % ISL38XX_CB_TX_QSIZE;
-       fragment = &cb->tx_data_low[index];
-
-       priv->pci_map_tx_address[index] = pci_map_address;
-       /* store the skb address for future freeing  */
-       priv->data_low_tx[index] = skb;
-       /* set the proper fragment start address and size information */
-       frame_size = skb->len;
-       fragment->size = cpu_to_le16(frame_size);
-       fragment->flags = cpu_to_le16(0);       /* set to 1 if more fragments */
-       fragment->address = cpu_to_le32(pci_map_address);
-       curr_frag++;
-
-       /* The fragment address in the control block must have been
-        * written before announcing the frame buffer to device. */
-       wmb();
-       cb->driver_curr_frag[ISL38XX_CB_TX_DATA_LQ] = cpu_to_le32(curr_frag);
-
-       if (curr_frag - priv->free_data_tx + ISL38XX_MIN_QTHRESHOLD
-           > ISL38XX_CB_TX_QSIZE) {
-               /* stop sends from upper layers */
-               netif_stop_queue(ndev);
-
-               /* set the full flag for the transmission queue */
-               priv->data_low_tx_full = 1;
-       }
-
-       ndev->stats.tx_packets++;
-       ndev->stats.tx_bytes += skb->len;
-
-       /* trigger the device */
-       islpci_trigger(priv);
-
-       /* unlock the driver code */
-       spin_unlock_irqrestore(&priv->slock, flags);
-
-       return NETDEV_TX_OK;
-
-      drop_free:
-       ndev->stats.tx_dropped++;
-       spin_unlock_irqrestore(&priv->slock, flags);
-       dev_kfree_skb(skb);
-       return NETDEV_TX_OK;
-}
-
-static inline int
-islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
-{
-       /* The card reports full 802.11 packets but with a 20 bytes
-        * header and without the FCS. But there a is a bit that
-        * indicates if the packet is corrupted :-) */
-       struct rfmon_header *hdr = (struct rfmon_header *) (*skb)->data;
-
-       if (hdr->flags & 0x01)
-               /* This one is bad. Drop it ! */
-               return -1;
-       if (priv->ndev->type == ARPHRD_IEEE80211_PRISM) {
-               struct avs_80211_1_header *avs;
-               /* extract the relevant data from the header */
-               u32 clock = le32_to_cpu(hdr->clock);
-               u8 rate = hdr->rate;
-               u16 freq = le16_to_cpu(hdr->freq);
-               u8 rssi = hdr->rssi;
-
-               skb_pull(*skb, sizeof (struct rfmon_header));
-
-               if (skb_headroom(*skb) < sizeof (struct avs_80211_1_header)) {
-                       struct sk_buff *newskb = skb_copy_expand(*skb,
-                                                                sizeof (struct
-                                                                        avs_80211_1_header),
-                                                                0, GFP_ATOMIC);
-                       if (newskb) {
-                               dev_kfree_skb_irq(*skb);
-                               *skb = newskb;
-                       } else
-                               return -1;
-                       /* This behavior is not very subtile... */
-               }
-
-               /* make room for the new header and fill it. */
-               avs = skb_push(*skb, sizeof(struct avs_80211_1_header));
-
-               avs->version = cpu_to_be32(P80211CAPTURE_VERSION);
-               avs->length = cpu_to_be32(sizeof (struct avs_80211_1_header));
-               avs->mactime = cpu_to_be64(clock);
-               avs->hosttime = cpu_to_be64(jiffies);
-               avs->phytype = cpu_to_be32(6);  /*OFDM: 6 for (g), 8 for (a) */
-               avs->channel = cpu_to_be32(channel_of_freq(freq));
-               avs->datarate = cpu_to_be32(rate * 5);
-               avs->antenna = cpu_to_be32(0);  /*unknown */
-               avs->priority = cpu_to_be32(0); /*unknown */
-               avs->ssi_type = cpu_to_be32(3); /*2: dBm, 3: raw RSSI */
-               avs->ssi_signal = cpu_to_be32(rssi & 0x7f);
-               avs->ssi_noise = cpu_to_be32(priv->local_iwstatistics.qual.noise);      /*better than 'undefined', I assume */
-               avs->preamble = cpu_to_be32(0); /*unknown */
-               avs->encoding = cpu_to_be32(0); /*unknown */
-       } else
-               skb_pull(*skb, sizeof (struct rfmon_header));
-
-       (*skb)->protocol = htons(ETH_P_802_2);
-       skb_reset_mac_header(*skb);
-       (*skb)->pkt_type = PACKET_OTHERHOST;
-
-       return 0;
-}
-
-int
-islpci_eth_receive(islpci_private *priv)
-{
-       struct net_device *ndev = priv->ndev;
-       isl38xx_control_block *control_block = priv->control_block;
-       struct sk_buff *skb;
-       u16 size;
-       u32 index, offset;
-       unsigned char *src;
-       int discard = 0;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_eth_receive\n");
-#endif
-
-       /* the device has written an Ethernet frame in the data area
-        * of the sk_buff without updating the structure, do it now */
-       index = priv->free_data_rx % ISL38XX_CB_RX_QSIZE;
-       size = le16_to_cpu(control_block->rx_data_low[index].size);
-       skb = priv->data_low_rx[index];
-       offset = ((unsigned long)
-                 le32_to_cpu(control_block->rx_data_low[index].address) -
-                 (unsigned long) skb->data) & 3;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_TRACING,
-             "frq->addr %x skb->data %p skb->len %u offset %u truesize %u\n",
-             control_block->rx_data_low[priv->free_data_rx].address, skb->data,
-             skb->len, offset, skb->truesize);
-#endif
-
-       /* delete the streaming DMA mapping before processing the skb */
-       dma_unmap_single(&priv->pdev->dev, priv->pci_map_rx_address[index],
-                        MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
-
-       /* update the skb structure and align the buffer */
-       skb_put(skb, size);
-       if (offset) {
-               /* shift the buffer allocation offset bytes to get the right frame */
-               skb_pull(skb, 2);
-               skb_put(skb, 2);
-       }
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       /* display the buffer contents for debugging */
-       DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
-       display_buffer((char *) skb->data, skb->len);
-#endif
-
-       /* check whether WDS is enabled and whether the data frame is a WDS frame */
-
-       if (init_wds) {
-               /* WDS enabled, check for the wds address on the first 6 bytes of the buffer */
-               src = skb->data + 6;
-               memmove(skb->data, src, skb->len - 6);
-               skb_trim(skb, skb->len - 6);
-       }
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_TRACING, "Fragment size %i in skb at %p\n", size, skb);
-       DEBUG(SHOW_TRACING, "Skb data at %p, length %i\n", skb->data, skb->len);
-
-       /* display the buffer contents for debugging */
-       DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
-       display_buffer((char *) skb->data, skb->len);
-#endif
-       /* take care of monitor mode and spy monitoring. */
-       if (unlikely(priv->iw_mode == IW_MODE_MONITOR)) {
-               skb->dev = ndev;
-               discard = islpci_monitor_rx(priv, &skb);
-       } else {
-               if (unlikely(skb->data[2 * ETH_ALEN] == 0)) {
-                       /* The packet has a rx_annex. Read it for spy monitoring, Then
-                        * remove it, while keeping the 2 leading MAC addr.
-                        */
-                       struct iw_quality wstats;
-                       struct rx_annex_header *annex =
-                           (struct rx_annex_header *) skb->data;
-                       wstats.level = annex->rfmon.rssi;
-                       /* The noise value can be a bit outdated if nobody's
-                        * reading wireless stats... */
-                       wstats.noise = priv->local_iwstatistics.qual.noise;
-                       wstats.qual = wstats.level - wstats.noise;
-                       wstats.updated = 0x07;
-                       /* Update spy records */
-                       wireless_spy_update(ndev, annex->addr2, &wstats);
-
-                       skb_copy_from_linear_data(skb,
-                                                 (skb->data +
-                                                  sizeof(struct rfmon_header)),
-                                                 2 * ETH_ALEN);
-                       skb_pull(skb, sizeof (struct rfmon_header));
-               }
-               skb->protocol = eth_type_trans(skb, ndev);
-       }
-       skb->ip_summed = CHECKSUM_NONE;
-       ndev->stats.rx_packets++;
-       ndev->stats.rx_bytes += size;
-
-       /* deliver the skb to the network layer */
-#ifdef ISLPCI_ETH_DEBUG
-       printk
-           ("islpci_eth_receive:netif_rx %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-            skb->data[0], skb->data[1], skb->data[2], skb->data[3],
-            skb->data[4], skb->data[5]);
-#endif
-       if (unlikely(discard)) {
-               dev_kfree_skb_irq(skb);
-               skb = NULL;
-       } else
-               netif_rx(skb);
-
-       /* increment the read index for the rx data low queue */
-       priv->free_data_rx++;
-
-       /* add one or more sk_buff structures */
-       while (index =
-              le32_to_cpu(control_block->
-                          driver_curr_frag[ISL38XX_CB_RX_DATA_LQ]),
-              index - priv->free_data_rx < ISL38XX_CB_RX_QSIZE) {
-               /* allocate an sk_buff for received data frames storage
-                * include any required allignment operations */
-               skb = dev_alloc_skb(MAX_FRAGMENT_SIZE_RX + 2);
-               if (unlikely(skb == NULL)) {
-                       /* error allocating an sk_buff structure elements */
-                       DEBUG(SHOW_ERROR_MESSAGES, "Error allocating skb\n");
-                       break;
-               }
-               skb_reserve(skb, (4 - (long) skb->data) & 0x03);
-               /* store the new skb structure pointer */
-               index = index % ISL38XX_CB_RX_QSIZE;
-               priv->data_low_rx[index] = skb;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_TRACING,
-                     "new alloc skb %p skb->data %p skb->len %u index %u truesize %u\n",
-                     skb, skb->data, skb->len, index, skb->truesize);
-#endif
-
-               /* set the streaming DMA mapping for proper PCI bus operation */
-               priv->pci_map_rx_address[index] =
-                   dma_map_single(&priv->pdev->dev, (void *)skb->data,
-                                  MAX_FRAGMENT_SIZE_RX + 2, DMA_FROM_DEVICE);
-               if (dma_mapping_error(&priv->pdev->dev, priv->pci_map_rx_address[index])) {
-                       /* error mapping the buffer to device accessible memory address */
-                       DEBUG(SHOW_ERROR_MESSAGES,
-                             "Error mapping DMA address\n");
-
-                       /* free the skbuf structure before aborting */
-                       dev_kfree_skb_irq(skb);
-                       skb = NULL;
-                       break;
-               }
-               /* update the fragment address */
-               control_block->rx_data_low[index].address =
-                       cpu_to_le32((u32)priv->pci_map_rx_address[index]);
-               wmb();
-
-               /* increment the driver read pointer */
-               le32_add_cpu(&control_block->
-                            driver_curr_frag[ISL38XX_CB_RX_DATA_LQ], 1);
-       }
-
-       /* trigger the device */
-       islpci_trigger(priv);
-
-       return 0;
-}
-
-void
-islpci_do_reset_and_wake(struct work_struct *work)
-{
-       islpci_private *priv = container_of(work, islpci_private, reset_task);
-
-       islpci_reset(priv, 1);
-       priv->reset_task_pending = 0;
-       smp_wmb();
-       netif_wake_queue(priv->ndev);
-}
-
-void
-islpci_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue)
-{
-       islpci_private *priv = netdev_priv(ndev);
-
-       /* increment the transmit error counter */
-       ndev->stats.tx_errors++;
-
-       if (!priv->reset_task_pending) {
-               printk(KERN_WARNING
-                       "%s: tx_timeout, scheduling reset", ndev->name);
-               netif_stop_queue(ndev);
-               priv->reset_task_pending = 1;
-               schedule_work(&priv->reset_task);
-       } else {
-               printk(KERN_WARNING
-                       "%s: tx_timeout, waiting for reset", ndev->name);
-       }
-}
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.h b/drivers/net/wireless/intersil/prism54/islpci_eth.h
deleted file mode 100644 (file)
index e433ccd..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- */
-
-#ifndef _ISLPCI_ETH_H
-#define _ISLPCI_ETH_H
-
-#include "isl_38xx.h"
-#include "islpci_dev.h"
-
-struct rfmon_header {
-       __le16 unk0;            /* = 0x0000 */
-       __le16 length;          /* = 0x1400 */
-       __le32 clock;           /* 1MHz clock */
-       u8 flags;
-       u8 unk1;
-       u8 rate;
-       u8 unk2;
-       __le16 freq;
-       __le16 unk3;
-       u8 rssi;
-       u8 padding[3];
-} __packed;
-
-struct rx_annex_header {
-       u8 addr1[ETH_ALEN];
-       u8 addr2[ETH_ALEN];
-       struct rfmon_header rfmon;
-} __packed;
-
-/* wlan-ng (and hopefully others) AVS header, version one.  Fields in
- * network byte order. */
-#define P80211CAPTURE_VERSION 0x80211001
-
-struct avs_80211_1_header {
-       __be32 version;
-       __be32 length;
-       __be64 mactime;
-       __be64 hosttime;
-       __be32 phytype;
-       __be32 channel;
-       __be32 datarate;
-       __be32 antenna;
-       __be32 priority;
-       __be32 ssi_type;
-       __be32 ssi_signal;
-       __be32 ssi_noise;
-       __be32 preamble;
-       __be32 encoding;
-};
-
-void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *);
-netdev_tx_t islpci_eth_transmit(struct sk_buff *, struct net_device *);
-int islpci_eth_receive(islpci_private *);
-void islpci_eth_tx_timeout(struct net_device *, unsigned int txqueue);
-void islpci_do_reset_and_wake(struct work_struct *);
-
-#endif                         /* _ISL_GEN_H */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_hotplug.c b/drivers/net/wireless/intersil/prism54/islpci_hotplug.c
deleted file mode 100644 (file)
index 31a1e61..0000000
+++ /dev/null
@@ -1,316 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2003 Herbert Valerio Riedel <hvr@gnu.org>
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/init.h> /* For __init, __exit */
-#include <linux/dma-mapping.h>
-
-#include "prismcompat.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"                /* for pc_debug */
-#include "isl_oid.h"
-
-MODULE_AUTHOR("[Intersil] R.Bastings and W.Termorshuizen, The prism54.org Development Team <prism54-devel@prism54.org>");
-MODULE_DESCRIPTION("The Prism54 802.11 Wireless LAN adapter");
-MODULE_LICENSE("GPL");
-
-static int     init_pcitm = 0;
-module_param(init_pcitm, int, 0);
-
-/* In this order: vendor, device, subvendor, subdevice, class, class_mask,
- * driver_data
- * If you have an update for this please contact prism54-devel@prism54.org
- * The latest list can be found at http://wireless.wiki.kernel.org/en/users/Drivers/p54
- */
-static const struct pci_device_id prism54_id_tbl[] = {
-       /* Intersil PRISM Duette/Prism GT Wireless LAN adapter */
-       {
-        0x1260, 0x3890,
-        PCI_ANY_ID, PCI_ANY_ID,
-        0, 0, 0
-       },
-
-       /* 3COM 3CRWE154G72 Wireless LAN adapter */
-       {
-        PCI_VDEVICE(3COM, 0x6001), 0
-       },
-
-       /* Intersil PRISM Indigo Wireless LAN adapter */
-       {
-        0x1260, 0x3877,
-        PCI_ANY_ID, PCI_ANY_ID,
-        0, 0, 0
-       },
-
-       /* Intersil PRISM Javelin/Xbow Wireless LAN adapter */
-       {
-        0x1260, 0x3886,
-        PCI_ANY_ID, PCI_ANY_ID,
-        0, 0, 0
-       },
-
-       /* End of list */
-       {0,0,0,0,0,0,0}
-};
-
-/* register the device with the Hotplug facilities of the kernel */
-MODULE_DEVICE_TABLE(pci, prism54_id_tbl);
-
-static int prism54_probe(struct pci_dev *, const struct pci_device_id *);
-static void prism54_remove(struct pci_dev *);
-static int __maybe_unused prism54_suspend(struct device *);
-static int __maybe_unused prism54_resume(struct device *);
-
-static SIMPLE_DEV_PM_OPS(prism54_pm_ops, prism54_suspend, prism54_resume);
-
-static struct pci_driver prism54_driver = {
-       .name = DRV_NAME,
-       .id_table = prism54_id_tbl,
-       .probe = prism54_probe,
-       .remove = prism54_remove,
-       .driver.pm = &prism54_pm_ops,
-};
-
-/******************************************************************************
-    Module initialization functions
-******************************************************************************/
-
-static int
-prism54_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-       struct net_device *ndev;
-       u8 latency_tmr;
-       u32 mem_addr;
-       islpci_private *priv;
-       int rvalue;
-
-       /* Enable the pci device */
-       if (pci_enable_device(pdev)) {
-               printk(KERN_ERR "%s: pci_enable_device() failed.\n", DRV_NAME);
-               return -ENODEV;
-       }
-
-       /* check whether the latency timer is set correctly */
-       pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &latency_tmr);
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_TRACING, "latency timer: %x\n", latency_tmr);
-#endif
-       if (latency_tmr < PCIDEVICE_LATENCY_TIMER_MIN) {
-               /* set the latency timer */
-               pci_write_config_byte(pdev, PCI_LATENCY_TIMER,
-                                     PCIDEVICE_LATENCY_TIMER_VAL);
-       }
-
-       /* enable PCI DMA */
-       if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) {
-               printk(KERN_ERR "%s: 32-bit PCI DMA not supported", DRV_NAME);
-               goto do_pci_disable_device;
-        }
-
-       /* 0x40 is the programmable timer to configure the response timeout (TRDY_TIMEOUT)
-        * 0x41 is the programmable timer to configure the retry timeout (RETRY_TIMEOUT)
-        *      The RETRY_TIMEOUT is used to set the number of retries that the core, as a
-        *      Master, will perform before abandoning a cycle. The default value for
-        *      RETRY_TIMEOUT is 0x80, which far exceeds the PCI 2.1 requirement for new
-        *      devices. A write of zero to the RETRY_TIMEOUT register disables this
-        *      function to allow use with any non-compliant legacy devices that may
-        *      execute more retries.
-        *
-        *      Writing zero to both these two registers will disable both timeouts and
-        *      *can* solve problems caused by devices that are slow to respond.
-        *      Make this configurable - MSW
-        */
-       if ( init_pcitm >= 0 ) {
-               pci_write_config_byte(pdev, 0x40, (u8)init_pcitm);
-               pci_write_config_byte(pdev, 0x41, (u8)init_pcitm);
-       } else {
-               printk(KERN_INFO "PCI TRDY/RETRY unchanged\n");
-       }
-
-       /* request the pci device I/O regions */
-       rvalue = pci_request_regions(pdev, DRV_NAME);
-       if (rvalue) {
-               printk(KERN_ERR "%s: pci_request_regions failure (rc=%d)\n",
-                      DRV_NAME, rvalue);
-               goto do_pci_disable_device;
-       }
-
-       /* check if the memory window is indeed set */
-       rvalue = pci_read_config_dword(pdev, PCI_BASE_ADDRESS_0, &mem_addr);
-       if (rvalue || !mem_addr) {
-               printk(KERN_ERR "%s: PCI device memory region not configured; fix your BIOS or CardBus bridge/drivers\n",
-                      DRV_NAME);
-               goto do_pci_release_regions;
-       }
-
-       /* enable PCI bus-mastering */
-       DEBUG(SHOW_TRACING, "%s: pci_set_master(pdev)\n", DRV_NAME);
-       pci_set_master(pdev);
-
-       /* enable MWI */
-       pci_try_set_mwi(pdev);
-
-       /* setup the network device interface and its structure */
-       if (!(ndev = islpci_setup(pdev))) {
-               /* error configuring the driver as a network device */
-               printk(KERN_ERR "%s: could not configure network device\n",
-                      DRV_NAME);
-               goto do_pci_clear_mwi;
-       }
-
-       priv = netdev_priv(ndev);
-       islpci_set_state(priv, PRV_STATE_PREBOOT); /* we are attempting to boot */
-
-       /* card is in unknown state yet, might have some interrupts pending */
-       isl38xx_disable_interrupts(priv->device_base);
-
-       /* request for the interrupt before uploading the firmware */
-       rvalue = request_irq(pdev->irq, islpci_interrupt,
-                            IRQF_SHARED, ndev->name, priv);
-
-       if (rvalue) {
-               /* error, could not hook the handler to the irq */
-               printk(KERN_ERR "%s: could not install IRQ handler\n",
-                      ndev->name);
-               goto do_unregister_netdev;
-       }
-
-       /* firmware upload is triggered in islpci_open */
-
-       return 0;
-
-      do_unregister_netdev:
-       unregister_netdev(ndev);
-       islpci_free_memory(priv);
-       free_netdev(ndev);
-       priv = NULL;
-      do_pci_clear_mwi:
-       pci_clear_mwi(pdev);
-      do_pci_release_regions:
-       pci_release_regions(pdev);
-      do_pci_disable_device:
-       pci_disable_device(pdev);
-       return -EIO;
-}
-
-/* set by cleanup_module */
-static volatile int __in_cleanup_module = 0;
-
-/* this one removes one(!!) instance only */
-static void
-prism54_remove(struct pci_dev *pdev)
-{
-       struct net_device *ndev = pci_get_drvdata(pdev);
-       islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
-       BUG_ON(!priv);
-
-       if (!__in_cleanup_module) {
-               printk(KERN_DEBUG "%s: hot unplug detected\n", ndev->name);
-               islpci_set_state(priv, PRV_STATE_OFF);
-       }
-
-       printk(KERN_DEBUG "%s: removing device\n", ndev->name);
-
-       unregister_netdev(ndev);
-
-       /* free the interrupt request */
-
-       if (islpci_get_state(priv) != PRV_STATE_OFF) {
-               isl38xx_disable_interrupts(priv->device_base);
-               islpci_set_state(priv, PRV_STATE_OFF);
-               /* This bellow causes a lockup at rmmod time. It might be
-                * because some interrupts still linger after rmmod time,
-                * see bug #17 */
-               /* pci_set_power_state(pdev, 3);*/      /* try to power-off */
-       }
-
-       free_irq(pdev->irq, priv);
-
-       /* free the PCI memory and unmap the remapped page */
-       islpci_free_memory(priv);
-
-       free_netdev(ndev);
-       priv = NULL;
-
-       pci_clear_mwi(pdev);
-
-       pci_release_regions(pdev);
-
-       pci_disable_device(pdev);
-}
-
-static int __maybe_unused
-prism54_suspend(struct device *dev)
-{
-       struct net_device *ndev = dev_get_drvdata(dev);
-       islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
-       BUG_ON(!priv);
-
-       /* tell the device not to trigger interrupts for now... */
-       isl38xx_disable_interrupts(priv->device_base);
-
-       /* from now on assume the hardware was already powered down
-          and don't touch it anymore */
-       islpci_set_state(priv, PRV_STATE_OFF);
-
-       netif_stop_queue(ndev);
-       netif_device_detach(ndev);
-
-       return 0;
-}
-
-static int __maybe_unused
-prism54_resume(struct device *dev)
-{
-       struct net_device *ndev = dev_get_drvdata(dev);
-       islpci_private *priv = ndev ? netdev_priv(ndev) : NULL;
-
-       BUG_ON(!priv);
-
-       printk(KERN_NOTICE "%s: got resume request\n", ndev->name);
-
-       /* alright let's go into the PREBOOT state */
-       islpci_reset(priv, 1);
-
-       netif_device_attach(ndev);
-       netif_start_queue(ndev);
-
-       return 0;
-}
-
-static int __init
-prism54_module_init(void)
-{
-       printk(KERN_INFO "Loaded %s driver, version %s\n",
-              DRV_NAME, DRV_VERSION);
-
-       __bug_on_wrong_struct_sizes ();
-
-       return pci_register_driver(&prism54_driver);
-}
-
-/* by the time prism54_module_exit() terminates, as a postcondition
- * all instances will have been destroyed by calls to
- * prism54_remove() */
-static void __exit
-prism54_module_exit(void)
-{
-       __in_cleanup_module = 1;
-
-       pci_unregister_driver(&prism54_driver);
-
-       printk(KERN_INFO "Unloaded %s driver\n", DRV_NAME);
-
-       __in_cleanup_module = 0;
-}
-
-/* register entry points */
-module_init(prism54_module_init);
-module_exit(prism54_module_exit);
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_mgt.c b/drivers/net/wireless/intersil/prism54/islpci_mgt.c
deleted file mode 100644 (file)
index 0c7fb76..0000000
+++ /dev/null
@@ -1,491 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright 2004 Jens Maurer <Jens.Maurer@gmx.net>
- */
-
-#include <linux/netdevice.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <linux/if_arp.h>
-
-#include "prismcompat.h"
-#include "isl_38xx.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h"           /* additional types and defs for isl38xx fw */
-#include "isl_ioctl.h"
-
-#include <net/iw_handler.h>
-
-/******************************************************************************
-        Global variable definition section
-******************************************************************************/
-int pc_debug = VERBOSE;
-module_param(pc_debug, int, 0);
-
-/******************************************************************************
-    Driver general functions
-******************************************************************************/
-#if VERBOSE > SHOW_ERROR_MESSAGES
-void
-display_buffer(char *buffer, int length)
-{
-       if ((pc_debug & SHOW_BUFFER_CONTENTS) == 0)
-               return;
-
-       while (length > 0) {
-               printk("[%02x]", *buffer & 255);
-               length--;
-               buffer++;
-       }
-
-       printk("\n");
-}
-#endif
-
-/*****************************************************************************
-    Queue handling for management frames
-******************************************************************************/
-
-/*
- * Helper function to create a PIMFOR management frame header.
- */
-static void
-pimfor_encode_header(int operation, u32 oid, u32 length, pimfor_header_t *h)
-{
-       h->version = PIMFOR_VERSION;
-       h->operation = operation;
-       h->device_id = PIMFOR_DEV_ID_MHLI_MIB;
-       h->flags = 0;
-       h->oid = cpu_to_be32(oid);
-       h->length = cpu_to_be32(length);
-}
-
-/*
- * Helper function to analyze a PIMFOR management frame header.
- */
-static pimfor_header_t *
-pimfor_decode_header(void *data, int len)
-{
-       pimfor_header_t *h = data;
-
-       while ((void *) h < data + len) {
-               if (h->flags & PIMFOR_FLAG_LITTLE_ENDIAN) {
-                       le32_to_cpus(&h->oid);
-                       le32_to_cpus(&h->length);
-               } else {
-                       be32_to_cpus(&h->oid);
-                       be32_to_cpus(&h->length);
-               }
-               if (h->oid != OID_INL_TUNNEL)
-                       return h;
-               h++;
-       }
-       return NULL;
-}
-
-/*
- * Fill the receive queue for management frames with fresh buffers.
- */
-int
-islpci_mgmt_rx_fill(struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       isl38xx_control_block *cb =     /* volatile not needed */
-           (isl38xx_control_block *) priv->control_block;
-       u32 curr = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_RX_MGMTQ]);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgmt_rx_fill\n");
-#endif
-
-       while (curr - priv->index_mgmt_rx < ISL38XX_CB_MGMT_QSIZE) {
-               u32 index = curr % ISL38XX_CB_MGMT_QSIZE;
-               struct islpci_membuf *buf = &priv->mgmt_rx[index];
-               isl38xx_fragment *frag = &cb->rx_data_mgmt[index];
-
-               if (buf->mem == NULL) {
-                       buf->mem = kmalloc(MGMT_FRAME_SIZE, GFP_ATOMIC);
-                       if (!buf->mem)
-                               return -ENOMEM;
-                       buf->size = MGMT_FRAME_SIZE;
-               }
-               if (buf->pci_addr == 0) {
-                       buf->pci_addr = dma_map_single(&priv->pdev->dev,
-                                                      buf->mem,
-                                                      MGMT_FRAME_SIZE,
-                                                      DMA_FROM_DEVICE);
-                       if (dma_mapping_error(&priv->pdev->dev, buf->pci_addr)) {
-                               printk(KERN_WARNING
-                                      "Failed to make memory DMA'able.\n");
-                               return -ENOMEM;
-                       }
-               }
-
-               /* be safe: always reset control block information */
-               frag->size = cpu_to_le16(MGMT_FRAME_SIZE);
-               frag->flags = 0;
-               frag->address = cpu_to_le32(buf->pci_addr);
-               curr++;
-
-               /* The fragment address in the control block must have
-                * been written before announcing the frame buffer to
-                * device */
-               wmb();
-               cb->driver_curr_frag[ISL38XX_CB_RX_MGMTQ] = cpu_to_le32(curr);
-       }
-       return 0;
-}
-
-/*
- * Create and transmit a management frame using "operation" and "oid",
- * with arguments data/length.
- * We either return an error and free the frame, or we return 0 and
- * islpci_mgt_cleanup_transmit() frees the frame in the tx-done
- * interrupt.
- */
-static int
-islpci_mgt_transmit(struct net_device *ndev, int operation, unsigned long oid,
-                   void *data, int length)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       isl38xx_control_block *cb =
-           (isl38xx_control_block *) priv->control_block;
-       void *p;
-       int err = -EINVAL;
-       unsigned long flags;
-       isl38xx_fragment *frag;
-       struct islpci_membuf buf;
-       u32 curr_frag;
-       int index;
-       int frag_len = length + PIMFOR_HEADER_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_transmit\n");
-#endif
-
-       if (frag_len > MGMT_FRAME_SIZE) {
-               printk(KERN_DEBUG "%s: mgmt frame too large %d\n",
-                      ndev->name, frag_len);
-               goto error;
-       }
-
-       err = -ENOMEM;
-       p = buf.mem = kmalloc(frag_len, GFP_KERNEL);
-       if (!buf.mem)
-               goto error;
-
-       buf.size = frag_len;
-
-       /* create the header directly in the fragment data area */
-       pimfor_encode_header(operation, oid, length, (pimfor_header_t *) p);
-       p += PIMFOR_HEADER_SIZE;
-
-       if (data)
-               memcpy(p, data, length);
-       else
-               memset(p, 0, length);
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       {
-               pimfor_header_t *h = buf.mem;
-               DEBUG(SHOW_PIMFOR_FRAMES,
-                     "PIMFOR: op %i, oid 0x%08lx, device %i, flags 0x%x length 0x%x\n",
-                     h->operation, oid, h->device_id, h->flags, length);
-
-               /* display the buffer contents for debugging */
-               display_buffer((char *) h, sizeof (pimfor_header_t));
-               display_buffer(p, length);
-       }
-#endif
-
-       err = -ENOMEM;
-       buf.pci_addr = dma_map_single(&priv->pdev->dev, buf.mem, frag_len,
-                                     DMA_TO_DEVICE);
-       if (dma_mapping_error(&priv->pdev->dev, buf.pci_addr)) {
-               printk(KERN_WARNING "%s: cannot map PCI memory for mgmt\n",
-                      ndev->name);
-               goto error_free;
-       }
-
-       /* Protect the control block modifications against interrupts. */
-       spin_lock_irqsave(&priv->slock, flags);
-       curr_frag = le32_to_cpu(cb->driver_curr_frag[ISL38XX_CB_TX_MGMTQ]);
-       if (curr_frag - priv->index_mgmt_tx >= ISL38XX_CB_MGMT_QSIZE) {
-               printk(KERN_WARNING "%s: mgmt tx queue is still full\n",
-                      ndev->name);
-               goto error_unlock;
-       }
-
-       /* commit the frame to the tx device queue */
-       index = curr_frag % ISL38XX_CB_MGMT_QSIZE;
-       priv->mgmt_tx[index] = buf;
-       frag = &cb->tx_data_mgmt[index];
-       frag->size = cpu_to_le16(frag_len);
-       frag->flags = 0;        /* for any other than the last fragment, set to 1 */
-       frag->address = cpu_to_le32(buf.pci_addr);
-
-       /* The fragment address in the control block must have
-        * been written before announcing the frame buffer to
-        * device */
-       wmb();
-       cb->driver_curr_frag[ISL38XX_CB_TX_MGMTQ] = cpu_to_le32(curr_frag + 1);
-       spin_unlock_irqrestore(&priv->slock, flags);
-
-       /* trigger the device */
-       islpci_trigger(priv);
-       return 0;
-
-      error_unlock:
-       spin_unlock_irqrestore(&priv->slock, flags);
-      error_free:
-       kfree(buf.mem);
-      error:
-       return err;
-}
-
-/*
- * Receive a management frame from the device.
- * This can be an arbitrary number of traps, and at most one response
- * frame for a previous request sent via islpci_mgt_transmit().
- */
-int
-islpci_mgt_receive(struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       isl38xx_control_block *cb =
-           (isl38xx_control_block *) priv->control_block;
-       u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_receive\n");
-#endif
-
-       /* Only once per interrupt, determine fragment range to
-        * process.  This avoids an endless loop (i.e. lockup) if
-        * frames come in faster than we can process them. */
-       curr_frag = le32_to_cpu(cb->device_curr_frag[ISL38XX_CB_RX_MGMTQ]);
-       barrier();
-
-       for (; priv->index_mgmt_rx < curr_frag; priv->index_mgmt_rx++) {
-               pimfor_header_t *header;
-               u32 index = priv->index_mgmt_rx % ISL38XX_CB_MGMT_QSIZE;
-               struct islpci_membuf *buf = &priv->mgmt_rx[index];
-               u16 frag_len;
-               int size;
-               struct islpci_mgmtframe *frame;
-
-               /* I have no idea (and no documentation) if flags != 0
-                * is possible.  Drop the frame, reuse the buffer. */
-               if (le16_to_cpu(cb->rx_data_mgmt[index].flags) != 0) {
-                       printk(KERN_WARNING "%s: unknown flags 0x%04x\n",
-                              ndev->name,
-                              le16_to_cpu(cb->rx_data_mgmt[index].flags));
-                       continue;
-               }
-
-               /* The device only returns the size of the header(s) here. */
-               frag_len = le16_to_cpu(cb->rx_data_mgmt[index].size);
-
-               /*
-                * We appear to have no way to tell the device the
-                * size of a receive buffer.  Thus, if this check
-                * triggers, we likely have kernel heap corruption. */
-               if (frag_len > MGMT_FRAME_SIZE) {
-                       printk(KERN_WARNING
-                               "%s: Bogus packet size of %d (%#x).\n",
-                               ndev->name, frag_len, frag_len);
-                       frag_len = MGMT_FRAME_SIZE;
-               }
-
-               /* Ensure the results of device DMA are visible to the CPU. */
-               dma_sync_single_for_cpu(&priv->pdev->dev, buf->pci_addr,
-                                       buf->size, DMA_FROM_DEVICE);
-
-               /* Perform endianess conversion for PIMFOR header in-place. */
-               header = pimfor_decode_header(buf->mem, frag_len);
-               if (!header) {
-                       printk(KERN_WARNING "%s: no PIMFOR header found\n",
-                              ndev->name);
-                       continue;
-               }
-
-               /* The device ID from the PIMFOR packet received from
-                * the MVC is always 0.  We forward a sensible device_id.
-                * Not that anyone upstream would care... */
-               header->device_id = priv->ndev->ifindex;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_PIMFOR_FRAMES,
-                     "PIMFOR: op %i, oid 0x%08x, device %i, flags 0x%x length 0x%x\n",
-                     header->operation, header->oid, header->device_id,
-                     header->flags, header->length);
-
-               /* display the buffer contents for debugging */
-               display_buffer((char *) header, PIMFOR_HEADER_SIZE);
-               display_buffer((char *) header + PIMFOR_HEADER_SIZE,
-                              header->length);
-#endif
-
-               /* nobody sends these */
-               if (header->flags & PIMFOR_FLAG_APPLIC_ORIGIN) {
-                       printk(KERN_DEBUG
-                              "%s: errant PIMFOR application frame\n",
-                              ndev->name);
-                       continue;
-               }
-
-               /* Determine frame size, skipping OID_INL_TUNNEL headers. */
-               size = PIMFOR_HEADER_SIZE + header->length;
-               frame = kmalloc(sizeof(struct islpci_mgmtframe) + size,
-                               GFP_ATOMIC);
-               if (!frame)
-                       continue;
-
-               frame->ndev = ndev;
-               memcpy(&frame->buf, header, size);
-               frame->header = (pimfor_header_t *) frame->buf;
-               frame->data = frame->buf + PIMFOR_HEADER_SIZE;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-               DEBUG(SHOW_PIMFOR_FRAMES,
-                     "frame: header: %p, data: %p, size: %d\n",
-                     frame->header, frame->data, size);
-#endif
-
-               if (header->operation == PIMFOR_OP_TRAP) {
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       printk(KERN_DEBUG
-                              "TRAP: oid 0x%x, device %i, flags 0x%x length %i\n",
-                              header->oid, header->device_id, header->flags,
-                              header->length);
-#endif
-
-                       /* Create work to handle trap out of interrupt
-                        * context. */
-                       INIT_WORK(&frame->ws, prism54_process_trap);
-                       schedule_work(&frame->ws);
-
-               } else {
-                       /* Signal the one waiting process that a response
-                        * has been received. */
-                       if ((frame = xchg(&priv->mgmt_received, frame)) != NULL) {
-                               printk(KERN_WARNING
-                                      "%s: mgmt response not collected\n",
-                                      ndev->name);
-                               kfree(frame);
-                       }
-#if VERBOSE > SHOW_ERROR_MESSAGES
-                       DEBUG(SHOW_TRACING, "Wake up Mgmt Queue\n");
-#endif
-                       wake_up(&priv->mgmt_wqueue);
-               }
-
-       }
-
-       return 0;
-}
-
-/*
- * Cleanup the transmit queue by freeing all frames handled by the device.
- */
-void
-islpci_mgt_cleanup_transmit(struct net_device *ndev)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       isl38xx_control_block *cb =     /* volatile not needed */
-           (isl38xx_control_block *) priv->control_block;
-       u32 curr_frag;
-
-#if VERBOSE > SHOW_ERROR_MESSAGES
-       DEBUG(SHOW_FUNCTION_CALLS, "islpci_mgt_cleanup_transmit\n");
-#endif
-
-       /* Only once per cleanup, determine fragment range to
-        * process.  This avoids an endless loop (i.e. lockup) if
-        * the device became confused, incrementing device_curr_frag
-        * rapidly. */
-       curr_frag = le32_to_cpu(cb->device_curr_frag[ISL38XX_CB_TX_MGMTQ]);
-       barrier();
-
-       for (; priv->index_mgmt_tx < curr_frag; priv->index_mgmt_tx++) {
-               int index = priv->index_mgmt_tx % ISL38XX_CB_MGMT_QSIZE;
-               struct islpci_membuf *buf = &priv->mgmt_tx[index];
-               dma_unmap_single(&priv->pdev->dev, buf->pci_addr, buf->size,
-                                DMA_TO_DEVICE);
-               buf->pci_addr = 0;
-               kfree(buf->mem);
-               buf->mem = NULL;
-               buf->size = 0;
-       }
-}
-
-/*
- * Perform one request-response transaction to the device.
- */
-int
-islpci_mgt_transaction(struct net_device *ndev,
-                      int operation, unsigned long oid,
-                      void *senddata, int sendlen,
-                      struct islpci_mgmtframe **recvframe)
-{
-       islpci_private *priv = netdev_priv(ndev);
-       const long wait_cycle_jiffies = msecs_to_jiffies(ISL38XX_WAIT_CYCLE * 10);
-       long timeout_left = ISL38XX_MAX_WAIT_CYCLES * wait_cycle_jiffies;
-       int err;
-       DEFINE_WAIT(wait);
-
-       *recvframe = NULL;
-
-       if (mutex_lock_interruptible(&priv->mgmt_lock))
-               return -ERESTARTSYS;
-
-       prepare_to_wait(&priv->mgmt_wqueue, &wait, TASK_UNINTERRUPTIBLE);
-       err = islpci_mgt_transmit(ndev, operation, oid, senddata, sendlen);
-       if (err)
-               goto out;
-
-       err = -ETIMEDOUT;
-       while (timeout_left > 0) {
-               int timeleft;
-               struct islpci_mgmtframe *frame;
-
-               timeleft = schedule_timeout_uninterruptible(wait_cycle_jiffies);
-               frame = xchg(&priv->mgmt_received, NULL);
-               if (frame) {
-                       if (frame->header->oid == oid) {
-                               *recvframe = frame;
-                               err = 0;
-                               goto out;
-                       } else {
-                               printk(KERN_DEBUG
-                                      "%s: expecting oid 0x%x, received 0x%x.\n",
-                                      ndev->name, (unsigned int) oid,
-                                      frame->header->oid);
-                               kfree(frame);
-                               frame = NULL;
-                       }
-               }
-               if (timeleft == 0) {
-                       printk(KERN_DEBUG
-                               "%s: timeout waiting for mgmt response %lu, "
-                               "triggering device\n",
-                               ndev->name, timeout_left);
-                       islpci_trigger(priv);
-               }
-               timeout_left += timeleft - wait_cycle_jiffies;
-       }
-       printk(KERN_WARNING "%s: timeout waiting for mgmt response\n",
-              ndev->name);
-
-       /* TODO: we should reset the device here */
- out:
-       finish_wait(&priv->mgmt_wqueue, &wait);
-       mutex_unlock(&priv->mgmt_lock);
-       return err;
-}
-
diff --git a/drivers/net/wireless/intersil/prism54/islpci_mgt.h b/drivers/net/wireless/intersil/prism54/islpci_mgt.h
deleted file mode 100644 (file)
index 1f87d0a..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2002 Intersil Americas Inc.
- *  Copyright (C) 2003 Luis R. Rodriguez <mcgrof@ruslug.rutgers.edu>
- */
-
-#ifndef _ISLPCI_MGT_H
-#define _ISLPCI_MGT_H
-
-#include <linux/wireless.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-/*
- *  Function definitions
- */
-
-#define K_DEBUG(f, m, args...) do { if(f & m) printk(KERN_DEBUG args); } while(0)
-#define DEBUG(f, args...) K_DEBUG(f, pc_debug, args)
-
-extern int pc_debug;
-#define init_wds 0     /* help compiler optimize away dead code */
-
-
-/* General driver definitions */
-#define PCIDEVICE_LATENCY_TIMER_MIN            0x40
-#define PCIDEVICE_LATENCY_TIMER_VAL            0x50
-
-/* Debugging verbose definitions */
-#define SHOW_NOTHING                            0x00   /* overrules everything */
-#define SHOW_ANYTHING                           0xFF
-#define SHOW_ERROR_MESSAGES                     0x01
-#define SHOW_TRAPS                              0x02
-#define SHOW_FUNCTION_CALLS                     0x04
-#define SHOW_TRACING                            0x08
-#define SHOW_QUEUE_INDEXES                      0x10
-#define SHOW_PIMFOR_FRAMES                      0x20
-#define SHOW_BUFFER_CONTENTS                    0x40
-#define VERBOSE                                 0x01
-
-/* Default card definitions */
-#define CARD_DEFAULT_CHANNEL                    6
-#define CARD_DEFAULT_MODE                       INL_MODE_CLIENT
-#define CARD_DEFAULT_IW_MODE                   IW_MODE_INFRA
-#define CARD_DEFAULT_BSSTYPE                    DOT11_BSSTYPE_INFRA
-#define CARD_DEFAULT_CLIENT_SSID               ""
-#define CARD_DEFAULT_AP_SSID                   "default"
-#define CARD_DEFAULT_KEY1                       "default_key_1"
-#define CARD_DEFAULT_KEY2                       "default_key_2"
-#define CARD_DEFAULT_KEY3                       "default_key_3"
-#define CARD_DEFAULT_KEY4                       "default_key_4"
-#define CARD_DEFAULT_WEP                        0
-#define CARD_DEFAULT_FILTER                     0
-#define CARD_DEFAULT_WDS                        0
-#define        CARD_DEFAULT_AUTHEN                     DOT11_AUTH_OS
-#define        CARD_DEFAULT_DOT1X                      0
-#define CARD_DEFAULT_MLME_MODE                 DOT11_MLME_AUTO
-#define CARD_DEFAULT_CONFORMANCE                OID_INL_CONFORMANCE_NONE
-#define CARD_DEFAULT_PROFILE                   DOT11_PROFILE_MIXED_G_WIFI
-#define CARD_DEFAULT_MAXFRAMEBURST             DOT11_MAXFRAMEBURST_MIXED_SAFE
-
-/* PIMFOR package definitions */
-#define PIMFOR_ETHERTYPE                        0x8828
-#define PIMFOR_HEADER_SIZE                      12
-#define PIMFOR_VERSION                          1
-#define PIMFOR_OP_GET                           0
-#define PIMFOR_OP_SET                           1
-#define PIMFOR_OP_RESPONSE                      2
-#define PIMFOR_OP_ERROR                         3
-#define PIMFOR_OP_TRAP                          4
-#define PIMFOR_OP_RESERVED                      5      /* till 255 */
-#define PIMFOR_DEV_ID_MHLI_MIB                  0
-#define PIMFOR_FLAG_APPLIC_ORIGIN               0x01
-#define PIMFOR_FLAG_LITTLE_ENDIAN               0x02
-
-void display_buffer(char *, int);
-
-/*
- *  Type definition section
- *
- *  the structure defines only the header allowing copyless
- *  frame handling
- */
-typedef struct {
-       u8 version;
-       u8 operation;
-       u32 oid;
-       u8 device_id;
-       u8 flags;
-       u32 length;
-} __packed
-pimfor_header_t;
-
-/* A received and interrupt-processed management frame, either for
- * schedule_work(prism54_process_trap) or for priv->mgmt_received,
- * processed by islpci_mgt_transaction(). */
-struct islpci_mgmtframe {
-       struct net_device *ndev;      /* pointer to network device */
-       pimfor_header_t *header;      /* payload header, points into buf */
-       void *data;                   /* payload ex header, points into buf */
-        struct work_struct ws;       /* argument for schedule_work() */
-       char buf[];                   /* fragment buffer */
-};
-
-int
-islpci_mgt_receive(struct net_device *ndev);
-
-int
-islpci_mgmt_rx_fill(struct net_device *ndev);
-
-void
-islpci_mgt_cleanup_transmit(struct net_device *ndev);
-
-int
-islpci_mgt_transaction(struct net_device *ndev,
-                       int operation, unsigned long oid,
-                      void *senddata, int sendlen,
-                      struct islpci_mgmtframe **recvframe);
-
-static inline void
-islpci_mgt_release(struct islpci_mgmtframe *frame)
-{
-        kfree(frame);
-}
-
-#endif                         /* _ISLPCI_MGT_H */
diff --git a/drivers/net/wireless/intersil/prism54/oid_mgt.c b/drivers/net/wireless/intersil/prism54/oid_mgt.c
deleted file mode 100644 (file)
index 9fd307c..0000000
+++ /dev/null
@@ -1,889 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2003,2004 Aurelien Alleaume <slts@free.fr>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-
-#include "prismcompat.h"
-#include "islpci_dev.h"
-#include "islpci_mgt.h"
-#include "isl_oid.h"
-#include "oid_mgt.h"
-#include "isl_ioctl.h"
-
-/* to convert between channel and freq */
-static const int frequency_list_bg[] = { 2412, 2417, 2422, 2427, 2432,
-       2437, 2442, 2447, 2452, 2457, 2462, 2467, 2472, 2484
-};
-
-int
-channel_of_freq(int f)
-{
-       int c = 0;
-
-       if ((f >= 2412) && (f <= 2484)) {
-               while ((c < 14) && (f != frequency_list_bg[c]))
-                       c++;
-               return (c >= 14) ? 0 : ++c;
-       } else if ((f >= (int) 5000) && (f <= (int) 6000)) {
-               return ( (f - 5000) / 5 );
-       } else
-               return 0;
-}
-
-#define OID_STRUCT(name,oid,s,t) [name] = {oid, 0, sizeof(s), t}
-#define OID_STRUCT_C(name,oid,s,t) OID_STRUCT(name,oid,s,t | OID_FLAG_CACHED)
-#define OID_U32(name,oid) OID_STRUCT(name,oid,u32,OID_TYPE_U32)
-#define OID_U32_C(name,oid) OID_STRUCT_C(name,oid,u32,OID_TYPE_U32)
-#define OID_STRUCT_MLME(name,oid) OID_STRUCT(name,oid,struct obj_mlme,OID_TYPE_MLME)
-#define OID_STRUCT_MLMEEX(name,oid) OID_STRUCT(name,oid,struct obj_mlmeex,OID_TYPE_MLMEEX)
-
-#define OID_UNKNOWN(name,oid) OID_STRUCT(name,oid,0,0)
-
-struct oid_t isl_oid[] = {
-       OID_STRUCT(GEN_OID_MACADDRESS, 0x00000000, u8[6], OID_TYPE_ADDR),
-       OID_U32(GEN_OID_LINKSTATE, 0x00000001),
-       OID_UNKNOWN(GEN_OID_WATCHDOG, 0x00000002),
-       OID_UNKNOWN(GEN_OID_MIBOP, 0x00000003),
-       OID_UNKNOWN(GEN_OID_OPTIONS, 0x00000004),
-       OID_UNKNOWN(GEN_OID_LEDCONFIG, 0x00000005),
-
-       /* 802.11 */
-       OID_U32_C(DOT11_OID_BSSTYPE, 0x10000000),
-       OID_STRUCT_C(DOT11_OID_BSSID, 0x10000001, u8[6], OID_TYPE_RAW),
-       OID_STRUCT_C(DOT11_OID_SSID, 0x10000002, struct obj_ssid,
-                    OID_TYPE_SSID),
-       OID_U32(DOT11_OID_STATE, 0x10000003),
-       OID_U32(DOT11_OID_AID, 0x10000004),
-       OID_STRUCT(DOT11_OID_COUNTRYSTRING, 0x10000005, u8[4], OID_TYPE_RAW),
-       OID_STRUCT_C(DOT11_OID_SSIDOVERRIDE, 0x10000006, struct obj_ssid,
-                    OID_TYPE_SSID),
-
-       OID_U32(DOT11_OID_MEDIUMLIMIT, 0x11000000),
-       OID_U32_C(DOT11_OID_BEACONPERIOD, 0x11000001),
-       OID_U32(DOT11_OID_DTIMPERIOD, 0x11000002),
-       OID_U32(DOT11_OID_ATIMWINDOW, 0x11000003),
-       OID_U32(DOT11_OID_LISTENINTERVAL, 0x11000004),
-       OID_U32(DOT11_OID_CFPPERIOD, 0x11000005),
-       OID_U32(DOT11_OID_CFPDURATION, 0x11000006),
-
-       OID_U32_C(DOT11_OID_AUTHENABLE, 0x12000000),
-       OID_U32_C(DOT11_OID_PRIVACYINVOKED, 0x12000001),
-       OID_U32_C(DOT11_OID_EXUNENCRYPTED, 0x12000002),
-       OID_U32_C(DOT11_OID_DEFKEYID, 0x12000003),
-       [DOT11_OID_DEFKEYX] = {0x12000004, 3, sizeof (struct obj_key),
-                              OID_FLAG_CACHED | OID_TYPE_KEY}, /* DOT11_OID_DEFKEY1,...DOT11_OID_DEFKEY4 */
-       OID_UNKNOWN(DOT11_OID_STAKEY, 0x12000008),
-       OID_U32(DOT11_OID_REKEYTHRESHOLD, 0x12000009),
-       OID_UNKNOWN(DOT11_OID_STASC, 0x1200000a),
-
-       OID_U32(DOT11_OID_PRIVTXREJECTED, 0x1a000000),
-       OID_U32(DOT11_OID_PRIVRXPLAIN, 0x1a000001),
-       OID_U32(DOT11_OID_PRIVRXFAILED, 0x1a000002),
-       OID_U32(DOT11_OID_PRIVRXNOKEY, 0x1a000003),
-
-       OID_U32_C(DOT11_OID_RTSTHRESH, 0x13000000),
-       OID_U32_C(DOT11_OID_FRAGTHRESH, 0x13000001),
-       OID_U32_C(DOT11_OID_SHORTRETRIES, 0x13000002),
-       OID_U32_C(DOT11_OID_LONGRETRIES, 0x13000003),
-       OID_U32_C(DOT11_OID_MAXTXLIFETIME, 0x13000004),
-       OID_U32(DOT11_OID_MAXRXLIFETIME, 0x13000005),
-       OID_U32(DOT11_OID_AUTHRESPTIMEOUT, 0x13000006),
-       OID_U32(DOT11_OID_ASSOCRESPTIMEOUT, 0x13000007),
-
-       OID_UNKNOWN(DOT11_OID_ALOFT_TABLE, 0x1d000000),
-       OID_UNKNOWN(DOT11_OID_ALOFT_CTRL_TABLE, 0x1d000001),
-       OID_UNKNOWN(DOT11_OID_ALOFT_RETREAT, 0x1d000002),
-       OID_UNKNOWN(DOT11_OID_ALOFT_PROGRESS, 0x1d000003),
-       OID_U32(DOT11_OID_ALOFT_FIXEDRATE, 0x1d000004),
-       OID_UNKNOWN(DOT11_OID_ALOFT_RSSIGRAPH, 0x1d000005),
-       OID_UNKNOWN(DOT11_OID_ALOFT_CONFIG, 0x1d000006),
-
-       [DOT11_OID_VDCFX] = {0x1b000000, 7, 0, 0},
-       OID_U32(DOT11_OID_MAXFRAMEBURST, 0x1b000008),
-
-       OID_U32(DOT11_OID_PSM, 0x14000000),
-       OID_U32(DOT11_OID_CAMTIMEOUT, 0x14000001),
-       OID_U32(DOT11_OID_RECEIVEDTIMS, 0x14000002),
-       OID_U32(DOT11_OID_ROAMPREFERENCE, 0x14000003),
-
-       OID_U32(DOT11_OID_BRIDGELOCAL, 0x15000000),
-       OID_U32(DOT11_OID_CLIENTS, 0x15000001),
-       OID_U32(DOT11_OID_CLIENTSASSOCIATED, 0x15000002),
-       [DOT11_OID_CLIENTX] = {0x15000003, 2006, 0, 0}, /* DOT11_OID_CLIENTX,...DOT11_OID_CLIENT2007 */
-
-       OID_STRUCT(DOT11_OID_CLIENTFIND, 0x150007DB, u8[6], OID_TYPE_ADDR),
-       OID_STRUCT(DOT11_OID_WDSLINKADD, 0x150007DC, u8[6], OID_TYPE_ADDR),
-       OID_STRUCT(DOT11_OID_WDSLINKREMOVE, 0x150007DD, u8[6], OID_TYPE_ADDR),
-       OID_STRUCT(DOT11_OID_EAPAUTHSTA, 0x150007DE, u8[6], OID_TYPE_ADDR),
-       OID_STRUCT(DOT11_OID_EAPUNAUTHSTA, 0x150007DF, u8[6], OID_TYPE_ADDR),
-       OID_U32_C(DOT11_OID_DOT1XENABLE, 0x150007E0),
-       OID_UNKNOWN(DOT11_OID_MICFAILURE, 0x150007E1),
-       OID_UNKNOWN(DOT11_OID_REKEYINDICATE, 0x150007E2),
-
-       OID_U32(DOT11_OID_MPDUTXSUCCESSFUL, 0x16000000),
-       OID_U32(DOT11_OID_MPDUTXONERETRY, 0x16000001),
-       OID_U32(DOT11_OID_MPDUTXMULTIPLERETRIES, 0x16000002),
-       OID_U32(DOT11_OID_MPDUTXFAILED, 0x16000003),
-       OID_U32(DOT11_OID_MPDURXSUCCESSFUL, 0x16000004),
-       OID_U32(DOT11_OID_MPDURXDUPS, 0x16000005),
-       OID_U32(DOT11_OID_RTSSUCCESSFUL, 0x16000006),
-       OID_U32(DOT11_OID_RTSFAILED, 0x16000007),
-       OID_U32(DOT11_OID_ACKFAILED, 0x16000008),
-       OID_U32(DOT11_OID_FRAMERECEIVES, 0x16000009),
-       OID_U32(DOT11_OID_FRAMEERRORS, 0x1600000A),
-       OID_U32(DOT11_OID_FRAMEABORTS, 0x1600000B),
-       OID_U32(DOT11_OID_FRAMEABORTSPHY, 0x1600000C),
-
-       OID_U32(DOT11_OID_SLOTTIME, 0x17000000),
-       OID_U32(DOT11_OID_CWMIN, 0x17000001),
-       OID_U32(DOT11_OID_CWMAX, 0x17000002),
-       OID_U32(DOT11_OID_ACKWINDOW, 0x17000003),
-       OID_U32(DOT11_OID_ANTENNARX, 0x17000004),
-       OID_U32(DOT11_OID_ANTENNATX, 0x17000005),
-       OID_U32(DOT11_OID_ANTENNADIVERSITY, 0x17000006),
-       OID_U32_C(DOT11_OID_CHANNEL, 0x17000007),
-       OID_U32_C(DOT11_OID_EDTHRESHOLD, 0x17000008),
-       OID_U32(DOT11_OID_PREAMBLESETTINGS, 0x17000009),
-       OID_STRUCT(DOT11_OID_RATES, 0x1700000A, u8[IWMAX_BITRATES + 1],
-                  OID_TYPE_RAW),
-       OID_U32(DOT11_OID_CCAMODESUPPORTED, 0x1700000B),
-       OID_U32(DOT11_OID_CCAMODE, 0x1700000C),
-       OID_UNKNOWN(DOT11_OID_RSSIVECTOR, 0x1700000D),
-       OID_UNKNOWN(DOT11_OID_OUTPUTPOWERTABLE, 0x1700000E),
-       OID_U32(DOT11_OID_OUTPUTPOWER, 0x1700000F),
-       OID_STRUCT(DOT11_OID_SUPPORTEDRATES, 0x17000010,
-                  u8[IWMAX_BITRATES + 1], OID_TYPE_RAW),
-       OID_U32_C(DOT11_OID_FREQUENCY, 0x17000011),
-       [DOT11_OID_SUPPORTEDFREQUENCIES] =
-           {0x17000012, 0, sizeof (struct obj_frequencies)
-            + sizeof (u16) * IWMAX_FREQ, OID_TYPE_FREQUENCIES},
-
-       OID_U32(DOT11_OID_NOISEFLOOR, 0x17000013),
-       OID_STRUCT(DOT11_OID_FREQUENCYACTIVITY, 0x17000014, u8[IWMAX_FREQ + 1],
-                  OID_TYPE_RAW),
-       OID_UNKNOWN(DOT11_OID_IQCALIBRATIONTABLE, 0x17000015),
-       OID_U32(DOT11_OID_NONERPPROTECTION, 0x17000016),
-       OID_U32(DOT11_OID_SLOTSETTINGS, 0x17000017),
-       OID_U32(DOT11_OID_NONERPTIMEOUT, 0x17000018),
-       OID_U32(DOT11_OID_PROFILES, 0x17000019),
-       OID_STRUCT(DOT11_OID_EXTENDEDRATES, 0x17000020,
-                  u8[IWMAX_BITRATES + 1], OID_TYPE_RAW),
-
-       OID_STRUCT_MLME(DOT11_OID_DEAUTHENTICATE, 0x18000000),
-       OID_STRUCT_MLME(DOT11_OID_AUTHENTICATE, 0x18000001),
-       OID_STRUCT_MLME(DOT11_OID_DISASSOCIATE, 0x18000002),
-       OID_STRUCT_MLME(DOT11_OID_ASSOCIATE, 0x18000003),
-       OID_UNKNOWN(DOT11_OID_SCAN, 0x18000004),
-       OID_STRUCT_MLMEEX(DOT11_OID_BEACON, 0x18000005),
-       OID_STRUCT_MLMEEX(DOT11_OID_PROBE, 0x18000006),
-       OID_STRUCT_MLMEEX(DOT11_OID_DEAUTHENTICATEEX, 0x18000007),
-       OID_STRUCT_MLMEEX(DOT11_OID_AUTHENTICATEEX, 0x18000008),
-       OID_STRUCT_MLMEEX(DOT11_OID_DISASSOCIATEEX, 0x18000009),
-       OID_STRUCT_MLMEEX(DOT11_OID_ASSOCIATEEX, 0x1800000A),
-       OID_STRUCT_MLMEEX(DOT11_OID_REASSOCIATE, 0x1800000B),
-       OID_STRUCT_MLMEEX(DOT11_OID_REASSOCIATEEX, 0x1800000C),
-
-       OID_U32(DOT11_OID_NONERPSTATUS, 0x1E000000),
-
-       OID_U32(DOT11_OID_STATIMEOUT, 0x19000000),
-       OID_U32_C(DOT11_OID_MLMEAUTOLEVEL, 0x19000001),
-       OID_U32(DOT11_OID_BSSTIMEOUT, 0x19000002),
-       [DOT11_OID_ATTACHMENT] = {0x19000003, 0,
-               sizeof(struct obj_attachment), OID_TYPE_ATTACH},
-       OID_STRUCT_C(DOT11_OID_PSMBUFFER, 0x19000004, struct obj_buffer,
-                    OID_TYPE_BUFFER),
-
-       OID_U32(DOT11_OID_BSSS, 0x1C000000),
-       [DOT11_OID_BSSX] = {0x1C000001, 63, sizeof (struct obj_bss),
-                           OID_TYPE_BSS},      /*DOT11_OID_BSS1,...,DOT11_OID_BSS64 */
-       OID_STRUCT(DOT11_OID_BSSFIND, 0x1C000042, struct obj_bss, OID_TYPE_BSS),
-       [DOT11_OID_BSSLIST] = {0x1C000043, 0, sizeof (struct
-                                                     obj_bsslist) +
-                              sizeof (struct obj_bss[IWMAX_BSS]),
-                              OID_TYPE_BSSLIST},
-
-       OID_UNKNOWN(OID_INL_TUNNEL, 0xFF020000),
-       OID_UNKNOWN(OID_INL_MEMADDR, 0xFF020001),
-       OID_UNKNOWN(OID_INL_MEMORY, 0xFF020002),
-       OID_U32_C(OID_INL_MODE, 0xFF020003),
-       OID_UNKNOWN(OID_INL_COMPONENT_NR, 0xFF020004),
-       OID_STRUCT(OID_INL_VERSION, 0xFF020005, u8[8], OID_TYPE_RAW),
-       OID_UNKNOWN(OID_INL_INTERFACE_ID, 0xFF020006),
-       OID_UNKNOWN(OID_INL_COMPONENT_ID, 0xFF020007),
-       OID_U32_C(OID_INL_CONFIG, 0xFF020008),
-       OID_U32_C(OID_INL_DOT11D_CONFORMANCE, 0xFF02000C),
-       OID_U32(OID_INL_PHYCAPABILITIES, 0xFF02000D),
-       OID_U32_C(OID_INL_OUTPUTPOWER, 0xFF02000F),
-
-};
-
-int
-mgt_init(islpci_private *priv)
-{
-       int i;
-
-       priv->mib = kcalloc(OID_NUM_LAST, sizeof (void *), GFP_KERNEL);
-       if (!priv->mib)
-               return -ENOMEM;
-
-       /* Alloc the cache */
-       for (i = 0; i < OID_NUM_LAST; i++) {
-               if (isl_oid[i].flags & OID_FLAG_CACHED) {
-                       priv->mib[i] = kcalloc(isl_oid[i].size,
-                                              (isl_oid[i].range + 1),
-                                              GFP_KERNEL);
-                       if (!priv->mib[i])
-                               return -ENOMEM;
-               } else
-                       priv->mib[i] = NULL;
-       }
-
-       init_rwsem(&priv->mib_sem);
-       prism54_mib_init(priv);
-
-       return 0;
-}
-
-void
-mgt_clean(islpci_private *priv)
-{
-       int i;
-
-       if (!priv->mib)
-               return;
-       for (i = 0; i < OID_NUM_LAST; i++) {
-               kfree(priv->mib[i]);
-               priv->mib[i] = NULL;
-       }
-       kfree(priv->mib);
-       priv->mib = NULL;
-}
-
-void
-mgt_le_to_cpu(int type, void *data)
-{
-       switch (type) {
-       case OID_TYPE_U32:
-               *(u32 *) data = le32_to_cpu(*(u32 *) data);
-               break;
-       case OID_TYPE_BUFFER:{
-                       struct obj_buffer *buff = data;
-                       buff->size = le32_to_cpu(buff->size);
-                       buff->addr = le32_to_cpu(buff->addr);
-                       break;
-               }
-       case OID_TYPE_BSS:{
-                       struct obj_bss *bss = data;
-                       bss->age = le16_to_cpu(bss->age);
-                       bss->channel = le16_to_cpu(bss->channel);
-                       bss->capinfo = le16_to_cpu(bss->capinfo);
-                       bss->rates = le16_to_cpu(bss->rates);
-                       bss->basic_rates = le16_to_cpu(bss->basic_rates);
-                       break;
-               }
-       case OID_TYPE_BSSLIST:{
-                       struct obj_bsslist *list = data;
-                       int i;
-                       list->nr = le32_to_cpu(list->nr);
-                       for (i = 0; i < list->nr; i++)
-                               mgt_le_to_cpu(OID_TYPE_BSS, &list->bsslist[i]);
-                       break;
-               }
-       case OID_TYPE_FREQUENCIES:{
-                       struct obj_frequencies *freq = data;
-                       int i;
-                       freq->nr = le16_to_cpu(freq->nr);
-                       for (i = 0; i < freq->nr; i++)
-                               freq->mhz[i] = le16_to_cpu(freq->mhz[i]);
-                       break;
-               }
-       case OID_TYPE_MLME:{
-                       struct obj_mlme *mlme = data;
-                       mlme->id = le16_to_cpu(mlme->id);
-                       mlme->state = le16_to_cpu(mlme->state);
-                       mlme->code = le16_to_cpu(mlme->code);
-                       break;
-               }
-       case OID_TYPE_MLMEEX:{
-                       struct obj_mlmeex *mlme = data;
-                       mlme->id = le16_to_cpu(mlme->id);
-                       mlme->state = le16_to_cpu(mlme->state);
-                       mlme->code = le16_to_cpu(mlme->code);
-                       mlme->size = le16_to_cpu(mlme->size);
-                       break;
-               }
-       case OID_TYPE_ATTACH:{
-                       struct obj_attachment *attach = data;
-                       attach->id = le16_to_cpu(attach->id);
-                       attach->size = le16_to_cpu(attach->size);
-                       break;
-       }
-       case OID_TYPE_SSID:
-       case OID_TYPE_KEY:
-       case OID_TYPE_ADDR:
-       case OID_TYPE_RAW:
-               break;
-       default:
-               BUG();
-       }
-}
-
-static void
-mgt_cpu_to_le(int type, void *data)
-{
-       switch (type) {
-       case OID_TYPE_U32:
-               *(u32 *) data = cpu_to_le32(*(u32 *) data);
-               break;
-       case OID_TYPE_BUFFER:{
-                       struct obj_buffer *buff = data;
-                       buff->size = cpu_to_le32(buff->size);
-                       buff->addr = cpu_to_le32(buff->addr);
-                       break;
-               }
-       case OID_TYPE_BSS:{
-                       struct obj_bss *bss = data;
-                       bss->age = cpu_to_le16(bss->age);
-                       bss->channel = cpu_to_le16(bss->channel);
-                       bss->capinfo = cpu_to_le16(bss->capinfo);
-                       bss->rates = cpu_to_le16(bss->rates);
-                       bss->basic_rates = cpu_to_le16(bss->basic_rates);
-                       break;
-               }
-       case OID_TYPE_BSSLIST:{
-                       struct obj_bsslist *list = data;
-                       int i;
-                       list->nr = cpu_to_le32(list->nr);
-                       for (i = 0; i < list->nr; i++)
-                               mgt_cpu_to_le(OID_TYPE_BSS, &list->bsslist[i]);
-                       break;
-               }
-       case OID_TYPE_FREQUENCIES:{
-                       struct obj_frequencies *freq = data;
-                       int i;
-                       freq->nr = cpu_to_le16(freq->nr);
-                       for (i = 0; i < freq->nr; i++)
-                               freq->mhz[i] = cpu_to_le16(freq->mhz[i]);
-                       break;
-               }
-       case OID_TYPE_MLME:{
-                       struct obj_mlme *mlme = data;
-                       mlme->id = cpu_to_le16(mlme->id);
-                       mlme->state = cpu_to_le16(mlme->state);
-                       mlme->code = cpu_to_le16(mlme->code);
-                       break;
-               }
-       case OID_TYPE_MLMEEX:{
-                       struct obj_mlmeex *mlme = data;
-                       mlme->id = cpu_to_le16(mlme->id);
-                       mlme->state = cpu_to_le16(mlme->state);
-                       mlme->code = cpu_to_le16(mlme->code);
-                       mlme->size = cpu_to_le16(mlme->size);
-                       break;
-               }
-       case OID_TYPE_ATTACH:{
-                       struct obj_attachment *attach = data;
-                       attach->id = cpu_to_le16(attach->id);
-                       attach->size = cpu_to_le16(attach->size);
-                       break;
-       }
-       case OID_TYPE_SSID:
-       case OID_TYPE_KEY:
-       case OID_TYPE_ADDR:
-       case OID_TYPE_RAW:
-               break;
-       default:
-               BUG();
-       }
-}
-
-/* Note : data is modified during this function */
-
-int
-mgt_set_request(islpci_private *priv, enum oid_num_t n, int extra, void *data)
-{
-       int ret = 0;
-       struct islpci_mgmtframe *response = NULL;
-       int response_op = PIMFOR_OP_ERROR;
-       int dlen;
-       void *cache, *_data = data;
-       u32 oid;
-
-       BUG_ON(n >= OID_NUM_LAST);
-       BUG_ON(extra > isl_oid[n].range);
-
-       if (!priv->mib)
-               /* memory has been freed */
-               return -1;
-
-       dlen = isl_oid[n].size;
-       cache = priv->mib[n];
-       cache += (cache ? extra * dlen : 0);
-       oid = isl_oid[n].oid + extra;
-
-       if (_data == NULL)
-               /* we are requested to re-set a cached value */
-               _data = cache;
-       else
-               mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, _data);
-       /* If we are going to write to the cache, we don't want anyone to read
-        * it -> acquire write lock.
-        * Else we could acquire a read lock to be sure we don't bother the
-        * commit process (which takes a write lock). But I'm not sure if it's
-        * needed.
-        */
-       if (cache)
-               down_write(&priv->mib_sem);
-
-       if (islpci_get_state(priv) >= PRV_STATE_READY) {
-               ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET, oid,
-                                            _data, dlen, &response);
-               if (!ret) {
-                       response_op = response->header->operation;
-                       islpci_mgt_release(response);
-               }
-               if (ret || response_op == PIMFOR_OP_ERROR)
-                       ret = -EIO;
-       } else if (!cache)
-               ret = -EIO;
-
-       if (cache) {
-               if (!ret && data)
-                       memcpy(cache, _data, dlen);
-               up_write(&priv->mib_sem);
-       }
-
-       /* re-set given data to what it was */
-       if (data)
-               mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
-       return ret;
-}
-
-/* None of these are cached */
-int
-mgt_set_varlen(islpci_private *priv, enum oid_num_t n, void *data, int extra_len)
-{
-       int ret = 0;
-       struct islpci_mgmtframe *response;
-       int response_op = PIMFOR_OP_ERROR;
-       int dlen;
-       u32 oid;
-
-       BUG_ON(n >= OID_NUM_LAST);
-
-       dlen = isl_oid[n].size;
-       oid = isl_oid[n].oid;
-
-       mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
-       if (islpci_get_state(priv) >= PRV_STATE_READY) {
-               ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET, oid,
-                                            data, dlen + extra_len, &response);
-               if (!ret) {
-                       response_op = response->header->operation;
-                       islpci_mgt_release(response);
-               }
-               if (ret || response_op == PIMFOR_OP_ERROR)
-                       ret = -EIO;
-       } else
-               ret = -EIO;
-
-       /* re-set given data to what it was */
-       if (data)
-               mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, data);
-
-       return ret;
-}
-
-int
-mgt_get_request(islpci_private *priv, enum oid_num_t n, int extra, void *data,
-               union oid_res_t *res)
-{
-
-       int ret = -EIO;
-       int reslen = 0;
-       struct islpci_mgmtframe *response = NULL;
-
-       int dlen;
-       void *cache, *_res = NULL;
-       u32 oid;
-
-       BUG_ON(n >= OID_NUM_LAST);
-       BUG_ON(extra > isl_oid[n].range);
-
-       res->ptr = NULL;
-
-       if (!priv->mib)
-               /* memory has been freed */
-               return -1;
-
-       dlen = isl_oid[n].size;
-       cache = priv->mib[n];
-       cache += cache ? extra * dlen : 0;
-       oid = isl_oid[n].oid + extra;
-       reslen = dlen;
-
-       if (cache)
-               down_read(&priv->mib_sem);
-
-       if (islpci_get_state(priv) >= PRV_STATE_READY) {
-               ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
-                                            oid, data, dlen, &response);
-               if (ret || !response ||
-                   response->header->operation == PIMFOR_OP_ERROR) {
-                       if (response)
-                               islpci_mgt_release(response);
-                       ret = -EIO;
-               }
-               if (!ret) {
-                       _res = response->data;
-                       reslen = response->header->length;
-               }
-       } else if (cache) {
-               _res = cache;
-               ret = 0;
-       }
-       if ((isl_oid[n].flags & OID_FLAG_TYPE) == OID_TYPE_U32)
-               res->u = ret ? 0 : le32_to_cpu(*(u32 *) _res);
-       else {
-               res->ptr = kmalloc(reslen, GFP_KERNEL);
-               BUG_ON(res->ptr == NULL);
-               if (ret)
-                       memset(res->ptr, 0, reslen);
-               else {
-                       memcpy(res->ptr, _res, reslen);
-                       mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE,
-                                     res->ptr);
-               }
-       }
-       if (cache)
-               up_read(&priv->mib_sem);
-
-       if (response && !ret)
-               islpci_mgt_release(response);
-
-       if (reslen > isl_oid[n].size)
-               printk(KERN_DEBUG
-                      "mgt_get_request(0x%x): received data length was bigger "
-                      "than expected (%d > %d). Memory is probably corrupted...",
-                      oid, reslen, isl_oid[n].size);
-
-       return ret;
-}
-
-/* lock outside */
-int
-mgt_commit_list(islpci_private *priv, enum oid_num_t *l, int n)
-{
-       int i, ret = 0;
-       struct islpci_mgmtframe *response;
-
-       for (i = 0; i < n; i++) {
-               struct oid_t *t = &(isl_oid[l[i]]);
-               void *data = priv->mib[l[i]];
-               int j = 0;
-               u32 oid = t->oid;
-               BUG_ON(data == NULL);
-               while (j <= t->range) {
-                       int r = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_SET,
-                                                     oid, data, t->size,
-                                                     &response);
-                       if (response) {
-                               r |= (response->header->operation == PIMFOR_OP_ERROR);
-                               islpci_mgt_release(response);
-                       }
-                       if (r)
-                               printk(KERN_ERR "%s: mgt_commit_list: failure. "
-                                       "oid=%08x err=%d\n",
-                                       priv->ndev->name, oid, r);
-                       ret |= r;
-                       j++;
-                       oid++;
-                       data += t->size;
-               }
-       }
-       return ret;
-}
-
-/* Lock outside */
-
-void
-mgt_set(islpci_private *priv, enum oid_num_t n, void *data)
-{
-       BUG_ON(n >= OID_NUM_LAST);
-       BUG_ON(priv->mib[n] == NULL);
-
-       memcpy(priv->mib[n], data, isl_oid[n].size);
-       mgt_cpu_to_le(isl_oid[n].flags & OID_FLAG_TYPE, priv->mib[n]);
-}
-
-void
-mgt_get(islpci_private *priv, enum oid_num_t n, void *res)
-{
-       BUG_ON(n >= OID_NUM_LAST);
-       BUG_ON(priv->mib[n] == NULL);
-       BUG_ON(res == NULL);
-
-       memcpy(res, priv->mib[n], isl_oid[n].size);
-       mgt_le_to_cpu(isl_oid[n].flags & OID_FLAG_TYPE, res);
-}
-
-/* Commits the cache. Lock outside. */
-
-static enum oid_num_t commit_part1[] = {
-       OID_INL_CONFIG,
-       OID_INL_MODE,
-       DOT11_OID_BSSTYPE,
-       DOT11_OID_CHANNEL,
-       DOT11_OID_MLMEAUTOLEVEL
-};
-
-static enum oid_num_t commit_part2[] = {
-       DOT11_OID_SSID,
-       DOT11_OID_PSMBUFFER,
-       DOT11_OID_AUTHENABLE,
-       DOT11_OID_PRIVACYINVOKED,
-       DOT11_OID_EXUNENCRYPTED,
-       DOT11_OID_DEFKEYX,      /* MULTIPLE */
-       DOT11_OID_DEFKEYID,
-       DOT11_OID_DOT1XENABLE,
-       OID_INL_DOT11D_CONFORMANCE,
-       /* Do not initialize this - fw < 1.0.4.3 rejects it
-       OID_INL_OUTPUTPOWER,
-       */
-};
-
-/* update the MAC addr. */
-static int
-mgt_update_addr(islpci_private *priv)
-{
-       struct islpci_mgmtframe *res;
-       int ret;
-
-       ret = islpci_mgt_transaction(priv->ndev, PIMFOR_OP_GET,
-                                    isl_oid[GEN_OID_MACADDRESS].oid, NULL,
-                                    isl_oid[GEN_OID_MACADDRESS].size, &res);
-
-       if ((ret == 0) && res && (res->header->operation != PIMFOR_OP_ERROR))
-               memcpy(priv->ndev->dev_addr, res->data, ETH_ALEN);
-       else
-               ret = -EIO;
-       if (res)
-               islpci_mgt_release(res);
-
-       if (ret)
-               printk(KERN_ERR "%s: mgt_update_addr: failure\n", priv->ndev->name);
-       return ret;
-}
-
-int
-mgt_commit(islpci_private *priv)
-{
-       int rvalue;
-       enum oid_num_t u;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return 0;
-
-       rvalue = mgt_commit_list(priv, commit_part1, ARRAY_SIZE(commit_part1));
-
-       if (priv->iw_mode != IW_MODE_MONITOR)
-               rvalue |= mgt_commit_list(priv, commit_part2, ARRAY_SIZE(commit_part2));
-
-       u = OID_INL_MODE;
-       rvalue |= mgt_commit_list(priv, &u, 1);
-       rvalue |= mgt_update_addr(priv);
-
-       if (rvalue) {
-               /* some request have failed. The device might be in an
-                  incoherent state. We should reset it ! */
-               printk(KERN_DEBUG "%s: mgt_commit: failure\n", priv->ndev->name);
-       }
-       return rvalue;
-}
-
-/* The following OIDs need to be "unlatched":
- *
- * MEDIUMLIMIT,BEACONPERIOD,DTIMPERIOD,ATIMWINDOW,LISTENINTERVAL
- * FREQUENCY,EXTENDEDRATES.
- *
- * The way to do this is to set ESSID. Note though that they may get
- * unlatch before though by setting another OID. */
-#if 0
-void
-mgt_unlatch_all(islpci_private *priv)
-{
-       u32 u;
-       int rvalue = 0;
-
-       if (islpci_get_state(priv) < PRV_STATE_INIT)
-               return;
-
-       u = DOT11_OID_SSID;
-       rvalue = mgt_commit_list(priv, &u, 1);
-       /* Necessary if in MANUAL RUN mode? */
-#if 0
-       u = OID_INL_MODE;
-       rvalue |= mgt_commit_list(priv, &u, 1);
-
-       u = DOT11_OID_MLMEAUTOLEVEL;
-       rvalue |= mgt_commit_list(priv, &u, 1);
-
-       u = OID_INL_MODE;
-       rvalue |= mgt_commit_list(priv, &u, 1);
-#endif
-
-       if (rvalue)
-               printk(KERN_DEBUG "%s: Unlatching OIDs failed\n", priv->ndev->name);
-}
-#endif
-
-/* This will tell you if you are allowed to answer a mlme(ex) request .*/
-
-int
-mgt_mlme_answer(islpci_private *priv)
-{
-       u32 mlmeautolevel;
-       /* Acquire a read lock because if we are in a mode change, it's
-        * possible to answer true, while the card is leaving master to managed
-        * mode. Answering to a mlme in this situation could hang the card.
-        */
-       down_read(&priv->mib_sem);
-       mlmeautolevel =
-           le32_to_cpu(*(u32 *) priv->mib[DOT11_OID_MLMEAUTOLEVEL]);
-       up_read(&priv->mib_sem);
-
-       return ((priv->iw_mode == IW_MODE_MASTER) &&
-               (mlmeautolevel >= DOT11_MLME_INTERMEDIATE));
-}
-
-enum oid_num_t
-mgt_oidtonum(u32 oid)
-{
-       int i;
-
-       for (i = 0; i < OID_NUM_LAST; i++)
-               if (isl_oid[i].oid == oid)
-                       return i;
-
-       printk(KERN_DEBUG "looking for an unknown oid 0x%x", oid);
-
-       return OID_NUM_LAST;
-}
-
-int
-mgt_response_to_str(enum oid_num_t n, union oid_res_t *r, char *str)
-{
-       switch (isl_oid[n].flags & OID_FLAG_TYPE) {
-       case OID_TYPE_U32:
-               return scnprintf(str, PRIV_STR_SIZE, "%u\n", r->u);
-       case OID_TYPE_BUFFER:{
-                       struct obj_buffer *buff = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "size=%u\naddr=0x%X\n", buff->size,
-                                       buff->addr);
-               }
-               break;
-       case OID_TYPE_BSS:{
-                       struct obj_bss *bss = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "age=%u\nchannel=%u\n"
-                                       "capinfo=0x%X\nrates=0x%X\n"
-                                       "basic_rates=0x%X\n", bss->age,
-                                       bss->channel, bss->capinfo,
-                                       bss->rates, bss->basic_rates);
-               }
-               break;
-       case OID_TYPE_BSSLIST:{
-                       struct obj_bsslist *list = r->ptr;
-                       int i, k;
-                       k = scnprintf(str, PRIV_STR_SIZE, "nr=%u\n", list->nr);
-                       for (i = 0; i < list->nr; i++)
-                               k += scnprintf(str + k, PRIV_STR_SIZE - k,
-                                             "bss[%u] :\nage=%u\nchannel=%u\n"
-                                             "capinfo=0x%X\nrates=0x%X\n"
-                                             "basic_rates=0x%X\n",
-                                             i, list->bsslist[i].age,
-                                             list->bsslist[i].channel,
-                                             list->bsslist[i].capinfo,
-                                             list->bsslist[i].rates,
-                                             list->bsslist[i].basic_rates);
-                       return k;
-               }
-               break;
-       case OID_TYPE_FREQUENCIES:{
-                       struct obj_frequencies *freq = r->ptr;
-                       int i, t;
-                       printk("nr : %u\n", freq->nr);
-                       t = scnprintf(str, PRIV_STR_SIZE, "nr=%u\n", freq->nr);
-                       for (i = 0; i < freq->nr; i++)
-                               t += scnprintf(str + t, PRIV_STR_SIZE - t,
-                                             "mhz[%u]=%u\n", i, freq->mhz[i]);
-                       return t;
-               }
-               break;
-       case OID_TYPE_MLME:{
-                       struct obj_mlme *mlme = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "id=0x%X\nstate=0x%X\ncode=0x%X\n",
-                                       mlme->id, mlme->state, mlme->code);
-               }
-               break;
-       case OID_TYPE_MLMEEX:{
-                       struct obj_mlmeex *mlme = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "id=0x%X\nstate=0x%X\n"
-                                       "code=0x%X\nsize=0x%X\n", mlme->id,
-                                       mlme->state, mlme->code, mlme->size);
-               }
-               break;
-       case OID_TYPE_ATTACH:{
-                       struct obj_attachment *attach = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "id=%d\nsize=%d\n",
-                                       attach->id,
-                                       attach->size);
-               }
-               break;
-       case OID_TYPE_SSID:{
-                       struct obj_ssid *ssid = r->ptr;
-                       return scnprintf(str, PRIV_STR_SIZE,
-                                       "length=%u\noctets=%.*s\n",
-                                       ssid->length, ssid->length,
-                                       ssid->octets);
-               }
-               break;
-       case OID_TYPE_KEY:{
-                       struct obj_key *key = r->ptr;
-                       int t, i;
-                       t = scnprintf(str, PRIV_STR_SIZE,
-                                    "type=0x%X\nlength=0x%X\nkey=0x",
-                                    key->type, key->length);
-                       for (i = 0; i < key->length; i++)
-                               t += scnprintf(str + t, PRIV_STR_SIZE - t,
-                                             "%02X:", key->key[i]);
-                       t += scnprintf(str + t, PRIV_STR_SIZE - t, "\n");
-                       return t;
-               }
-               break;
-       case OID_TYPE_RAW:
-       case OID_TYPE_ADDR:{
-                       unsigned char *buff = r->ptr;
-                       int t, i;
-                       t = scnprintf(str, PRIV_STR_SIZE, "hex data=");
-                       for (i = 0; i < isl_oid[n].size; i++)
-                               t += scnprintf(str + t, PRIV_STR_SIZE - t,
-                                             "%02X:", buff[i]);
-                       t += scnprintf(str + t, PRIV_STR_SIZE - t, "\n");
-                       return t;
-               }
-               break;
-       default:
-               BUG();
-       }
-       return 0;
-}
diff --git a/drivers/net/wireless/intersil/prism54/oid_mgt.h b/drivers/net/wireless/intersil/prism54/oid_mgt.h
deleted file mode 100644 (file)
index a7dc9e2..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2003 Aurelien Alleaume <slts@free.fr>
- */
-
-#if !defined(_OID_MGT_H)
-#define _OID_MGT_H
-
-#include "isl_oid.h"
-#include "islpci_dev.h"
-
-extern struct oid_t isl_oid[];
-
-int mgt_init(islpci_private *);
-
-void mgt_clean(islpci_private *);
-
-/* I don't know where to put these 2 */
-extern const int frequency_list_a[];
-int channel_of_freq(int);
-
-void mgt_le_to_cpu(int, void *);
-
-int mgt_set_request(islpci_private *, enum oid_num_t, int, void *);
-int mgt_set_varlen(islpci_private *, enum oid_num_t, void *, int);
-
-
-int mgt_get_request(islpci_private *, enum oid_num_t, int, void *,
-                   union oid_res_t *);
-
-int mgt_commit_list(islpci_private *, enum oid_num_t *, int);
-
-void mgt_set(islpci_private *, enum oid_num_t, void *);
-
-void mgt_get(islpci_private *, enum oid_num_t, void *);
-
-int mgt_commit(islpci_private *);
-
-int mgt_mlme_answer(islpci_private *);
-
-enum oid_num_t mgt_oidtonum(u32 oid);
-
-int mgt_response_to_str(enum oid_num_t, union oid_res_t *, char *);
-
-#endif                         /* !defined(_OID_MGT_H) */
-/* EOF */
diff --git a/drivers/net/wireless/intersil/prism54/prismcompat.h b/drivers/net/wireless/intersil/prism54/prismcompat.h
deleted file mode 100644 (file)
index c4489b6..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  (C) 2004 Margit Schubert-While <margitsw@t-online.de>
- */
-
-/*
- *     Compatibility header file to aid support of different kernel versions
- */
-
-#ifdef PRISM54_COMPAT24
-#include "prismcompat24.h"
-#else  /* PRISM54_COMPAT24 */
-
-#ifndef _PRISM_COMPAT_H
-#define _PRISM_COMPAT_H
-
-#include <linux/device.h>
-#include <linux/firmware.h>
-#include <linux/moduleparam.h>
-#include <linux/workqueue.h>
-#include <linux/compiler.h>
-
-#ifndef __iomem
-#define __iomem
-#endif
-
-#define PRISM_FW_PDEV          &priv->pdev->dev
-
-#endif                         /* _PRISM_COMPAT_H */
-#endif                         /* PRISM54_COMPAT24 */
index 1bb8746..d8e4f29 100644 (file)
@@ -43,10 +43,8 @@ static int lbs_ethtool_get_eeprom(struct net_device *dev,
        int ret;
 
        if (eeprom->offset + eeprom->len > LBS_EEPROM_LEN ||
-           eeprom->len > LBS_EEPROM_READ_LEN) {
-               ret = -EINVAL;
-               goto out;
-       }
+           eeprom->len > LBS_EEPROM_READ_LEN)
+               return -EINVAL;
 
        cmd.hdr.size = cpu_to_le16(sizeof(struct cmd_ds_802_11_eeprom_access) -
                LBS_EEPROM_READ_LEN + eeprom->len);
@@ -57,8 +55,7 @@ static int lbs_ethtool_get_eeprom(struct net_device *dev,
        if (!ret)
                memcpy(bytes, cmd.value, eeprom->len);
 
-out:
-        return ret;
+       return ret;
 }
 
 static void lbs_ethtool_get_wol(struct net_device *dev,
index 162d557..2bd00f4 100644 (file)
@@ -49,6 +49,7 @@ mwifiex_sdio-y += sdio.o
 obj-$(CONFIG_MWIFIEX_SDIO) += mwifiex_sdio.o
 
 mwifiex_pcie-y += pcie.o
+mwifiex_pcie-y += pcie_quirks.o
 obj-$(CONFIG_MWIFIEX_PCIE) += mwifiex_pcie.o
 
 mwifiex_usb-y += usb.o
index 3a11342..171a257 100644 (file)
@@ -187,7 +187,7 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
        host_cmd = (struct host_cmd_ds_command *) (cmd_node->cmd_skb->data);
 
        /* Sanity test */
-       if (host_cmd == NULL || host_cmd->size == 0) {
+       if (host_cmd->size == 0) {
                mwifiex_dbg(adapter, ERROR,
                            "DNLD_CMD: host_cmd is null\t"
                            "or cmd size is 0, not sending\n");
index 4651751..c6ccce4 100644 (file)
@@ -27,6 +27,7 @@
 #include "wmm.h"
 #include "11n.h"
 #include "pcie.h"
+#include "pcie_quirks.h"
 
 #define PCIE_VERSION   "1.0"
 #define DRV_NAME        "Marvell mwifiex PCIe"
@@ -410,6 +411,9 @@ static int mwifiex_pcie_probe(struct pci_dev *pdev,
                        return ret;
        }
 
+       /* check quirks */
+       mwifiex_initialize_quirks(card);
+
        if (mwifiex_add_card(card, &card->fw_done, &pcie_ops,
                             MWIFIEX_PCIE, &pdev->dev)) {
                pr_err("%s failed\n", __func__);
@@ -524,6 +528,13 @@ static void mwifiex_pcie_reset_prepare(struct pci_dev *pdev)
        mwifiex_shutdown_sw(adapter);
        clear_bit(MWIFIEX_IFACE_WORK_DEVICE_DUMP, &card->work_flags);
        clear_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &card->work_flags);
+
+       /* On MS Surface gen4+ devices FLR isn't effective to recover from
+        * hangups, so we power-cycle the card instead.
+        */
+       if (card->quirks & QUIRK_FW_RST_D3COLD)
+               mwifiex_pcie_reset_d3cold_quirk(pdev);
+
        mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
 
        card->pci_reset_ongoing = true;
index 5ed613d..981e330 100644 (file)
@@ -244,6 +244,7 @@ struct pcie_service_card {
        unsigned long work_flags;
 
        bool pci_reset_ongoing;
+       unsigned long quirks;
 };
 
 static inline int
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.c
new file mode 100644 (file)
index 0000000..0234cf3
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * NXP Wireless LAN device driver: PCIE and platform specific quirks
+ *
+ * This software file (the "File") is distributed by NXP
+ * under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License").  You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available by writing to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or on the
+ * worldwide web at http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+#include <linux/dmi.h>
+
+#include "pcie_quirks.h"
+
+/* quirk table based on DMI matching */
+static const struct dmi_system_id mwifiex_quirk_table[] = {
+       {
+               .ident = "Surface Pro 4",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 4"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Pro 5",
+               .matches = {
+                       /* match for SKU here due to generic product name "Surface Pro" */
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1796"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Pro 5 (LTE)",
+               .matches = {
+                       /* match for SKU here due to generic product name "Surface Pro" */
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "Surface_Pro_1807"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Pro 6",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Pro 6"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Book 1",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Book 2",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Book 2"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Laptop 1",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {
+               .ident = "Surface Laptop 2",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Surface Laptop 2"),
+               },
+               .driver_data = (void *)QUIRK_FW_RST_D3COLD,
+       },
+       {}
+};
+
+void mwifiex_initialize_quirks(struct pcie_service_card *card)
+{
+       struct pci_dev *pdev = card->dev;
+       const struct dmi_system_id *dmi_id;
+
+       dmi_id = dmi_first_match(mwifiex_quirk_table);
+       if (dmi_id)
+               card->quirks = (uintptr_t)dmi_id->driver_data;
+
+       if (!card->quirks)
+               dev_info(&pdev->dev, "no quirks enabled\n");
+       if (card->quirks & QUIRK_FW_RST_D3COLD)
+               dev_info(&pdev->dev, "quirk reset_d3cold enabled\n");
+}
+
+static void mwifiex_pcie_set_power_d3cold(struct pci_dev *pdev)
+{
+       dev_info(&pdev->dev, "putting into D3cold...\n");
+
+       pci_save_state(pdev);
+       if (pci_is_enabled(pdev))
+               pci_disable_device(pdev);
+       pci_set_power_state(pdev, PCI_D3cold);
+}
+
+static int mwifiex_pcie_set_power_d0(struct pci_dev *pdev)
+{
+       int ret;
+
+       dev_info(&pdev->dev, "putting into D0...\n");
+
+       pci_set_power_state(pdev, PCI_D0);
+       ret = pci_enable_device(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "pci_enable_device failed\n");
+               return ret;
+       }
+       pci_restore_state(pdev);
+
+       return 0;
+}
+
+int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev)
+{
+       struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
+       int ret;
+
+       /* Power-cycle (put into D3cold then D0) */
+       dev_info(&pdev->dev, "Using reset_d3cold quirk to perform FW reset\n");
+
+       /* We need to perform power-cycle also for bridge of wifi because
+        * on some devices (e.g. Surface Book 1), the OS for some reasons
+        * can't know the real power state of the bridge.
+        * When tried to power-cycle only wifi, the reset failed with the
+        * following dmesg log:
+        * "Cannot transition to power state D0 for parent in D3hot".
+        */
+       mwifiex_pcie_set_power_d3cold(pdev);
+       mwifiex_pcie_set_power_d3cold(parent_pdev);
+
+       ret = mwifiex_pcie_set_power_d0(parent_pdev);
+       if (ret)
+               return ret;
+       ret = mwifiex_pcie_set_power_d0(pdev);
+       if (ret)
+               return ret;
+
+       return 0;
+}
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h b/drivers/net/wireless/marvell/mwifiex/pcie_quirks.h
new file mode 100644 (file)
index 0000000..8ec4176
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * NXP Wireless LAN device driver: PCIE and platform specific quirks
+ *
+ * This software file (the "File") is distributed by NXP
+ * under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License").  You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available by writing to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA or on the
+ * worldwide web at http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
+ * this warranty disclaimer.
+ */
+
+#include "pcie.h"
+
+#define QUIRK_FW_RST_D3COLD    BIT(0)
+
+void mwifiex_initialize_quirks(struct pcie_service_card *card);
+int mwifiex_pcie_reset_d3cold_quirk(struct pci_dev *pdev);
index 653f9e0..4062e51 100644 (file)
@@ -1325,8 +1325,8 @@ mwifiex_set_gen_ie_helper(struct mwifiex_private *priv, u8 *ie_data_ptr,
                          u16 ie_len)
 {
        struct ieee_types_vendor_header *pvendor_ie;
-       const u8 wpa_oui[] = { 0x00, 0x50, 0xf2, 0x01 };
-       const u8 wps_oui[] = { 0x00, 0x50, 0xf2, 0x04 };
+       static const u8 wpa_oui[] = { 0x00, 0x50, 0xf2, 0x01 };
+       static const u8 wps_oui[] = { 0x00, 0x50, 0xf2, 0x04 };
        u16 unparsed_len = ie_len, cur_ie_len;
 
        /* If the passed length is zero, reset the buffer */
index d822ec1..61a96b7 100644 (file)
@@ -134,7 +134,7 @@ struct fw_sync_header {
 struct fw_data {
        struct fw_header fw_hdr;
        __le32 seq_num;
-       u8 data[1];
+       u8 data[];
 } __packed;
 
 #endif /*_MWIFIEX_USB_H */
index e14b9fc..42e03a7 100644 (file)
@@ -129,10 +129,8 @@ static int wilc_sdio_probe(struct sdio_func *func,
 
        ret = wilc_cfg80211_init(&wilc, &func->dev, WILC_HIF_SDIO,
                                 &wilc_hif_sdio);
-       if (ret) {
-               kfree(sdio_priv);
-               return ret;
-       }
+       if (ret)
+               goto free;
 
        if (IS_ENABLED(CONFIG_WILC1000_HW_OOB_INTR)) {
                struct device_node *np = func->card->dev.of_node;
@@ -148,24 +146,29 @@ static int wilc_sdio_probe(struct sdio_func *func,
        wilc->bus_data = sdio_priv;
        wilc->dev = &func->dev;
 
-       wilc->rtc_clk = devm_clk_get(&func->card->dev, "rtc");
-       if (PTR_ERR_OR_ZERO(wilc->rtc_clk) == -EPROBE_DEFER) {
-               kfree(sdio_priv);
-               return -EPROBE_DEFER;
-       } else if (!IS_ERR(wilc->rtc_clk))
-               clk_prepare_enable(wilc->rtc_clk);
+       wilc->rtc_clk = devm_clk_get_optional(&func->card->dev, "rtc");
+       if (IS_ERR(wilc->rtc_clk)) {
+               ret = PTR_ERR(wilc->rtc_clk);
+               goto dispose_irq;
+       }
+       clk_prepare_enable(wilc->rtc_clk);
 
        dev_info(&func->dev, "Driver Initializing success\n");
        return 0;
+
+dispose_irq:
+       irq_dispose_mapping(wilc->dev_irq_num);
+       wilc_netdev_cleanup(wilc);
+free:
+       kfree(sdio_priv);
+       return ret;
 }
 
 static void wilc_sdio_remove(struct sdio_func *func)
 {
        struct wilc *wilc = sdio_get_drvdata(func);
 
-       if (!IS_ERR(wilc->rtc_clk))
-               clk_disable_unprepare(wilc->rtc_clk);
-
+       clk_disable_unprepare(wilc->rtc_clk);
        wilc_netdev_cleanup(wilc);
 }
 
index 8e9aaf0..dd481dc 100644 (file)
@@ -39,6 +39,7 @@ MODULE_PARM_DESC(enable_crc16,
 #define WILC_SPI_RSP_HDR_EXTRA_DATA    8
 
 struct wilc_spi {
+       bool isinit;            /* true if SPI protocol has been configured */
        bool probing_crc;       /* true if we're probing chip's CRC config */
        bool crc7_enabled;      /* true if crc7 is currently enabled */
        bool crc16_enabled;     /* true if crc16 is currently enabled */
@@ -154,34 +155,37 @@ static int wilc_bus_probe(struct spi_device *spi)
                return -ENOMEM;
 
        ret = wilc_cfg80211_init(&wilc, &spi->dev, WILC_HIF_SPI, &wilc_hif_spi);
-       if (ret) {
-               kfree(spi_priv);
-               return ret;
-       }
+       if (ret)
+               goto free;
 
        spi_set_drvdata(spi, wilc);
        wilc->dev = &spi->dev;
        wilc->bus_data = spi_priv;
        wilc->dev_irq_num = spi->irq;
 
-       wilc->rtc_clk = devm_clk_get(&spi->dev, "rtc");
-       if (PTR_ERR_OR_ZERO(wilc->rtc_clk) == -EPROBE_DEFER) {
-               kfree(spi_priv);
-               return -EPROBE_DEFER;
-       } else if (!IS_ERR(wilc->rtc_clk))
-               clk_prepare_enable(wilc->rtc_clk);
+       wilc->rtc_clk = devm_clk_get_optional(&spi->dev, "rtc");
+       if (IS_ERR(wilc->rtc_clk)) {
+               ret = PTR_ERR(wilc->rtc_clk);
+               goto netdev_cleanup;
+       }
+       clk_prepare_enable(wilc->rtc_clk);
 
        return 0;
+
+netdev_cleanup:
+       wilc_netdev_cleanup(wilc);
+free:
+       kfree(spi_priv);
+       return ret;
 }
 
 static int wilc_bus_remove(struct spi_device *spi)
 {
        struct wilc *wilc = spi_get_drvdata(spi);
 
-       if (!IS_ERR(wilc->rtc_clk))
-               clk_disable_unprepare(wilc->rtc_clk);
-
+       clk_disable_unprepare(wilc->rtc_clk);
        wilc_netdev_cleanup(wilc);
+
        return 0;
 }
 
@@ -905,15 +909,15 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
        struct wilc_spi *spi_priv = wilc->bus_data;
        u32 reg;
        u32 chipid;
-       static int isinit;
        int ret, i;
 
-       if (isinit) {
+       if (spi_priv->isinit) {
+               /* Confirm we can read chipid register without error: */
                ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
-               if (ret)
-                       dev_err(&spi->dev, "Fail cmd read chip id...\n");
+               if (ret == 0)
+                       return 0;
 
-               return ret;
+               dev_err(&spi->dev, "Fail cmd read chip id...\n");
        }
 
        /*
@@ -971,7 +975,7 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
        spi_priv->probing_crc = false;
 
        /*
-        * make sure can read back chip id correctly
+        * make sure can read chip id without protocol error
         */
        ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
        if (ret) {
@@ -979,7 +983,7 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
                return ret;
        }
 
-       isinit = 1;
+       spi_priv->isinit = true;
 
        return 0;
 }
index 2030fc7..200a103 100644 (file)
@@ -1127,27 +1127,22 @@ int wilc_wlan_start(struct wilc *wilc)
        }
        acquire_bus(wilc, WILC_BUS_ACQUIRE_ONLY);
        ret = wilc->hif_func->hif_write_reg(wilc, WILC_VMM_CORE_CFG, reg);
-       if (ret) {
-               release_bus(wilc, WILC_BUS_RELEASE_ONLY);
-               return ret;
-       }
+       if (ret)
+               goto release;
+
        reg = 0;
        if (wilc->io_type == WILC_HIF_SDIO && wilc->dev_irq_num)
                reg |= WILC_HAVE_SDIO_IRQ_GPIO;
 
        ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_1, reg);
-       if (ret) {
-               release_bus(wilc, WILC_BUS_RELEASE_ONLY);
-               return ret;
-       }
+       if (ret)
+               goto release;
 
        wilc->hif_func->hif_sync_ext(wilc, NUM_INT_EXT);
 
        ret = wilc->hif_func->hif_read_reg(wilc, WILC_CHIPID, &chipid);
-       if (ret) {
-               release_bus(wilc, WILC_BUS_RELEASE_ONLY);
-               return ret;
-       }
+       if (ret)
+               goto release;
 
        wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, &reg);
        if ((reg & BIT(10)) == BIT(10)) {
@@ -1159,8 +1154,9 @@ int wilc_wlan_start(struct wilc *wilc)
        reg |= BIT(10);
        ret = wilc->hif_func->hif_write_reg(wilc, WILC_GLB_RESET_0, reg);
        wilc->hif_func->hif_read_reg(wilc, WILC_GLB_RESET_0, &reg);
-       release_bus(wilc, WILC_BUS_RELEASE_ONLY);
 
+release:
+       release_bus(wilc, WILC_BUS_RELEASE_ONLY);
        return ret;
 }
 
@@ -1174,36 +1170,34 @@ int wilc_wlan_stop(struct wilc *wilc, struct wilc_vif *vif)
        ret = wilc->hif_func->hif_read_reg(wilc, WILC_GP_REG_0, &reg);
        if (ret) {
                netdev_err(vif->ndev, "Error while reading reg\n");
-               release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
-               return ret;
+               goto release;
        }
 
        ret = wilc->hif_func->hif_write_reg(wilc, WILC_GP_REG_0,
                                        (reg | WILC_ABORT_REQ_BIT));
        if (ret) {
                netdev_err(vif->ndev, "Error while writing reg\n");
-               release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
-               return ret;
+               goto release;
        }
 
        ret = wilc->hif_func->hif_read_reg(wilc, WILC_FW_HOST_COMM, &reg);
        if (ret) {
                netdev_err(vif->ndev, "Error while reading reg\n");
-               release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
-               return ret;
+               goto release;
        }
        reg = BIT(0);
 
        ret = wilc->hif_func->hif_write_reg(wilc, WILC_FW_HOST_COMM, reg);
        if (ret) {
                netdev_err(vif->ndev, "Error while writing reg\n");
-               release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
-               return ret;
+               goto release;
        }
 
+       ret = 0;
+release:
        release_bus(wilc, WILC_BUS_RELEASE_ALLOW_SLEEP);
 
-       return 0;
+       return ret;
 }
 
 void wilc_wlan_cleanup(struct net_device *dev)
index 0003df5..840728e 100644 (file)
@@ -295,9 +295,9 @@ static int pearl_skb2rbd_attach(struct qtnf_pcie_pearl_state *ps, u16 index)
        priv->rx_skb[index] = skb;
        rxbd = &ps->rx_bd_vbase[index];
 
-       paddr = pci_map_single(priv->pdev, skb->data,
-                              SKB_BUF_SIZE, PCI_DMA_FROMDEVICE);
-       if (pci_dma_mapping_error(priv->pdev, paddr)) {
+       paddr = dma_map_single(&priv->pdev->dev, skb->data, SKB_BUF_SIZE,
+                              DMA_FROM_DEVICE);
+       if (dma_mapping_error(&priv->pdev->dev, paddr)) {
                pr_err("skb DMA mapping error: %pad\n", &paddr);
                return -ENOMEM;
        }
@@ -357,8 +357,8 @@ static void qtnf_pearl_free_xfer_buffers(struct qtnf_pcie_pearl_state *ps)
                        skb = priv->rx_skb[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
                                              le32_to_cpu(rxbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr,
+                                        SKB_BUF_SIZE, DMA_FROM_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->rx_skb[i] = NULL;
                }
@@ -371,8 +371,8 @@ static void qtnf_pearl_free_xfer_buffers(struct qtnf_pcie_pearl_state *ps)
                        skb = priv->tx_skb[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
                                              le32_to_cpu(txbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, skb->len,
-                                        PCI_DMA_TODEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+                                        DMA_TO_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->tx_skb[i] = NULL;
                }
@@ -485,8 +485,8 @@ static void qtnf_pearl_data_tx_reclaim(struct qtnf_pcie_pearl_state *ps)
                        txbd = &ps->tx_bd_vbase[i];
                        paddr = QTN_HOST_ADDR(le32_to_cpu(txbd->addr_h),
                                              le32_to_cpu(txbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, skb->len,
-                                        PCI_DMA_TODEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+                                        DMA_TO_DEVICE);
 
                        if (skb->dev) {
                                dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
@@ -559,9 +559,9 @@ static int qtnf_pcie_skb_send(struct qtnf_bus *bus, struct sk_buff *skb)
        priv->tx_skb[i] = skb;
        len = skb->len;
 
-       skb_paddr = pci_map_single(priv->pdev, skb->data,
-                                  skb->len, PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(priv->pdev, skb_paddr)) {
+       skb_paddr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
+                                  DMA_TO_DEVICE);
+       if (dma_mapping_error(&priv->pdev->dev, skb_paddr)) {
                pr_err("skb DMA mapping error: %pad\n", &skb_paddr);
                ret = -ENOMEM;
                goto tx_done;
@@ -748,8 +748,8 @@ static int qtnf_pcie_pearl_rx_poll(struct napi_struct *napi, int budget)
                if (skb) {
                        skb_paddr = QTN_HOST_ADDR(le32_to_cpu(rxbd->addr_h),
                                                  le32_to_cpu(rxbd->addr));
-                       pci_unmap_single(priv->pdev, skb_paddr, SKB_BUF_SIZE,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&priv->pdev->dev, skb_paddr,
+                                        SKB_BUF_SIZE, DMA_FROM_DEVICE);
                }
 
                if (consume) {
index 24f1be8..9534e1b 100644 (file)
@@ -255,9 +255,9 @@ topaz_skb2rbd_attach(struct qtnf_pcie_topaz_state *ts, u16 index, u32 wrap)
 
        ts->base.rx_skb[index] = skb;
 
-       paddr = pci_map_single(ts->base.pdev, skb->data,
-                              SKB_BUF_SIZE, PCI_DMA_FROMDEVICE);
-       if (pci_dma_mapping_error(ts->base.pdev, paddr)) {
+       paddr = dma_map_single(&ts->base.pdev->dev, skb->data, SKB_BUF_SIZE,
+                              DMA_FROM_DEVICE);
+       if (dma_mapping_error(&ts->base.pdev->dev, paddr)) {
                pr_err("skb mapping error: %pad\n", &paddr);
                return -ENOMEM;
        }
@@ -306,8 +306,8 @@ static void qtnf_topaz_free_xfer_buffers(struct qtnf_pcie_topaz_state *ts)
                        rxbd = &ts->rx_bd_vbase[i];
                        skb = priv->rx_skb[i];
                        paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(rxbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr,
+                                        SKB_BUF_SIZE, DMA_FROM_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->rx_skb[i] = NULL;
                        rxbd->addr = 0;
@@ -321,8 +321,8 @@ static void qtnf_topaz_free_xfer_buffers(struct qtnf_pcie_topaz_state *ts)
                        txbd = &ts->tx_bd_vbase[i];
                        skb = priv->tx_skb[i];
                        paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(txbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, SKB_BUF_SIZE,
-                                        PCI_DMA_TODEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr,
+                                        SKB_BUF_SIZE, DMA_TO_DEVICE);
                        dev_kfree_skb_any(skb);
                        priv->tx_skb[i] = NULL;
                        txbd->addr = 0;
@@ -414,8 +414,8 @@ static void qtnf_topaz_data_tx_reclaim(struct qtnf_pcie_topaz_state *ts)
                if (likely(skb)) {
                        txbd = &ts->tx_bd_vbase[i];
                        paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(txbd->addr));
-                       pci_unmap_single(priv->pdev, paddr, skb->len,
-                                        PCI_DMA_TODEVICE);
+                       dma_unmap_single(&priv->pdev->dev, paddr, skb->len,
+                                        DMA_TO_DEVICE);
 
                        if (skb->dev) {
                                dev_sw_netstats_tx_add(skb->dev, 1, skb->len);
@@ -522,9 +522,9 @@ static int qtnf_pcie_data_tx(struct qtnf_bus *bus, struct sk_buff *skb,
        priv->tx_skb[i] = skb;
        len = skb->len;
 
-       skb_paddr = pci_map_single(priv->pdev, skb->data,
-                                  skb->len, PCI_DMA_TODEVICE);
-       if (pci_dma_mapping_error(priv->pdev, skb_paddr)) {
+       skb_paddr = dma_map_single(&priv->pdev->dev, skb->data, skb->len,
+                                  DMA_TO_DEVICE);
+       if (dma_mapping_error(&priv->pdev->dev, skb_paddr)) {
                ret = -ENOMEM;
                goto tx_done;
        }
@@ -653,8 +653,8 @@ static int qtnf_topaz_rx_poll(struct napi_struct *napi, int budget)
 
                if (skb) {
                        skb_paddr = QTN_HOST_ADDR(0x0, le32_to_cpu(rxbd->addr));
-                       pci_unmap_single(priv->pdev, skb_paddr, SKB_BUF_SIZE,
-                                        PCI_DMA_FROMDEVICE);
+                       dma_unmap_single(&priv->pdev->dev, skb_paddr,
+                                        SKB_BUF_SIZE, DMA_FROM_DEVICE);
                }
 
                if (consume) {
index 590bd97..0f5009c 100644 (file)
@@ -982,7 +982,9 @@ AP to AP    1       1       dest AP         src AP          dest    source
        if (local->net_type == ADHOC) {
                writeb(0, &ptx->mac.frame_ctl_2);
                memcpy_toio(ptx->mac.addr_1, ((struct ethhdr *)data)->h_dest,
-                           2 * ADDRLEN);
+                           ADDRLEN);
+               memcpy_toio(ptx->mac.addr_2, ((struct ethhdr *)data)->h_source,
+                           ADDRLEN);
                memcpy_toio(ptx->mac.addr_3, local->bss_id, ADDRLEN);
        } else { /* infrastructure */
 
@@ -2424,9 +2426,7 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs,
        copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff);
        /* if we are trying to get authenticated */
        if (local->sparm.b4.a_network_type == ADHOC) {
-               pr_debug("ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n",
-                     msg->var[0], msg->var[1], msg->var[2], msg->var[3],
-                     msg->var[4], msg->var[5]);
+               pr_debug("ray_cs rx_auth var= %6ph\n", msg->var);
                if (msg->var[2] == 1) {
                        pr_debug("ray_cs Sending authentication response.\n");
                        if (!build_auth_frame
index 0173577..7ddce3c 100644 (file)
@@ -1378,6 +1378,8 @@ struct rtl8xxxu_priv {
        u8 no_pape:1;
        u8 int_buf[USB_INTR_CONTENT_LENGTH];
        u8 rssi_level;
+       DECLARE_BITMAP(tx_aggr_started, IEEE80211_NUM_TIDS);
+       DECLARE_BITMAP(tid_tx_operational, IEEE80211_NUM_TIDS);
        /*
         * Only one virtual interface permitted because only STA mode
         * is supported and no iface_combinations are provided.
index ac1061c..774341b 100644 (file)
@@ -1670,7 +1670,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
                        priv->rf_paths = 2;
                        priv->rx_paths = 2;
                        priv->tx_paths = 2;
-                       priv->usb_interrupts = 1;
+                       priv->usb_interrupts = 0;
                        priv->rtl_chip = RTL8192C;
                }
                priv->has_wifi = 1;
@@ -1680,7 +1680,7 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
                priv->rx_paths = 1;
                priv->tx_paths = 1;
                priv->rtl_chip = RTL8188C;
-               priv->usb_interrupts = 1;
+               priv->usb_interrupts = 0;
                priv->has_wifi = 1;
        }
 
@@ -4805,6 +4805,8 @@ rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
        struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
        struct rtl8xxxu_priv *priv = hw->priv;
        struct device *dev = &priv->udev->dev;
+       u8 *qc = ieee80211_get_qos_ctl(hdr);
+       u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
        u32 rate;
        u16 rate_flags = tx_info->control.rates[0].flags;
        u16 seq_number;
@@ -4828,7 +4830,7 @@ rtl8xxxu_fill_txdesc_v1(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
 
        tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT);
 
-       if (ampdu_enable)
+       if (ampdu_enable && test_bit(tid, priv->tid_tx_operational))
                tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE);
        else
                tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK);
@@ -4876,6 +4878,8 @@ rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
        struct rtl8xxxu_priv *priv = hw->priv;
        struct device *dev = &priv->udev->dev;
        struct rtl8xxxu_txdesc40 *tx_desc40;
+       u8 *qc = ieee80211_get_qos_ctl(hdr);
+       u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
        u32 rate;
        u16 rate_flags = tx_info->control.rates[0].flags;
        u16 seq_number;
@@ -4902,7 +4906,7 @@ rtl8xxxu_fill_txdesc_v2(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
 
        tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT);
 
-       if (ampdu_enable)
+       if (ampdu_enable && test_bit(tid, priv->tid_tx_operational))
                tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE);
        else
                tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK);
@@ -5015,12 +5019,19 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw,
        if (ieee80211_is_data_qos(hdr->frame_control) && sta) {
                if (sta->ht_cap.ht_supported) {
                        u32 ampdu, val32;
+                       u8 *qc = ieee80211_get_qos_ctl(hdr);
+                       u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
 
                        ampdu = (u32)sta->ht_cap.ampdu_density;
                        val32 = ampdu << TXDESC_AMPDU_DENSITY_SHIFT;
                        tx_desc->txdw2 |= cpu_to_le32(val32);
 
                        ampdu_enable = true;
+
+                       if (!test_bit(tid, priv->tx_aggr_started) &&
+                           !(skb->protocol == cpu_to_be16(ETH_P_PAE)))
+                               if (!ieee80211_start_tx_ba_session(sta, tid, 0))
+                                       set_bit(tid, priv->tx_aggr_started);
                }
        }
 
@@ -6096,6 +6107,7 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
        struct device *dev = &priv->udev->dev;
        u8 ampdu_factor, ampdu_density;
        struct ieee80211_sta *sta = params->sta;
+       u16 tid = params->tid;
        enum ieee80211_ampdu_mlme_action action = params->action;
 
        switch (action) {
@@ -6108,17 +6120,20 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                dev_dbg(dev,
                        "Changed HT: ampdu_factor %02x, ampdu_density %02x\n",
                        ampdu_factor, ampdu_density);
-               break;
+               return IEEE80211_AMPDU_TX_START_IMMEDIATE;
+       case IEEE80211_AMPDU_TX_STOP_CONT:
        case IEEE80211_AMPDU_TX_STOP_FLUSH:
-               dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__);
-               rtl8xxxu_set_ampdu_factor(priv, 0);
-               rtl8xxxu_set_ampdu_min_space(priv, 0);
-               break;
        case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
-               dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n",
-                        __func__);
+               dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP\n", __func__);
                rtl8xxxu_set_ampdu_factor(priv, 0);
                rtl8xxxu_set_ampdu_min_space(priv, 0);
+               clear_bit(tid, priv->tx_aggr_started);
+               clear_bit(tid, priv->tid_tx_operational);
+               ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+               break;
+       case IEEE80211_AMPDU_TX_OPERATIONAL:
+               dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_OPERATIONAL\n", __func__);
+               set_bit(tid, priv->tid_tx_operational);
                break;
        case IEEE80211_AMPDU_RX_START:
                dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__);
index 76dd881..9b83c71 100644 (file)
@@ -160,6 +160,15 @@ static u32 targetchnl_2g[TARGET_CHNL_NUM_2G] = {
        25711, 25658, 25606, 25554, 25502, 25451, 25328
 };
 
+static const u8 channel_all[59] = {
+       1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+       36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
+       60, 62, 64, 100, 102, 104, 106, 108, 110, 112,
+       114, 116, 118, 120, 122, 124, 126, 128, 130,
+       132, 134, 136, 138, 140, 149, 151, 153, 155,
+       157, 159, 161, 163, 165
+};
+
 static u32 _rtl92d_phy_calculate_bit_shift(u32 bitmask)
 {
        u32 i = ffs(bitmask);
@@ -681,7 +690,7 @@ static bool _rtl92d_phy_bb_config(struct ieee80211_hw *hw)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_phy *rtlphy = &(rtlpriv->phy);
        struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw));
-       bool rtstatus = true;
+       bool rtstatus;
 
        rtl_dbg(rtlpriv, COMP_INIT, DBG_TRACE, "==>\n");
        rtstatus = _rtl92d_phy_config_bb_with_headerfile(hw,
@@ -1354,15 +1363,7 @@ static void _rtl92d_phy_switch_rf_setting(struct ieee80211_hw *hw, u8 channel)
 
 u8 rtl92d_get_rightchnlplace_for_iqk(u8 chnl)
 {
-       u8 channel_all[59] = {
-               1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-               36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
-               60, 62, 64, 100, 102, 104, 106, 108, 110, 112,
-               114, 116, 118, 120, 122, 124, 126, 128, 130,
-               132, 134, 136, 138, 140, 149, 151, 153, 155,
-               157, 159, 161, 163, 165
-       };
-       u8 place = chnl;
+       u8 place;
 
        if (chnl > 14) {
                for (place = 14; place < sizeof(channel_all); place++) {
@@ -3220,37 +3221,28 @@ void rtl92d_phy_config_macphymode_info(struct ieee80211_hw *hw)
 u8 rtl92d_get_chnlgroup_fromarray(u8 chnl)
 {
        u8 group;
-       u8 channel_info[59] = {
-               1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-               36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56,
-               58, 60, 62, 64, 100, 102, 104, 106, 108,
-               110, 112, 114, 116, 118, 120, 122, 124,
-               126, 128, 130, 132, 134, 136, 138, 140,
-               149, 151, 153, 155, 157, 159, 161, 163,
-               165
-       };
 
-       if (channel_info[chnl] <= 3)
+       if (channel_all[chnl] <= 3)
                group = 0;
-       else if (channel_info[chnl] <= 9)
+       else if (channel_all[chnl] <= 9)
                group = 1;
-       else if (channel_info[chnl] <= 14)
+       else if (channel_all[chnl] <= 14)
                group = 2;
-       else if (channel_info[chnl] <= 44)
+       else if (channel_all[chnl] <= 44)
                group = 3;
-       else if (channel_info[chnl] <= 54)
+       else if (channel_all[chnl] <= 54)
                group = 4;
-       else if (channel_info[chnl] <= 64)
+       else if (channel_all[chnl] <= 64)
                group = 5;
-       else if (channel_info[chnl] <= 112)
+       else if (channel_all[chnl] <= 112)
                group = 6;
-       else if (channel_info[chnl] <= 126)
+       else if (channel_all[chnl] <= 126)
                group = 7;
-       else if (channel_info[chnl] <= 140)
+       else if (channel_all[chnl] <= 140)
                group = 8;
-       else if (channel_info[chnl] <= 153)
+       else if (channel_all[chnl] <= 153)
                group = 9;
-       else if (channel_info[chnl] <= 159)
+       else if (channel_all[chnl] <= 159)
                group = 10;
        else
                group = 11;
index c0e4b11..73d6807 100644 (file)
@@ -15,9 +15,9 @@ rtw88_core-y += main.o \
           ps.o \
           sec.o \
           bf.o \
-          wow.o \
           regd.o
 
+rtw88_core-$(CONFIG_PM) += wow.o
 
 obj-$(CONFIG_RTW88_8822B)      += rtw88_8822b.o
 rtw88_8822b-objs               := rtw8822b.o rtw8822b_table.o
index 3bfa5ec..e639951 100644 (file)
@@ -819,7 +819,7 @@ static u16 rtw_get_rsvd_page_probe_req_size(struct rtw_dev *rtwdev,
                        continue;
                if ((!ssid && !rsvd_pkt->ssid) ||
                    rtw_ssid_equal(rsvd_pkt->ssid, ssid))
-                       size = rsvd_pkt->skb->len;
+                       size = rsvd_pkt->probe_req_size;
        }
 
        return size;
@@ -1047,6 +1047,8 @@ static struct sk_buff *rtw_get_rsvd_page_skb(struct ieee80211_hw *hw,
                                                         ssid->ssid_len, 0);
                else
                        skb_new = ieee80211_probereq_get(hw, vif->addr, NULL, 0, 0);
+               if (skb_new)
+                       rsvd_pkt->probe_req_size = (u16)skb_new->len;
                break;
        case RSVD_NLO_INFO:
                skb_new = rtw_nlo_info_get(hw);
@@ -1643,6 +1645,7 @@ int rtw_fw_dump_fifo(struct rtw_dev *rtwdev, u8 fifo_sel, u32 addr, u32 size,
 static void __rtw_fw_update_pkt(struct rtw_dev *rtwdev, u8 pkt_id, u16 size,
                                u8 location)
 {
+       struct rtw_chip_info *chip = rtwdev->chip;
        u8 h2c_pkt[H2C_PKT_SIZE] = {0};
        u16 total_size = H2C_PKT_HDR_SIZE + H2C_PKT_UPDATE_PKT_LEN;
 
@@ -1653,6 +1656,7 @@ static void __rtw_fw_update_pkt(struct rtw_dev *rtwdev, u8 pkt_id, u16 size,
        UPDATE_PKT_SET_LOCATION(h2c_pkt, location);
 
        /* include txdesc size */
+       size += chip->tx_pkt_desc_sz;
        UPDATE_PKT_SET_SIZE(h2c_pkt, size);
 
        rtw_fw_send_h2c_packet(rtwdev, h2c_pkt);
@@ -1662,7 +1666,7 @@ void rtw_fw_update_pkt_probe_req(struct rtw_dev *rtwdev,
                                 struct cfg80211_ssid *ssid)
 {
        u8 loc;
-       u32 size;
+       u16 size;
 
        loc = rtw_get_rsvd_page_probe_req_location(rtwdev, ssid);
        if (!loc) {
index a8a7162..64dcde3 100644 (file)
@@ -99,7 +99,7 @@ enum rtw_beacon_filter_offload_mode {
        BCN_FILTER_OFFLOAD_MODE_2,
        BCN_FILTER_OFFLOAD_MODE_3,
 
-       BCN_FILTER_OFFLOAD_MODE_DEFAULT = BCN_FILTER_OFFLOAD_MODE_1,
+       BCN_FILTER_OFFLOAD_MODE_DEFAULT = BCN_FILTER_OFFLOAD_MODE_0,
 };
 
 struct rtw_coex_info_req {
@@ -147,6 +147,7 @@ struct rtw_rsvd_page {
        u8 page;
        bool add_txdesc;
        struct cfg80211_ssid *ssid;
+       u16 probe_req_size;
 };
 
 enum rtw_keep_alive_pkt_type {
index c636483..6bb55e6 100644 (file)
@@ -1338,6 +1338,8 @@ static void rtw_init_ht_cap(struct rtw_dev *rtwdev,
 
        if (rtw_chip_has_rx_ldpc(rtwdev))
                ht_cap->cap |= IEEE80211_HT_CAP_LDPC_CODING;
+       if (rtw_chip_has_tx_stbc(rtwdev))
+               ht_cap->cap |= IEEE80211_HT_CAP_TX_STBC;
 
        if (efuse->hw_cap.bw & BIT(RTW_CHANNEL_WIDTH_40))
                ht_cap->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
index e5af375..5681212 100644 (file)
@@ -1146,6 +1146,7 @@ struct rtw_chip_info {
        u8 txgi_factor;
        bool is_pwr_by_rate_dec;
        bool rx_ldpc;
+       bool tx_stbc;
        u8 max_power_index;
 
        u16 fw_fifo_addr[RTW_FW_FIFO_MAX];
@@ -1959,6 +1960,11 @@ static inline bool rtw_chip_has_rx_ldpc(struct rtw_dev *rtwdev)
        return rtwdev->chip->rx_ldpc;
 }
 
+static inline bool rtw_chip_has_tx_stbc(struct rtw_dev *rtwdev)
+{
+       return rtwdev->chip->tx_stbc;
+}
+
 static inline void rtw_release_macid(struct rtw_dev *rtwdev, u8 mac_id)
 {
        clear_bit(mac_id, rtwdev->mac_id_map);
index e7d17ab..a7a6ebf 100644 (file)
@@ -268,11 +268,6 @@ static int rtw_pci_init_rx_ring(struct rtw_dev *rtwdev,
        int i, allocated;
        int ret = 0;
 
-       if (len > TRX_BD_IDX_MASK) {
-               rtw_err(rtwdev, "len %d exceeds maximum RX entries\n", len);
-               return -EINVAL;
-       }
-
        head = dma_alloc_coherent(&pdev->dev, ring_sz, &dma, GFP_KERNEL);
        if (!head) {
                rtw_err(rtwdev, "failed to allocate rx ring\n");
@@ -1359,6 +1354,25 @@ static void rtw_pci_clkreq_set(struct rtw_dev *rtwdev, bool enable)
        rtw_dbi_write8(rtwdev, RTK_PCIE_LINK_CFG, value);
 }
 
+static void rtw_pci_clkreq_pad_low(struct rtw_dev *rtwdev, bool enable)
+{
+       u8 value;
+       int ret;
+
+       ret = rtw_dbi_read8(rtwdev, RTK_PCIE_LINK_CFG, &value);
+       if (ret) {
+               rtw_err(rtwdev, "failed to read CLKREQ_L1, ret=%d", ret);
+               return;
+       }
+
+       if (enable)
+               value &= ~BIT_CLKREQ_N_PAD;
+       else
+               value |= BIT_CLKREQ_N_PAD;
+
+       rtw_dbi_write8(rtwdev, RTK_PCIE_LINK_CFG, value);
+}
+
 static void rtw_pci_aspm_set(struct rtw_dev *rtwdev, bool enable)
 {
        u8 value;
@@ -1500,11 +1514,25 @@ static void rtw_pci_phy_cfg(struct rtw_dev *rtwdev)
 
 static int __maybe_unused rtw_pci_suspend(struct device *dev)
 {
+       struct ieee80211_hw *hw = dev_get_drvdata(dev);
+       struct rtw_dev *rtwdev = hw->priv;
+       struct rtw_chip_info *chip = rtwdev->chip;
+       struct rtw_efuse *efuse = &rtwdev->efuse;
+
+       if (chip->id == RTW_CHIP_TYPE_8822C && efuse->rfe_option == 6)
+               rtw_pci_clkreq_pad_low(rtwdev, true);
        return 0;
 }
 
 static int __maybe_unused rtw_pci_resume(struct device *dev)
 {
+       struct ieee80211_hw *hw = dev_get_drvdata(dev);
+       struct rtw_dev *rtwdev = hw->priv;
+       struct rtw_chip_info *chip = rtwdev->chip;
+       struct rtw_efuse *efuse = &rtwdev->efuse;
+
+       if (chip->id == RTW_CHIP_TYPE_8822C && efuse->rfe_option == 6)
+               rtw_pci_clkreq_pad_low(rtwdev, false);
        return 0;
 }
 
@@ -1701,6 +1729,15 @@ static const struct dmi_system_id rtw88_pci_quirks[] = {
                },
                .driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
        },
+       {
+               .callback = disable_pci_caps,
+               .ident = "HP HP Pavilion Laptop 14-ce0xxx",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion Laptop 14-ce0xxx"),
+               },
+               .driver_data = (void *)BIT(QUIRK_DIS_PCI_CAP_ASPM),
+       },
        {}
 };
 
index 0ffae88..66f78eb 100644 (file)
@@ -37,6 +37,7 @@
 #define RTK_PCIE_LINK_CFG      0x0719
 #define BIT_CLKREQ_SW_EN       BIT(4)
 #define BIT_L1_SW_EN           BIT(3)
+#define BIT_CLKREQ_N_PAD       BIT(0)
 #define RTK_PCIE_CLKDLY_CTRL   0x0725
 
 #define BIT_PCI_BCNQ_FLAG      BIT(4)
index 8bf3cd3..f3ad079 100644 (file)
@@ -5288,6 +5288,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
        .bfer_su_max_num = 2,
        .bfer_mu_max_num = 1,
        .rx_ldpc = true,
+       .tx_stbc = true,
 
 #ifdef CONFIG_PM
        .wow_fw_name = "rtw88/rtw8822c_wow_fw.bin",
index 0193708..3a101aa 100644 (file)
@@ -162,7 +162,7 @@ void rtw_tx_report_purge_timer(struct timer_list *t)
        if (skb_queue_len(&tx_report->queue) == 0)
                return;
 
-       rtw_dbg(rtwdev, RTW_DBG_TX, "purge skb(s) not reported by firmware\n");
+       rtw_warn(rtwdev, "failed to get tx report from firmware\n");
 
        spin_lock_irqsave(&tx_report->q_lock, flags);
        skb_queue_purge(&tx_report->queue);
index fc9544f..89dc595 100644 (file)
 
 static void rtw_wow_show_wakeup_reason(struct rtw_dev *rtwdev)
 {
+       struct cfg80211_wowlan_nd_info nd_info;
+       struct cfg80211_wowlan_wakeup wakeup = {
+               .pattern_idx = -1,
+       };
        u8 reason;
 
        reason = rtw_read8(rtwdev, REG_WOWLAN_WAKE_REASON);
 
-       if (reason == RTW_WOW_RSN_RX_DEAUTH)
+       switch (reason) {
+       case RTW_WOW_RSN_RX_DEAUTH:
+               wakeup.disconnect = true;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx deauth\n");
-       else if (reason == RTW_WOW_RSN_DISCONNECT)
+               break;
+       case RTW_WOW_RSN_DISCONNECT:
+               wakeup.disconnect = true;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: AP is off\n");
-       else if (reason == RTW_WOW_RSN_RX_MAGIC_PKT)
+               break;
+       case RTW_WOW_RSN_RX_MAGIC_PKT:
+               wakeup.magic_pkt = true;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx magic packet\n");
-       else if (reason == RTW_WOW_RSN_RX_GTK_REKEY)
+               break;
+       case RTW_WOW_RSN_RX_GTK_REKEY:
+               wakeup.gtk_rekey_failure = true;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx gtk rekey\n");
-       else if (reason == RTW_WOW_RSN_RX_PTK_REKEY)
-               rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx ptk rekey\n");
-       else if (reason == RTW_WOW_RSN_RX_PATTERN_MATCH)
+               break;
+       case RTW_WOW_RSN_RX_PATTERN_MATCH:
+               /* Current firmware and driver don't report pattern index
+                * Use pattern_idx to 0 defaultly.
+                */
+               wakeup.pattern_idx = 0;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "WOW: Rx pattern match packet\n");
-       else if (reason == RTW_WOW_RSN_RX_NLO)
+               break;
+       case RTW_WOW_RSN_RX_NLO:
+               /* Current firmware and driver don't report ssid index.
+                * Use 0 for n_matches based on its comment.
+                */
+               nd_info.n_matches = 0;
+               wakeup.net_detect = &nd_info;
                rtw_dbg(rtwdev, RTW_DBG_WOW, "Rx NLO\n");
-       else
+               break;
+       default:
                rtw_warn(rtwdev, "Unknown wakeup reason %x\n", reason);
+               ieee80211_report_wowlan_wakeup(rtwdev->wow.wow_vif, NULL,
+                                              GFP_KERNEL);
+               return;
+       }
+       ieee80211_report_wowlan_wakeup(rtwdev->wow.wow_vif, &wakeup,
+                                      GFP_KERNEL);
 }
 
 static void rtw_wow_pattern_write_cam(struct rtw_dev *rtwdev, u8 addr,
@@ -283,15 +311,26 @@ static void rtw_wow_rx_dma_start(struct rtw_dev *rtwdev)
 
 static int rtw_wow_check_fw_status(struct rtw_dev *rtwdev, bool wow_enable)
 {
-       /* wait 100ms for wow firmware to finish work */
-       msleep(100);
+       int ret;
+       u8 check;
+       u32 check_dis;
 
        if (wow_enable) {
-               if (rtw_read8(rtwdev, REG_WOWLAN_WAKE_REASON))
+               ret = read_poll_timeout(rtw_read8, check, !check, 1000,
+                                       100000, true, rtwdev,
+                                       REG_WOWLAN_WAKE_REASON);
+               if (ret)
                        goto wow_fail;
        } else {
-               if (rtw_read32_mask(rtwdev, REG_FE1IMR, BIT_FS_RXDONE) ||
-                   rtw_read32_mask(rtwdev, REG_RXPKT_NUM, BIT_RW_RELEASE))
+               ret = read_poll_timeout(rtw_read32_mask, check_dis,
+                                       !check_dis, 1000, 100000, true, rtwdev,
+                                       REG_FE1IMR, BIT_FS_RXDONE);
+               if (ret)
+                       goto wow_fail;
+               ret = read_poll_timeout(rtw_read32_mask, check_dis,
+                                       !check_dis, 1000, 100000, false, rtwdev,
+                                       REG_RXPKT_NUM, BIT_RW_RELEASE);
+               if (ret)
                        goto wow_fail;
        }
 
@@ -432,37 +471,31 @@ static void rtw_wow_fw_media_status(struct rtw_dev *rtwdev, bool connect)
        rtw_iterate_stas_atomic(rtwdev, rtw_wow_fw_media_status_iter, &data);
 }
 
-static void rtw_wow_config_pno_rsvd_page(struct rtw_dev *rtwdev,
-                                        struct rtw_vif *rtwvif)
+static int rtw_wow_config_wow_fw_rsvd_page(struct rtw_dev *rtwdev)
 {
-       rtw_add_rsvd_page_pno(rtwdev, rtwvif);
-}
-
-static void rtw_wow_config_linked_rsvd_page(struct rtw_dev *rtwdev,
-                                          struct rtw_vif *rtwvif)
-{
-       rtw_add_rsvd_page_sta(rtwdev, rtwvif);
-}
+       struct ieee80211_vif *wow_vif = rtwdev->wow.wow_vif;
+       struct rtw_vif *rtwvif = (struct rtw_vif *)wow_vif->drv_priv;
 
-static void rtw_wow_config_rsvd_page(struct rtw_dev *rtwdev,
-                                    struct rtw_vif *rtwvif)
-{
        rtw_remove_rsvd_page(rtwdev, rtwvif);
 
-       if (rtw_wow_mgd_linked(rtwdev)) {
-               rtw_wow_config_linked_rsvd_page(rtwdev, rtwvif);
-       } else if (test_bit(RTW_FLAG_WOWLAN, rtwdev->flags) &&
-                  rtw_wow_no_link(rtwdev)) {
-               rtw_wow_config_pno_rsvd_page(rtwdev, rtwvif);
-       }
+       if (rtw_wow_no_link(rtwdev))
+               rtw_add_rsvd_page_pno(rtwdev, rtwvif);
+       else
+               rtw_add_rsvd_page_sta(rtwdev, rtwvif);
+
+       return rtw_fw_download_rsvd_page(rtwdev);
 }
 
-static int rtw_wow_dl_fw_rsvd_page(struct rtw_dev *rtwdev)
+static int rtw_wow_config_normal_fw_rsvd_page(struct rtw_dev *rtwdev)
 {
        struct ieee80211_vif *wow_vif = rtwdev->wow.wow_vif;
        struct rtw_vif *rtwvif = (struct rtw_vif *)wow_vif->drv_priv;
 
-       rtw_wow_config_rsvd_page(rtwdev, rtwvif);
+       rtw_remove_rsvd_page(rtwdev, rtwvif);
+       rtw_add_rsvd_page_sta(rtwdev, rtwvif);
+
+       if (rtw_wow_no_link(rtwdev))
+               return 0;
 
        return rtw_fw_download_rsvd_page(rtwdev);
 }
@@ -660,7 +693,7 @@ static int rtw_wow_enable(struct rtw_dev *rtwdev)
 
        set_bit(RTW_FLAG_WOWLAN, rtwdev->flags);
 
-       ret = rtw_wow_dl_fw_rsvd_page(rtwdev);
+       ret = rtw_wow_config_wow_fw_rsvd_page(rtwdev);
        if (ret) {
                rtw_err(rtwdev, "failed to download wowlan rsvd page\n");
                goto error;
@@ -733,7 +766,7 @@ static int rtw_wow_disable(struct rtw_dev *rtwdev)
                goto out;
        }
 
-       ret = rtw_wow_dl_fw_rsvd_page(rtwdev);
+       ret = rtw_wow_config_normal_fw_rsvd_page(rtwdev);
        if (ret)
                rtw_err(rtwdev, "failed to download normal rsvd page\n");
 
index 24a417e..bf22fd9 100644 (file)
@@ -117,7 +117,7 @@ static int rsi_stats_read(struct seq_file *seq, void *data)
 {
        struct rsi_common *common = seq->private;
 
-       unsigned char fsm_state[][32] = {
+       static const unsigned char fsm_state[][32] = {
                "FSM_FW_NOT_LOADED",
                "FSM_CARD_NOT_READY",
                "FSM_COMMON_DEV_PARAMS_SENT",
index 99b21a2..f4a26f1 100644 (file)
@@ -1038,8 +1038,10 @@ static int rsi_load_9116_firmware(struct rsi_hw *adapter)
        }
 
        ta_firmware = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL);
-       if (!ta_firmware)
+       if (!ta_firmware) {
+               status = -ENOMEM;
                goto fail_release_fw;
+       }
        fw_p = ta_firmware;
        instructions_sz = fw_entry->size;
        rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", instructions_sz);
index 3fbe2a3..416976f 100644 (file)
@@ -816,6 +816,7 @@ static int rsi_probe(struct usb_interface *pfunction,
        } else {
                rsi_dbg(ERR_ZONE, "%s: Unsupported RSI device id 0x%x\n",
                        __func__, id->idProduct);
+               status = -ENODEV;
                goto err1;
        }
 
index de93843..77dbfc4 100644 (file)
@@ -38,6 +38,18 @@ config MHI_WWAN_CTRL
          To compile this driver as a module, choose M here: the module will be
          called mhi_wwan_ctrl.
 
+config MHI_WWAN_MBIM
+        tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems"
+        depends on MHI_BUS
+        help
+          MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems.
+          It implements MBIM over MHI, for IP data aggregation and muxing.
+          A default wwan0 network interface is created for MBIM data session
+          ID 0. Additional links can be created via wwan rtnetlink type.
+
+          To compile this driver as a module, choose M here: the module will be
+          called mhi_wwan_mbim.
+
 config RPMSG_WWAN_CTRL
        tristate "RPMSG WWAN control driver"
        depends on RPMSG
index d90ac33..fe51fee 100644 (file)
@@ -9,5 +9,6 @@ wwan-objs += wwan_core.o
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
 
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o
 obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
 obj-$(CONFIG_IOSM) += iosm/
index 7f7d364..2fe88b8 100644 (file)
@@ -479,6 +479,7 @@ static struct pci_driver iosm_ipc_driver = {
        },
        .id_table = iosm_ipc_ids,
 };
+module_pci_driver(iosm_ipc_driver);
 
 int ipc_pcie_addr_map(struct iosm_pcie *ipc_pcie, unsigned char *data,
                      size_t size, dma_addr_t *mapping, int direction)
@@ -560,21 +561,3 @@ void ipc_pcie_kfree_skb(struct iosm_pcie *ipc_pcie, struct sk_buff *skb)
        IPC_CB(skb)->mapping = 0;
        dev_kfree_skb(skb);
 }
-
-static int __init iosm_ipc_driver_init(void)
-{
-       if (pci_register_driver(&iosm_ipc_driver)) {
-               pr_err("registering of IOSM PCIe driver failed");
-               return -1;
-       }
-
-       return 0;
-}
-
-static void __exit iosm_ipc_driver_exit(void)
-{
-       pci_unregister_driver(&iosm_ipc_driver);
-}
-
-module_init(iosm_ipc_driver_init);
-module_exit(iosm_ipc_driver_exit);
index 834d8b1..63fc701 100644 (file)
@@ -239,9 +239,9 @@ struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
        ipc_protocol->old_msg_tail = 0;
 
        ipc_protocol->p_ap_shm =
-               pci_alloc_consistent(ipc_protocol->pcie->pci,
-                                    sizeof(*ipc_protocol->p_ap_shm),
-                                    &ipc_protocol->phy_ap_shm);
+               dma_alloc_coherent(&ipc_protocol->pcie->pci->dev,
+                                  sizeof(*ipc_protocol->p_ap_shm),
+                                  &ipc_protocol->phy_ap_shm, GFP_KERNEL);
 
        if (!ipc_protocol->p_ap_shm) {
                dev_err(ipc_protocol->dev, "pci shm alloc error");
@@ -275,8 +275,8 @@ struct iosm_protocol *ipc_protocol_init(struct iosm_imem *ipc_imem)
 
 void ipc_protocol_deinit(struct iosm_protocol *proto)
 {
-       pci_free_consistent(proto->pcie->pci, sizeof(*proto->p_ap_shm),
-                           proto->p_ap_shm, proto->phy_ap_shm);
+       dma_free_coherent(&proto->pcie->pci->dev, sizeof(*proto->p_ap_shm),
+                         proto->p_ap_shm, proto->phy_ap_shm);
 
        ipc_pm_deinit(proto);
        kfree(proto);
index 35d5907..c6b032f 100644 (file)
@@ -74,9 +74,9 @@ static int ipc_protocol_msg_prepipe_open(struct iosm_protocol *ipc_protocol,
                return -ENOMEM;
 
        /* Allocate the transfer descriptors for the pipe. */
-       tdr = pci_alloc_consistent(ipc_protocol->pcie->pci,
-                                  pipe->nr_of_entries * sizeof(*tdr),
-                                  &pipe->phy_tdr_start);
+       tdr = dma_alloc_coherent(&ipc_protocol->pcie->pci->dev,
+                                pipe->nr_of_entries * sizeof(*tdr),
+                                &pipe->phy_tdr_start, GFP_ATOMIC);
        if (!tdr) {
                kfree(skbr);
                dev_err(ipc_protocol->dev, "tdr alloc error");
@@ -492,10 +492,9 @@ void ipc_protocol_pipe_cleanup(struct iosm_protocol *ipc_protocol,
 
        /* Free and reset the td and skbuf circular buffers. kfree is save! */
        if (pipe->tdr_start) {
-               pci_free_consistent(ipc_protocol->pcie->pci,
-                                   sizeof(*pipe->tdr_start) *
-                                           pipe->nr_of_entries,
-                                   pipe->tdr_start, pipe->phy_tdr_start);
+               dma_free_coherent(&ipc_protocol->pcie->pci->dev,
+                                 sizeof(*pipe->tdr_start) * pipe->nr_of_entries,
+                                 pipe->tdr_start, pipe->phy_tdr_start);
 
                pipe->tdr_start = NULL;
        }
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
new file mode 100644 (file)
index 0000000..71bf9b4
--- /dev/null
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI MBIM Network driver - Network/MBIM over MHI bus
+ *
+ * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
+ *
+ * This driver copy some code from cdc_ncm, which is:
+ * Copyright (C) ST-Ericsson 2010-2012
+ * and cdc_mbim, which is:
+ * Copyright (c) 2012  Smith Micro Software, Inc.
+ * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/mhi.h>
+#include <linux/mii.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc_ncm.h>
+#include <linux/wwan.h>
+
+/* 3500 allows to optimize skb allocation, the skbs will basically fit in
+ * one 4K page. Large MBIM packets will simply be split over several MHI
+ * transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_DEFAULT_MRU 3500
+
+#define MHI_MBIM_DEFAULT_MTU 1500
+#define MHI_MAX_BUF_SZ 0xffff
+
+#define MBIM_NDP16_SIGN_MASK 0x00ffffff
+
+#define MHI_MBIM_LINK_HASH_SIZE 8
+#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE)
+
+struct mhi_mbim_link {
+       struct mhi_mbim_context *mbim;
+       struct net_device *ndev;
+       unsigned int session;
+
+       /* stats */
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+
+       struct hlist_node hlnode;
+};
+
+struct mhi_mbim_context {
+       struct mhi_device *mdev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       unsigned int mru;
+       u32 rx_queue_sz;
+       u16 rx_seq;
+       u16 tx_seq;
+       struct delayed_work rx_refill;
+       spinlock_t tx_lock;
+       struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE];
+};
+
+struct mbim_tx_hdr {
+       struct usb_cdc_ncm_nth16 nth16;
+       struct usb_cdc_ncm_ndp16 ndp16;
+       struct usb_cdc_ncm_dpe16 dpe16[2];
+} __packed;
+
+static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim,
+                                                  unsigned int session)
+{
+       struct mhi_mbim_link *link;
+
+       hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) {
+               if (link->session == session)
+                       return link;
+       }
+
+       return NULL;
+}
+
+static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session,
+                                    u16 tx_seq)
+{
+       unsigned int dgram_size = skb->len;
+       struct usb_cdc_ncm_nth16 *nth16;
+       struct usb_cdc_ncm_ndp16 *ndp16;
+       struct mbim_tx_hdr *mbim_hdr;
+
+       /* Only one NDP is sent, containing the IP packet (no aggregation) */
+
+       /* Ensure we have enough headroom for crafting MBIM header */
+       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
+
+       /* Fill NTB header */
+       nth16 = &mbim_hdr->nth16;
+       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+       nth16->wSequence = cpu_to_le16(tx_seq);
+       nth16->wBlockLength = cpu_to_le16(skb->len);
+       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+
+       /* Fill the unique NDP */
+       ndp16 = &mbim_hdr->ndp16;
+       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24));
+       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
+                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
+       ndp16->wNextNdpIndex = 0;
+
+       /* Datagram follows the mbim header */
+       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
+       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
+
+       /* null termination */
+       ndp16->dpe16[1].wDatagramIndex = 0;
+       ndp16->dpe16[1].wDatagramLength = 0;
+
+       return skb;
+}
+
+static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = link->mbim;
+       unsigned long flags;
+       int err = -ENOMEM;
+
+       /* Serialize MHI channel queuing and MBIM seq */
+       spin_lock_irqsave(&mbim->tx_lock, flags);
+
+       skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq);
+       if (unlikely(!skb))
+               goto exit_unlock;
+
+       err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+
+       if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       if (!err)
+               mbim->tx_seq++;
+
+exit_unlock:
+       spin_unlock_irqrestore(&mbim->tx_lock, flags);
+
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&link->tx_syncp);
+       u64_stats_inc(&link->tx_dropped);
+       u64_stats_update_end(&link->tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       struct usb_cdc_ncm_nth16 *nth16;
+       int len;
+
+       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
+                       sizeof(struct usb_cdc_ncm_ndp16)) {
+               net_err_ratelimited("frame too short\n");
+               return -EINVAL;
+       }
+
+       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
+
+       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
+               net_err_ratelimited("invalid NTH16 signature <%#010x>\n",
+                                   le32_to_cpu(nth16->dwSignature));
+               return -EINVAL;
+       }
+
+       /* No limit on the block length, except the size of the data pkt */
+       len = le16_to_cpu(nth16->wBlockLength);
+       if (len > skb->len) {
+               net_err_ratelimited("NTB does not fit into the skb %u/%u\n",
+                                   len, skb->len);
+               return -EINVAL;
+       }
+
+       if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
+           (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) &&
+           !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
+               net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+                                   mbim->rx_seq, le16_to_cpu(nth16->wSequence));
+       }
+       mbim->rx_seq = le16_to_cpu(nth16->wSequence);
+
+       return le16_to_cpu(nth16->wNdpIndex);
+}
+
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
+{
+       int ret;
+
+       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
+               net_err_ratelimited("invalid DPT16 length <%u>\n",
+                                   le16_to_cpu(ndp16->wLength));
+               return -EINVAL;
+       }
+
+       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
+                       / sizeof(struct usb_cdc_ncm_dpe16));
+       ret--; /* Last entry is always a NULL terminator */
+
+       if (sizeof(struct usb_cdc_ncm_ndp16) +
+            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
+               net_err_ratelimited("Invalid nframes = %d\n", ret);
+               return -EINVAL;
+       }
+
+       return ret;
+}
+
+static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       int ndpoffset;
+
+       /* Check NTB header and retrieve first NDP offset */
+       ndpoffset = mbim_rx_verify_nth16(mbim, skb);
+       if (ndpoffset < 0) {
+               net_err_ratelimited("mbim: Incorrect NTB header\n");
+               goto error;
+       }
+
+       /* Process each NDP */
+       while (1) {
+               struct usb_cdc_ncm_ndp16 ndp16;
+               struct usb_cdc_ncm_dpe16 dpe16;
+               struct mhi_mbim_link *link;
+               int nframes, n, dpeoffset;
+               unsigned int session;
+
+               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+                       net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n",
+                                           ndpoffset);
+                       goto error;
+               }
+
+               /* Check NDP header and retrieve number of datagrams */
+               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
+               if (nframes < 0) {
+                       net_err_ratelimited("mbim: Incorrect NDP16\n");
+                       goto error;
+               }
+
+                /* Only IP data type supported, no DSS in MHI context */
+               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
+                       net_err_ratelimited("mbim: Unsupported NDP type\n");
+                       goto next_ndp;
+               }
+
+               session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24;
+
+               rcu_read_lock();
+
+               link = mhi_mbim_get_link_rcu(mbim, session);
+               if (!link) {
+                       net_err_ratelimited("mbim: bad packet session (%u)\n", session);
+                       goto unlock;
+               }
+
+               /* de-aggregate and deliver IP packets */
+               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+                       u16 dgram_offset, dgram_len;
+                       struct sk_buff *skbn;
+
+                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+                               break;
+
+                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
+                       if (!dgram_offset || !dgram_len)
+                               break; /* null terminator */
+
+                       skbn = netdev_alloc_skb(link->ndev, dgram_len);
+                       if (!skbn)
+                               continue;
+
+                       skb_put(skbn, dgram_len);
+                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
+
+                       switch (skbn->data[0] & 0xf0) {
+                       case 0x40:
+                               skbn->protocol = htons(ETH_P_IP);
+                               break;
+                       case 0x60:
+                               skbn->protocol = htons(ETH_P_IPV6);
+                               break;
+                       default:
+                               net_err_ratelimited("%s: unknown protocol\n",
+                                                   link->ndev->name);
+                               dev_kfree_skb_any(skbn);
+                               u64_stats_update_begin(&link->rx_syncp);
+                               u64_stats_inc(&link->rx_errors);
+                               u64_stats_update_end(&link->rx_syncp);
+                               continue;
+                       }
+
+                       u64_stats_update_begin(&link->rx_syncp);
+                       u64_stats_inc(&link->rx_packets);
+                       u64_stats_add(&link->rx_bytes, skbn->len);
+                       u64_stats_update_end(&link->rx_syncp);
+
+                       netif_rx(skbn);
+               }
+unlock:
+               rcu_read_unlock();
+next_ndp:
+               /* Other NDP to process? */
+               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
+               if (!ndpoffset)
+                       break;
+       }
+
+       /* free skb */
+       dev_consume_skb_any(skb);
+       return;
+error:
+       dev_kfree_skb_any(skb);
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mbim->skbagg_head;
+       struct sk_buff *tail = mbim->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mbim->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mbim->skbagg_tail = skb;
+
+       return mbim->skbagg_head;
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context,
+                                                    rx_refill.work);
+       struct mhi_device *mdev = mbim->mdev;
+       int err;
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
+                                   MHI_DEFAULT_MRU, MHI_EOT);
+               if (unlikely(err)) {
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz)
+               schedule_delayed_work(&mbim->rx_refill, HZ / 2);
+}
+
+static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet has been split over multiple transfers */
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mbim, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mbim->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mbim, skb);
+                       mbim->skbagg_head = NULL;
+               }
+
+               mhi_mbim_rx(mbim, skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mbim->rx_queue_sz / 2)
+               schedule_delayed_work(&mbim->rx_refill, 0);
+}
+
+static void mhi_mbim_ndo_get_stats64(struct net_device *ndev,
+                                    struct rtnl_link_stats64 *stats)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->rx_syncp);
+               stats->rx_packets = u64_stats_read(&link->rx_packets);
+               stats->rx_bytes = u64_stats_read(&link->rx_bytes);
+               stats->rx_errors = u64_stats_read(&link->rx_errors);
+       } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->tx_syncp);
+               stats->tx_packets = u64_stats_read(&link->tx_packets);
+               stats->tx_bytes = u64_stats_read(&link->tx_bytes);
+               stats->tx_errors = u64_stats_read(&link->tx_errors);
+               stats->tx_dropped = u64_stats_read(&link->tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start));
+}
+
+static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       struct net_device *ndev = skb->dev;
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&link->tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&link->tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&link->tx_errors);
+       } else {
+               u64_stats_inc(&link->tx_packets);
+               u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&link->tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static int mhi_mbim_ndo_open(struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Feed the MHI rx buffer pool */
+       schedule_delayed_work(&link->mbim->rx_refill, 0);
+
+       /* Carrier is established via out-of-band channel (e.g. qmi) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_mbim_ndo_stop(struct net_device *ndev)
+{
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+
+       return 0;
+}
+
+static const struct net_device_ops mhi_mbim_ndo = {
+       .ndo_open = mhi_mbim_ndo_open,
+       .ndo_stop = mhi_mbim_ndo_stop,
+       .ndo_start_xmit = mhi_mbim_ndo_xmit,
+       .ndo_get_stats64 = mhi_mbim_ndo_get_stats64,
+};
+
+static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+                           struct netlink_ext_ack *extack)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = ctxt;
+
+       link->session = if_id;
+       link->mbim = mbim;
+       link->ndev = ndev;
+       u64_stats_init(&link->rx_syncp);
+       u64_stats_init(&link->tx_syncp);
+
+       rcu_read_lock();
+       if (mhi_mbim_get_link_rcu(mbim, if_id)) {
+               rcu_read_unlock();
+               return -EEXIST;
+       }
+       rcu_read_unlock();
+
+       /* Already protected by RTNL lock */
+       hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]);
+
+       return register_netdevice(ndev);
+}
+
+static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev,
+                            struct list_head *head)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       hlist_del_init_rcu(&link->hlnode);
+       synchronize_rcu();
+
+       unregister_netdevice_queue(ndev, head);
+}
+
+static void mhi_mbim_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_mbim_ndo;
+       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+       ndev->min_mtu = ETH_MIN_MTU;
+       ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+       ndev->tx_queue_len = 1000;
+}
+
+static const struct wwan_ops mhi_mbim_wwan_ops = {
+       .priv_size = sizeof(struct mhi_mbim_link),
+       .setup = mhi_mbim_setup,
+       .newlink = mhi_mbim_newlink,
+       .dellink = mhi_mbim_dellink,
+};
+
+static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+       struct mhi_mbim_context *mbim;
+       int err;
+
+       mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
+       if (!mbim)
+               return -ENOMEM;
+
+       spin_lock_init(&mbim->tx_lock);
+       dev_set_drvdata(&mhi_dev->dev, mbim);
+       mbim->mdev = mhi_dev;
+       mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU;
+
+       INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev);
+       if (err)
+               return err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       /* Register wwan link ops with MHI controller representing WWAN instance */
+       return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
+}
+
+static void mhi_mbim_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+
+       mhi_unprepare_from_transfer(mhi_dev);
+       cancel_delayed_work_sync(&mbim->rx_refill);
+       wwan_unregister_ops(&cntrl->mhi_dev->dev);
+       kfree_skb(mbim->skbagg_head);
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id mhi_mbim_id_table[] = {
+       /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
+       { .chan = "IP_HW0_MBIM", .driver_data = 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table);
+
+static struct mhi_driver mhi_mbim_driver = {
+       .probe = mhi_mbim_probe,
+       .remove = mhi_mbim_remove,
+       .dl_xfer_cb = mhi_mbim_dl_callback,
+       .ul_xfer_cb = mhi_mbim_ul_callback,
+       .id_table = mhi_mbim_id_table,
+       .driver = {
+               .name = "mhi_wwan_mbim",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_mbim_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network/MBIM over MHI");
+MODULE_LICENSE("GPL v2");
index 35ece98..d293ab6 100644 (file)
@@ -359,8 +359,8 @@ struct wwan_port *wwan_create_port(struct device *parent,
 {
        struct wwan_device *wwandev;
        struct wwan_port *port;
-       int minor, err = -ENOMEM;
        char namefmt[0x20];
+       int minor, err;
 
        if (type > WWAN_PORT_MAX || !ops)
                return ERR_PTR(-EINVAL);
@@ -374,11 +374,14 @@ struct wwan_port *wwan_create_port(struct device *parent,
 
        /* A port is exposed as character device, get a minor */
        minor = ida_alloc_range(&minors, 0, WWAN_MAX_MINORS - 1, GFP_KERNEL);
-       if (minor < 0)
+       if (minor < 0) {
+               err = minor;
                goto error_wwandev_remove;
+       }
 
        port = kzalloc(sizeof(*port), GFP_KERNEL);
        if (!port) {
+               err = -ENOMEM;
                ida_free(&minors, minor);
                goto error_wwandev_remove;
        }
index 4427590..e31b984 100644 (file)
@@ -126,21 +126,17 @@ struct netfront_queue {
 
        /*
         * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
-        * are linked from tx_skb_freelist through skb_entry.link.
-        *
-        *  NB. Freelist index entries are always going to be less than
-        *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
-        *  greater than PAGE_OFFSET: we use this property to distinguish
-        *  them.
+        * are linked from tx_skb_freelist through tx_link.
         */
-       union skb_entry {
-               struct sk_buff *skb;
-               unsigned long link;
-       } tx_skbs[NET_TX_RING_SIZE];
+       struct sk_buff *tx_skbs[NET_TX_RING_SIZE];
+       unsigned short tx_link[NET_TX_RING_SIZE];
+#define TX_LINK_NONE 0xffff
+#define TX_PENDING   0xfffe
        grant_ref_t gref_tx_head;
        grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
        struct page *grant_tx_page[NET_TX_RING_SIZE];
        unsigned tx_skb_freelist;
+       unsigned int tx_pend_queue;
 
        spinlock_t   rx_lock ____cacheline_aligned_in_smp;
        struct xen_netif_rx_front_ring rx;
@@ -173,6 +169,9 @@ struct netfront_info {
        bool netback_has_xdp_headroom;
        bool netfront_xdp_enabled;
 
+       /* Is device behaving sane? */
+       bool broken;
+
        atomic_t rx_gso_checksum_fixup;
 };
 
@@ -181,33 +180,25 @@ struct netfront_rx_info {
        struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 };
 
-static void skb_entry_set_link(union skb_entry *list, unsigned short id)
-{
-       list->link = id;
-}
-
-static int skb_entry_is_link(const union skb_entry *list)
-{
-       BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link));
-       return (unsigned long)list->skb < PAGE_OFFSET;
-}
-
 /*
  * Access macros for acquiring freeing slots in tx_skbs[].
  */
 
-static void add_id_to_freelist(unsigned *head, union skb_entry *list,
-                              unsigned short id)
+static void add_id_to_list(unsigned *head, unsigned short *list,
+                          unsigned short id)
 {
-       skb_entry_set_link(&list[id], *head);
+       list[id] = *head;
        *head = id;
 }
 
-static unsigned short get_id_from_freelist(unsigned *head,
-                                          union skb_entry *list)
+static unsigned short get_id_from_list(unsigned *head, unsigned short *list)
 {
        unsigned int id = *head;
-       *head = list[id].link;
+
+       if (id != TX_LINK_NONE) {
+               *head = list[id];
+               list[id] = TX_LINK_NONE;
+       }
        return id;
 }
 
@@ -363,7 +354,7 @@ static int xennet_open(struct net_device *dev)
        unsigned int i = 0;
        struct netfront_queue *queue = NULL;
 
-       if (!np->queues)
+       if (!np->queues || np->broken)
                return -ENODEV;
 
        for (i = 0; i < num_queues; ++i) {
@@ -391,27 +382,47 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
        unsigned short id;
        struct sk_buff *skb;
        bool more_to_do;
+       const struct device *dev = &queue->info->netdev->dev;
 
        BUG_ON(!netif_carrier_ok(queue->info->netdev));
 
        do {
                prod = queue->tx.sring->rsp_prod;
+               if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) {
+                       dev_alert(dev, "Illegal number of responses %u\n",
+                                 prod - queue->tx.rsp_cons);
+                       goto err;
+               }
                rmb(); /* Ensure we see responses up to 'rp'. */
 
                for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
-                       struct xen_netif_tx_response *txrsp;
+                       struct xen_netif_tx_response txrsp;
 
-                       txrsp = RING_GET_RESPONSE(&queue->tx, cons);
-                       if (txrsp->status == XEN_NETIF_RSP_NULL)
+                       RING_COPY_RESPONSE(&queue->tx, cons, &txrsp);
+                       if (txrsp.status == XEN_NETIF_RSP_NULL)
                                continue;
 
-                       id  = txrsp->id;
-                       skb = queue->tx_skbs[id].skb;
+                       id = txrsp.id;
+                       if (id >= RING_SIZE(&queue->tx)) {
+                               dev_alert(dev,
+                                         "Response has incorrect id (%u)\n",
+                                         id);
+                               goto err;
+                       }
+                       if (queue->tx_link[id] != TX_PENDING) {
+                               dev_alert(dev,
+                                         "Response for inactive request\n");
+                               goto err;
+                       }
+
+                       queue->tx_link[id] = TX_LINK_NONE;
+                       skb = queue->tx_skbs[id];
+                       queue->tx_skbs[id] = NULL;
                        if (unlikely(gnttab_query_foreign_access(
                                queue->grant_tx_ref[id]) != 0)) {
-                               pr_alert("%s: warning -- grant still in use by backend domain\n",
-                                        __func__);
-                               BUG();
+                               dev_alert(dev,
+                                         "Grant still in use by backend domain\n");
+                               goto err;
                        }
                        gnttab_end_foreign_access_ref(
                                queue->grant_tx_ref[id], GNTMAP_readonly);
@@ -419,7 +430,7 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
                                &queue->gref_tx_head, queue->grant_tx_ref[id]);
                        queue->grant_tx_ref[id] = GRANT_INVALID_REF;
                        queue->grant_tx_page[id] = NULL;
-                       add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id);
+                       add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id);
                        dev_kfree_skb_irq(skb);
                }
 
@@ -429,13 +440,20 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
        } while (more_to_do);
 
        xennet_maybe_wake_tx(queue);
+
+       return;
+
+ err:
+       queue->info->broken = true;
+       dev_alert(dev, "Disabled for further use\n");
 }
 
 struct xennet_gnttab_make_txreq {
        struct netfront_queue *queue;
        struct sk_buff *skb;
        struct page *page;
-       struct xen_netif_tx_request *tx; /* Last request */
+       struct xen_netif_tx_request *tx;      /* Last request on ring page */
+       struct xen_netif_tx_request tx_local; /* Last request local copy*/
        unsigned int size;
 };
 
@@ -451,7 +469,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
        struct netfront_queue *queue = info->queue;
        struct sk_buff *skb = info->skb;
 
-       id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
+       id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link);
        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
        WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
@@ -459,34 +477,37 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
                                        gfn, GNTMAP_readonly);
 
-       queue->tx_skbs[id].skb = skb;
+       queue->tx_skbs[id] = skb;
        queue->grant_tx_page[id] = page;
        queue->grant_tx_ref[id] = ref;
 
-       tx->id = id;
-       tx->gref = ref;
-       tx->offset = offset;
-       tx->size = len;
-       tx->flags = 0;
+       info->tx_local.id = id;
+       info->tx_local.gref = ref;
+       info->tx_local.offset = offset;
+       info->tx_local.size = len;
+       info->tx_local.flags = 0;
+
+       *tx = info->tx_local;
+
+       /*
+        * Put the request in the pending queue, it will be set to be pending
+        * when the producer index is about to be raised.
+        */
+       add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id);
 
        info->tx = tx;
-       info->size += tx->size;
+       info->size += info->tx_local.size;
 }
 
 static struct xen_netif_tx_request *xennet_make_first_txreq(
-       struct netfront_queue *queue, struct sk_buff *skb,
-       struct page *page, unsigned int offset, unsigned int len)
+       struct xennet_gnttab_make_txreq *info,
+       unsigned int offset, unsigned int len)
 {
-       struct xennet_gnttab_make_txreq info = {
-               .queue = queue,
-               .skb = skb,
-               .page = page,
-               .size = 0,
-       };
+       info->size = 0;
 
-       gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info);
+       gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info);
 
-       return info.tx;
+       return info->tx;
 }
 
 static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
@@ -499,35 +520,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset,
        xennet_tx_setup_grant(gfn, offset, len, data);
 }
 
-static struct xen_netif_tx_request *xennet_make_txreqs(
-       struct netfront_queue *queue, struct xen_netif_tx_request *tx,
-       struct sk_buff *skb, struct page *page,
+static void xennet_make_txreqs(
+       struct xennet_gnttab_make_txreq *info,
+       struct page *page,
        unsigned int offset, unsigned int len)
 {
-       struct xennet_gnttab_make_txreq info = {
-               .queue = queue,
-               .skb = skb,
-               .tx = tx,
-       };
-
        /* Skip unused frames from start of page */
        page += offset >> PAGE_SHIFT;
        offset &= ~PAGE_MASK;
 
        while (len) {
-               info.page = page;
-               info.size = 0;
+               info->page = page;
+               info->size = 0;
 
                gnttab_foreach_grant_in_range(page, offset, len,
                                              xennet_make_one_txreq,
-                                             &info);
+                                             info);
 
                page++;
                offset = 0;
-               len -= info.size;
+               len -= info->size;
        }
-
-       return info.tx;
 }
 
 /*
@@ -574,19 +587,34 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
        return queue_idx;
 }
 
+static void xennet_mark_tx_pending(struct netfront_queue *queue)
+{
+       unsigned int i;
+
+       while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) !=
+              TX_LINK_NONE)
+               queue->tx_link[i] = TX_PENDING;
+}
+
 static int xennet_xdp_xmit_one(struct net_device *dev,
                               struct netfront_queue *queue,
                               struct xdp_frame *xdpf)
 {
        struct netfront_info *np = netdev_priv(dev);
        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
+       struct xennet_gnttab_make_txreq info = {
+               .queue = queue,
+               .skb = NULL,
+               .page = virt_to_page(xdpf->data),
+       };
        int notify;
 
-       xennet_make_first_txreq(queue, NULL,
-                               virt_to_page(xdpf->data),
+       xennet_make_first_txreq(&info,
                                offset_in_page(xdpf->data),
                                xdpf->len);
 
+       xennet_mark_tx_pending(queue);
+
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
        if (notify)
                notify_remote_via_irq(queue->tx_irq);
@@ -611,6 +639,8 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
        int nxmit = 0;
        int i;
 
+       if (unlikely(np->broken))
+               return -ENODEV;
        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
                return -EINVAL;
 
@@ -638,7 +668,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
 {
        struct netfront_info *np = netdev_priv(dev);
        struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats);
-       struct xen_netif_tx_request *tx, *first_tx;
+       struct xen_netif_tx_request *first_tx;
        unsigned int i;
        int notify;
        int slots;
@@ -647,6 +677,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
        unsigned int len;
        unsigned long flags;
        struct netfront_queue *queue = NULL;
+       struct xennet_gnttab_make_txreq info = { };
        unsigned int num_queues = dev->real_num_tx_queues;
        u16 queue_index;
        struct sk_buff *nskb;
@@ -654,6 +685,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
        /* Drop the packet if no queues are set up */
        if (num_queues < 1)
                goto drop;
+       if (unlikely(np->broken))
+               goto drop;
        /* Determine which queue to transmit this SKB on */
        queue_index = skb_get_queue_mapping(skb);
        queue = &np->queues[queue_index];
@@ -704,21 +737,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
        }
 
        /* First request for the linear area. */
-       first_tx = tx = xennet_make_first_txreq(queue, skb,
-                                               page, offset, len);
-       offset += tx->size;
+       info.queue = queue;
+       info.skb = skb;
+       info.page = page;
+       first_tx = xennet_make_first_txreq(&info, offset, len);
+       offset += info.tx_local.size;
        if (offset == PAGE_SIZE) {
                page++;
                offset = 0;
        }
-       len -= tx->size;
+       len -= info.tx_local.size;
 
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                /* local packet? */
-               tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated;
+               first_tx->flags |= XEN_NETTXF_csum_blank |
+                                  XEN_NETTXF_data_validated;
        else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
                /* remote but checksummed. */
-               tx->flags |= XEN_NETTXF_data_validated;
+               first_tx->flags |= XEN_NETTXF_data_validated;
 
        /* Optional extra info after the first request. */
        if (skb_shinfo(skb)->gso_size) {
@@ -727,7 +763,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
                gso = (struct xen_netif_extra_info *)
                        RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
 
-               tx->flags |= XEN_NETTXF_extra_info;
+               first_tx->flags |= XEN_NETTXF_extra_info;
 
                gso->u.gso.size = skb_shinfo(skb)->gso_size;
                gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
@@ -741,12 +777,12 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
        }
 
        /* Requests for the rest of the linear area. */
-       tx = xennet_make_txreqs(queue, tx, skb, page, offset, len);
+       xennet_make_txreqs(&info, page, offset, len);
 
        /* Requests for all the frags. */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-               tx = xennet_make_txreqs(queue, tx, skb, skb_frag_page(frag),
+               xennet_make_txreqs(&info, skb_frag_page(frag),
                                        skb_frag_off(frag),
                                        skb_frag_size(frag));
        }
@@ -757,6 +793,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev
        /* timestamp packet in software */
        skb_tx_timestamp(skb);
 
+       xennet_mark_tx_pending(queue);
+
        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify);
        if (notify)
                notify_remote_via_irq(queue->tx_irq);
@@ -814,7 +852,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
                             RING_IDX rp)
 
 {
-       struct xen_netif_extra_info *extra;
+       struct xen_netif_extra_info extra;
        struct device *dev = &queue->info->netdev->dev;
        RING_IDX cons = queue->rx.rsp_cons;
        int err = 0;
@@ -830,24 +868,22 @@ static int xennet_get_extras(struct netfront_queue *queue,
                        break;
                }
 
-               extra = (struct xen_netif_extra_info *)
-                       RING_GET_RESPONSE(&queue->rx, ++cons);
+               RING_COPY_RESPONSE(&queue->rx, ++cons, &extra);
 
-               if (unlikely(!extra->type ||
-                            extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+               if (unlikely(!extra.type ||
+                            extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
                        if (net_ratelimit())
                                dev_warn(dev, "Invalid extra type: %d\n",
-                                       extra->type);
+                                        extra.type);
                        err = -EINVAL;
                } else {
-                       memcpy(&extras[extra->type - 1], extra,
-                              sizeof(*extra));
+                       extras[extra.type - 1] = extra;
                }
 
                skb = xennet_get_rx_skb(queue, cons);
                ref = xennet_get_rx_ref(queue, cons);
                xennet_move_rx_slot(queue, skb, ref);
-       } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+       } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
 
        queue->rx.rsp_cons = cons;
        return err;
@@ -905,7 +941,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
                                struct sk_buff_head *list,
                                bool *need_xdp_flush)
 {
-       struct xen_netif_rx_response *rx = &rinfo->rx;
+       struct xen_netif_rx_response *rx = &rinfo->rx, rx_local;
        int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD);
        RING_IDX cons = queue->rx.rsp_cons;
        struct sk_buff *skb = xennet_get_rx_skb(queue, cons);
@@ -989,7 +1025,8 @@ next:
                        break;
                }
 
-               rx = RING_GET_RESPONSE(&queue->rx, cons + slots);
+               RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local);
+               rx = &rx_local;
                skb = xennet_get_rx_skb(queue, cons + slots);
                ref = xennet_get_rx_ref(queue, cons + slots);
                slots++;
@@ -1044,10 +1081,11 @@ static int xennet_fill_frags(struct netfront_queue *queue,
        struct sk_buff *nskb;
 
        while ((nskb = __skb_dequeue(list))) {
-               struct xen_netif_rx_response *rx =
-                       RING_GET_RESPONSE(&queue->rx, ++cons);
+               struct xen_netif_rx_response rx;
                skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
 
+               RING_COPY_RESPONSE(&queue->rx, ++cons, &rx);
+
                if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
                        unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
 
@@ -1062,7 +1100,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
 
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                skb_frag_page(nfrag),
-                               rx->offset, rx->status, PAGE_SIZE);
+                               rx.offset, rx.status, PAGE_SIZE);
 
                skb_shinfo(nskb)->nr_frags = 0;
                kfree_skb(nskb);
@@ -1156,12 +1194,19 @@ static int xennet_poll(struct napi_struct *napi, int budget)
        skb_queue_head_init(&tmpq);
 
        rp = queue->rx.sring->rsp_prod;
+       if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) {
+               dev_alert(&dev->dev, "Illegal number of responses %u\n",
+                         rp - queue->rx.rsp_cons);
+               queue->info->broken = true;
+               spin_unlock(&queue->rx_lock);
+               return 0;
+       }
        rmb(); /* Ensure we see queued responses up to 'rp'. */
 
        i = queue->rx.rsp_cons;
        work_done = 0;
        while ((i != rp) && (work_done < budget)) {
-               memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx));
+               RING_COPY_RESPONSE(&queue->rx, i, rx);
                memset(extras, 0, sizeof(rinfo.extras));
 
                err = xennet_get_responses(queue, &rinfo, rp, &tmpq,
@@ -1286,17 +1331,18 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue)
 
        for (i = 0; i < NET_TX_RING_SIZE; i++) {
                /* Skip over entries which are actually freelist references */
-               if (skb_entry_is_link(&queue->tx_skbs[i]))
+               if (!queue->tx_skbs[i])
                        continue;
 
-               skb = queue->tx_skbs[i].skb;
+               skb = queue->tx_skbs[i];
+               queue->tx_skbs[i] = NULL;
                get_page(queue->grant_tx_page[i]);
                gnttab_end_foreign_access(queue->grant_tx_ref[i],
                                          GNTMAP_readonly,
                                          (unsigned long)page_address(queue->grant_tx_page[i]));
                queue->grant_tx_page[i] = NULL;
                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
-               add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i);
+               add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i);
                dev_kfree_skb_irq(skb);
        }
 }
@@ -1376,6 +1422,9 @@ static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
        struct netfront_queue *queue = dev_id;
        unsigned long flags;
 
+       if (queue->info->broken)
+               return IRQ_HANDLED;
+
        spin_lock_irqsave(&queue->tx_lock, flags);
        xennet_tx_buf_gc(queue);
        spin_unlock_irqrestore(&queue->tx_lock, flags);
@@ -1388,6 +1437,9 @@ static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
        struct netfront_queue *queue = dev_id;
        struct net_device *dev = queue->info->netdev;
 
+       if (queue->info->broken)
+               return IRQ_HANDLED;
+
        if (likely(netif_carrier_ok(dev) &&
                   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
                napi_schedule(&queue->napi);
@@ -1409,6 +1461,10 @@ static void xennet_poll_controller(struct net_device *dev)
        struct netfront_info *info = netdev_priv(dev);
        unsigned int num_queues = dev->real_num_tx_queues;
        unsigned int i;
+
+       if (info->broken)
+               return;
+
        for (i = 0; i < num_queues; ++i)
                xennet_interrupt(0, &info->queues[i]);
 }
@@ -1480,6 +1536,11 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
 static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
+       struct netfront_info *np = netdev_priv(dev);
+
+       if (np->broken)
+               return -ENODEV;
+
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                return xennet_xdp_set(dev, xdp->prog, xdp->extack);
@@ -1853,13 +1914,15 @@ static int xennet_init_queue(struct netfront_queue *queue)
        snprintf(queue->name, sizeof(queue->name), "vif%s-q%u",
                 devid, queue->id);
 
-       /* Initialise tx_skbs as a free chain containing every entry. */
+       /* Initialise tx_skb_freelist as a free chain containing every entry. */
        queue->tx_skb_freelist = 0;
+       queue->tx_pend_queue = TX_LINK_NONE;
        for (i = 0; i < NET_TX_RING_SIZE; i++) {
-               skb_entry_set_link(&queue->tx_skbs[i], i+1);
+               queue->tx_link[i] = i + 1;
                queue->grant_tx_ref[i] = GRANT_INVALID_REF;
                queue->grant_tx_page[i] = NULL;
        }
+       queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE;
 
        /* Clear out rx_skbs */
        for (i = 0; i < NET_RX_RING_SIZE; i++) {
@@ -2128,6 +2191,9 @@ static int talk_to_netback(struct xenbus_device *dev,
        if (info->queues)
                xennet_destroy_queues(info);
 
+       /* For the case of a reconnect reset the "broken" indicator. */
+       info->broken = false;
+
        err = xennet_create_queues(info, &num_queues);
        if (err < 0) {
                xenbus_dev_fatal(dev, err, "creating queues");
index 5287458..c6b3334 100644 (file)
@@ -38,7 +38,7 @@
 #define NCI_OP_PROP_SET_PDATA_OID              0x23
 
 struct fdp_nci_info {
-       struct nfc_phy_ops *phy_ops;
+       const struct nfc_phy_ops *phy_ops;
        struct fdp_i2c_phy *phy;
        struct nci_dev *ndev;
 
@@ -52,7 +52,7 @@ struct fdp_nci_info {
        u32 limited_otp_version;
        u8 key_index;
 
-       u8 *fw_vsc_cfg;
+       const u8 *fw_vsc_cfg;
        u8 clock_type;
        u32 clock_freq;
 
@@ -65,7 +65,7 @@ struct fdp_nci_info {
        wait_queue_head_t setup_wq;
 };
 
-static u8 nci_core_get_config_otp_ram_version[5] = {
+static const u8 nci_core_get_config_otp_ram_version[5] = {
        0x04,
        NCI_PARAM_ID_FW_RAM_VERSION,
        NCI_PARAM_ID_FW_OTP_VERSION,
@@ -111,7 +111,7 @@ static inline int fdp_nci_patch_cmd(struct nci_dev *ndev, u8 type)
 }
 
 static inline int fdp_nci_set_production_data(struct nci_dev *ndev, u8 len,
-                                             char *data)
+                                             const char *data)
 {
        return nci_prop_cmd(ndev, NCI_OP_PROP_SET_PDATA_OID, len, data);
 }
@@ -236,7 +236,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 
 static int fdp_nci_open(struct nci_dev *ndev)
 {
-       struct fdp_nci_info *info = nci_get_drvdata(ndev);
+       const struct fdp_nci_info *info = nci_get_drvdata(ndev);
 
        return info->phy_ops->enable(info->phy);
 }
@@ -260,7 +260,7 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
 {
        struct fdp_nci_info *info = nci_get_drvdata(ndev);
        struct device *dev = &info->phy->i2c_dev->dev;
-       u8 *data;
+       const u8 *data;
        int r;
 
        r = request_firmware(&info->ram_patch, FDP_RAM_PATCH_NAME, dev);
@@ -269,15 +269,15 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
                return r;
        }
 
-       data = (u8 *) info->ram_patch->data;
+       data = info->ram_patch->data;
        info->ram_patch_version =
                data[FDP_FW_HEADER_SIZE] |
                (data[FDP_FW_HEADER_SIZE + 1] << 8) |
                (data[FDP_FW_HEADER_SIZE + 2] << 16) |
                (data[FDP_FW_HEADER_SIZE + 3] << 24);
 
-       dev_dbg(dev, "RAM patch version: %d, size: %d\n",
-                 info->ram_patch_version, (int) info->ram_patch->size);
+       dev_dbg(dev, "RAM patch version: %d, size: %zu\n",
+                 info->ram_patch_version, info->ram_patch->size);
 
 
        r = request_firmware(&info->otp_patch, FDP_OTP_PATCH_NAME, dev);
@@ -293,8 +293,8 @@ static int fdp_nci_request_firmware(struct nci_dev *ndev)
                (data[FDP_FW_HEADER_SIZE+2] << 16) |
                (data[FDP_FW_HEADER_SIZE+3] << 24);
 
-       dev_dbg(dev, "OTP patch version: %d, size: %d\n",
-                info->otp_patch_version, (int) info->otp_patch->size);
+       dev_dbg(dev, "OTP patch version: %d, size: %zu\n",
+                info->otp_patch_version, info->otp_patch->size);
        return 0;
 }
 
@@ -610,8 +610,9 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev,
 {
        struct fdp_nci_info *info = nci_get_drvdata(ndev);
        struct device *dev = &info->phy->i2c_dev->dev;
-       struct nci_core_get_config_rsp *rsp = (void *) skb->data;
-       u8 i, *p;
+       const struct nci_core_get_config_rsp *rsp = (void *) skb->data;
+       unsigned int i;
+       const u8 *p;
 
        if (rsp->status == NCI_STATUS_OK) {
 
@@ -651,7 +652,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev,
        return 0;
 }
 
-static struct nci_driver_ops fdp_core_ops[] = {
+static const struct nci_driver_ops fdp_core_ops[] = {
        {
                .opcode = NCI_OP_CORE_GET_CONFIG_RSP,
                .rsp = fdp_nci_core_get_config_rsp_packet,
@@ -662,7 +663,7 @@ static struct nci_driver_ops fdp_core_ops[] = {
        },
 };
 
-static struct nci_driver_ops fdp_prop_ops[] = {
+static const struct nci_driver_ops fdp_prop_ops[] = {
        {
                .opcode = nci_opcode_pack(NCI_GID_PROP, NCI_OP_PROP_PATCH_OID),
                .rsp = fdp_nci_prop_patch_rsp_packet,
@@ -675,7 +676,7 @@ static struct nci_driver_ops fdp_prop_ops[] = {
        },
 };
 
-static struct nci_ops nci_ops = {
+static const struct nci_ops nci_ops = {
        .open = fdp_nci_open,
        .close = fdp_nci_close,
        .send = fdp_nci_send,
@@ -687,10 +688,10 @@ static struct nci_ops nci_ops = {
        .n_core_ops = ARRAY_SIZE(fdp_core_ops),
 };
 
-int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
+int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops,
                        struct nci_dev **ndevp, int tx_headroom,
                        int tx_tailroom, u8 clock_type, u32 clock_freq,
-                       u8 *fw_vsc_cfg)
+                       const u8 *fw_vsc_cfg)
 {
        struct device *dev = &phy->i2c_dev->dev;
        struct fdp_nci_info *info;
@@ -718,6 +719,7 @@ int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
                    NFC_PROTO_NFC_DEP_MASK |
                    NFC_PROTO_ISO15693_MASK;
 
+       BUILD_BUG_ON(ARRAY_SIZE(fdp_prop_ops) > NCI_MAX_PROPRIETARY_CMD);
        ndev = nci_allocate_device(&nci_ops, protocols, tx_headroom,
                                   tx_tailroom);
        if (!ndev) {
index ead3b21..2e9161a 100644 (file)
@@ -21,9 +21,9 @@ struct fdp_i2c_phy {
        uint16_t next_read_size;
 };
 
-int fdp_nci_probe(struct fdp_i2c_phy *phy, struct nfc_phy_ops *phy_ops,
+int fdp_nci_probe(struct fdp_i2c_phy *phy, const struct nfc_phy_ops *phy_ops,
                  struct nci_dev **ndev, int tx_headroom, int tx_tailroom,
-                 u8 clock_type, u32 clock_freq, u8 *fw_vsc_cfg);
+                 u8 clock_type, u32 clock_freq, const u8 *fw_vsc_cfg);
 void fdp_nci_remove(struct nci_dev *ndev);
 
 #endif /* __LOCAL_FDP_H_ */
index c5596e5..051c43a 100644 (file)
@@ -36,7 +36,7 @@
        print_hex_dump(KERN_DEBUG, prefix": ", DUMP_PREFIX_OFFSET,      \
                       16, 1, (skb)->data, (skb)->len, 0)
 
-static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy)
+static void fdp_nci_i2c_reset(const struct fdp_i2c_phy *phy)
 {
        /* Reset RST/WakeUP for at least 100 micro-second */
        gpiod_set_value_cansleep(phy->power_gpio, FDP_POWER_OFF);
@@ -47,7 +47,7 @@ static void fdp_nci_i2c_reset(struct fdp_i2c_phy *phy)
 
 static int fdp_nci_i2c_enable(void *phy_id)
 {
-       struct fdp_i2c_phy *phy = phy_id;
+       const struct fdp_i2c_phy *phy = phy_id;
 
        fdp_nci_i2c_reset(phy);
 
@@ -56,7 +56,7 @@ static int fdp_nci_i2c_enable(void *phy_id)
 
 static void fdp_nci_i2c_disable(void *phy_id)
 {
-       struct fdp_i2c_phy *phy = phy_id;
+       const struct fdp_i2c_phy *phy = phy_id;
 
        fdp_nci_i2c_reset(phy);
 }
@@ -120,7 +120,7 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb)
        return r;
 }
 
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
        .write = fdp_nci_i2c_write,
        .enable = fdp_nci_i2c_enable,
        .disable = fdp_nci_i2c_disable,
index e56cea7..f9cca88 100644 (file)
@@ -202,7 +202,7 @@ err:
        return r;
 }
 
-static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length)
+static int mei_nfc_send(struct nfc_mei_phy *phy, const u8 *buf, size_t length)
 {
        struct mei_nfc_hdr *hdr;
        u8 *mei_buf;
@@ -362,7 +362,7 @@ static void nfc_mei_phy_disable(void *phy_id)
        phy->powered = 0;
 }
 
-struct nfc_phy_ops mei_phy_ops = {
+const struct nfc_phy_ops mei_phy_ops = {
        .write = nfc_mei_phy_write,
        .enable = nfc_mei_phy_enable,
        .disable = nfc_mei_phy_disable,
index 51bd44f..2b1edb3 100644 (file)
@@ -45,7 +45,7 @@ struct nfc_mei_phy {
        int hard_fault;
 };
 
-extern struct nfc_phy_ops mei_phy_ops;
+extern const struct nfc_phy_ops mei_phy_ops;
 
 struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device);
 void nfc_mei_phy_free(struct nfc_mei_phy *phy);
index dd78d98..86f593c 100644 (file)
@@ -73,7 +73,7 @@ static void microread_i2c_remove_len_crc(struct sk_buff *skb)
        skb_trim(skb, MICROREAD_I2C_FRAME_TAILROOM);
 }
 
-static int check_crc(struct sk_buff *skb)
+static int check_crc(const struct sk_buff *skb)
 {
        int i;
        u8 crc = 0;
@@ -225,7 +225,7 @@ static irqreturn_t microread_i2c_irq_thread_fn(int irq, void *phy_id)
        return IRQ_HANDLED;
 }
 
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
        .write = microread_i2c_write,
        .enable = microread_i2c_enable,
        .disable = microread_i2c_disable,
index 8fa7771..8edf761 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
 #include <linux/nfc.h>
-#include <net/nfc/hci.h>
 #include <net/nfc/llc.h>
 
 #include "../mei_phy.h"
index b1d3975..bb4d029 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/nfc.h>
 #include <net/nfc/nfc.h>
 #include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
 
 #include "microread.h"
 
 #define MICROREAD_ELT_ID_SE2 0x04
 #define MICROREAD_ELT_ID_SE3 0x05
 
-static struct nfc_hci_gate microread_gates[] = {
+static const struct nfc_hci_gate microread_gates[] = {
        {MICROREAD_GATE_ID_ADM, MICROREAD_PIPE_ID_ADMIN},
        {MICROREAD_GATE_ID_LOOPBACK, MICROREAD_PIPE_ID_HDS_LOOPBACK},
        {MICROREAD_GATE_ID_IDT, MICROREAD_PIPE_ID_HDS_IDT},
@@ -152,7 +151,7 @@ static struct nfc_hci_gate microread_gates[] = {
 #define MICROREAD_CMD_TAILROOM 2
 
 struct microread_info {
-       struct nfc_phy_ops *phy_ops;
+       const struct nfc_phy_ops *phy_ops;
        void *phy_id;
 
        struct nfc_hci_dev *hdev;
@@ -358,7 +357,7 @@ static int microread_complete_target_discovered(struct nfc_hci_dev *hdev,
 static void microread_im_transceive_cb(void *context, struct sk_buff *skb,
                                       int err)
 {
-       struct microread_info *info = context;
+       const struct microread_info *info = context;
 
        switch (info->async_cb_type) {
        case MICROREAD_CB_TYPE_READER_ALL:
@@ -625,7 +624,7 @@ static int microread_event_received(struct nfc_hci_dev *hdev, u8 pipe,
        return r;
 }
 
-static struct nfc_hci_ops microread_hci_ops = {
+static const struct nfc_hci_ops microread_hci_ops = {
        .open = microread_open,
        .close = microread_close,
        .hci_ready = microread_hci_ready,
@@ -641,9 +640,9 @@ static struct nfc_hci_ops microread_hci_ops = {
        .event_received = microread_event_received,
 };
 
-int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
-                   int phy_headroom, int phy_tailroom, int phy_payload,
-                   struct nfc_hci_dev **hdev)
+int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+                   const char *llc_name, int phy_headroom, int phy_tailroom,
+                   int phy_payload, struct nfc_hci_dev **hdev)
 {
        struct microread_info *info;
        unsigned long quirks = 0;
index 044f5e4..2ee7ccf 100644 (file)
@@ -10,9 +10,9 @@
 
 #define DRIVER_DESC "NFC driver for microread"
 
-int microread_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
-                   int phy_headroom, int phy_tailroom, int phy_payload,
-                   struct nfc_hci_dev **hdev);
+int microread_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+                   const char *llc_name, int phy_headroom, int phy_tailroom,
+                   int phy_payload, struct nfc_hci_dev **hdev);
 
 void microread_remove(struct nfc_hci_dev *hdev);
 
index aaccb8b..edac56b 100644 (file)
@@ -129,7 +129,7 @@ static void fw_dnld_timeout(struct timer_list *t)
 }
 
 static int process_state_reset(struct nfcmrvl_private *priv,
-                              struct sk_buff *skb)
+                              const struct sk_buff *skb)
 {
        if (sizeof(nci_pattern_core_reset_ntf) != skb->len ||
            memcmp(skb->data, nci_pattern_core_reset_ntf,
@@ -145,7 +145,8 @@ static int process_state_reset(struct nfcmrvl_private *priv,
        return 0;
 }
 
-static int process_state_init(struct nfcmrvl_private *priv, struct sk_buff *skb)
+static int process_state_init(struct nfcmrvl_private *priv,
+                             const struct sk_buff *skb)
 {
        struct nci_core_set_config_cmd cmd;
 
@@ -175,7 +176,7 @@ static void create_lc(struct nfcmrvl_private *priv)
 }
 
 static int process_state_set_ref_clock(struct nfcmrvl_private *priv,
-                                      struct sk_buff *skb)
+                                      const struct sk_buff *skb)
 {
        struct nci_core_set_config_cmd cmd;
 
@@ -221,7 +222,7 @@ static int process_state_set_ref_clock(struct nfcmrvl_private *priv,
 }
 
 static int process_state_set_hi_config(struct nfcmrvl_private *priv,
-                                      struct sk_buff *skb)
+                                      const struct sk_buff *skb)
 {
        if (sizeof(nci_pattern_core_set_config_rsp) != skb->len ||
            memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len))
@@ -232,7 +233,7 @@ static int process_state_set_hi_config(struct nfcmrvl_private *priv,
 }
 
 static int process_state_open_lc(struct nfcmrvl_private *priv,
-                                struct sk_buff *skb)
+                                const struct sk_buff *skb)
 {
        if (sizeof(nci_pattern_core_conn_create_rsp) >= skb->len ||
            memcmp(skb->data, nci_pattern_core_conn_create_rsp,
@@ -347,7 +348,7 @@ static int process_state_fw_dnld(struct nfcmrvl_private *priv,
 }
 
 static int process_state_close_lc(struct nfcmrvl_private *priv,
-                                 struct sk_buff *skb)
+                                 const struct sk_buff *skb)
 {
        if (sizeof(nci_pattern_core_conn_close_rsp) != skb->len ||
            memcmp(skb->data, nci_pattern_core_conn_close_rsp, skb->len))
@@ -358,7 +359,8 @@ static int process_state_close_lc(struct nfcmrvl_private *priv,
        return 0;
 }
 
-static int process_state_boot(struct nfcmrvl_private *priv, struct sk_buff *skb)
+static int process_state_boot(struct nfcmrvl_private *priv,
+                             const struct sk_buff *skb)
 {
        if (sizeof(nci_pattern_proprietary_boot_rsp) != skb->len ||
            memcmp(skb->data, nci_pattern_proprietary_boot_rsp, skb->len))
index 59a529e..ceef81d 100644 (file)
@@ -8,12 +8,9 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
-#include <linux/pm_runtime.h>
 #include <linux/nfc.h>
-#include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <net/nfc/nci.h>
 #include <net/nfc/nci_core.h>
 #include "nfcmrvl.h"
@@ -146,7 +143,7 @@ static void nfcmrvl_i2c_nci_update_config(struct nfcmrvl_private *priv,
 {
 }
 
-static struct nfcmrvl_if_ops i2c_ops = {
+static const struct nfcmrvl_if_ops i2c_ops = {
        .nci_open = nfcmrvl_i2c_nci_open,
        .nci_close = nfcmrvl_i2c_nci_close,
        .nci_send = nfcmrvl_i2c_nci_send,
@@ -182,8 +179,8 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node,
 static int nfcmrvl_i2c_probe(struct i2c_client *client,
                             const struct i2c_device_id *id)
 {
+       const struct nfcmrvl_platform_data *pdata;
        struct nfcmrvl_i2c_drv_data *drv_data;
-       struct nfcmrvl_platform_data *pdata;
        struct nfcmrvl_platform_data config;
        int ret;
 
index a4620b4..2fcf545 100644 (file)
@@ -81,7 +81,7 @@ static int nfcmrvl_nci_fw_download(struct nci_dev *ndev,
        return nfcmrvl_fw_dnld_start(ndev, firmware_name);
 }
 
-static struct nci_ops nfcmrvl_nci_ops = {
+static const struct nci_ops nfcmrvl_nci_ops = {
        .open = nfcmrvl_nci_open,
        .close = nfcmrvl_nci_close,
        .send = nfcmrvl_nci_send,
@@ -91,9 +91,9 @@ static struct nci_ops nfcmrvl_nci_ops = {
 
 struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
                                void *drv_data,
-                               struct nfcmrvl_if_ops *ops,
+                               const struct nfcmrvl_if_ops *ops,
                                struct device *dev,
-                               struct nfcmrvl_platform_data *pdata)
+                               const struct nfcmrvl_platform_data *pdata)
 {
        struct nfcmrvl_private *priv;
        int rc;
index a715543..165bd0a 100644 (file)
@@ -77,7 +77,7 @@ struct nfcmrvl_private {
        /* PHY type */
        enum nfcmrvl_phy phy;
        /* Low level driver ops */
-       struct nfcmrvl_if_ops *if_ops;
+       const struct nfcmrvl_if_ops *if_ops;
 };
 
 struct nfcmrvl_if_ops {
@@ -92,9 +92,9 @@ void nfcmrvl_nci_unregister_dev(struct nfcmrvl_private *priv);
 int nfcmrvl_nci_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb);
 struct nfcmrvl_private *nfcmrvl_nci_register_dev(enum nfcmrvl_phy phy,
                                void *drv_data,
-                               struct nfcmrvl_if_ops *ops,
+                               const struct nfcmrvl_if_ops *ops,
                                struct device *dev,
-                               struct nfcmrvl_platform_data *pdata);
+                               const struct nfcmrvl_platform_data *pdata);
 
 
 void nfcmrvl_chip_reset(struct nfcmrvl_private *priv);
index 6669632..5b833a9 100644 (file)
@@ -7,11 +7,8 @@
 
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
 #include <linux/nfc.h>
-#include <linux/gpio.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <net/nfc/nci.h>
 #include <net/nfc/nci_core.h>
 #include <linux/spi/spi.h>
@@ -99,7 +96,7 @@ static void nfcmrvl_spi_nci_update_config(struct nfcmrvl_private *priv,
        drv_data->nci_spi->xfer_speed_hz = config->clk;
 }
 
-static struct nfcmrvl_if_ops spi_ops = {
+static const struct nfcmrvl_if_ops spi_ops = {
        .nci_open = nfcmrvl_spi_nci_open,
        .nci_close = nfcmrvl_spi_nci_close,
        .nci_send = nfcmrvl_spi_nci_send,
@@ -129,7 +126,7 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node,
 
 static int nfcmrvl_spi_probe(struct spi_device *spi)
 {
-       struct nfcmrvl_platform_data *pdata;
+       const struct nfcmrvl_platform_data *pdata;
        struct nfcmrvl_platform_data config;
        struct nfcmrvl_spi_drv_data *drv_data;
        int ret = 0;
index 50d86c9..9c92cbd 100644 (file)
@@ -49,7 +49,7 @@ static void nfcmrvl_uart_nci_update_config(struct nfcmrvl_private *priv,
                            config->flow_control);
 }
 
-static struct nfcmrvl_if_ops uart_ops = {
+static const struct nfcmrvl_if_ops uart_ops = {
        .nci_open = nfcmrvl_uart_nci_open,
        .nci_close = nfcmrvl_uart_nci_close,
        .nci_send = nfcmrvl_uart_nci_send,
@@ -98,8 +98,8 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
 static int nfcmrvl_nci_uart_open(struct nci_uart *nu)
 {
        struct nfcmrvl_private *priv;
-       struct nfcmrvl_platform_data *pdata = NULL;
        struct nfcmrvl_platform_data config;
+       const struct nfcmrvl_platform_data *pdata = NULL;
        struct device *dev = nu->tty->dev;
 
        /*
index 9d649b4..a99aedf 100644 (file)
@@ -264,7 +264,7 @@ done:
        return err;
 }
 
-static struct nfcmrvl_if_ops usb_ops = {
+static const struct nfcmrvl_if_ops usb_ops = {
        .nci_open = nfcmrvl_usb_nci_open,
        .nci_close = nfcmrvl_usb_nci_close,
        .nci_send = nfcmrvl_usb_nci_send,
index dd27c85..85bf8d5 100644 (file)
@@ -239,7 +239,7 @@ static int nfcsim_send(struct nfc_digital_dev *ddev, struct sk_buff *skb,
 
 static void nfcsim_abort_cmd(struct nfc_digital_dev *ddev)
 {
-       struct nfcsim *dev = nfc_digital_get_drvdata(ddev);
+       const struct nfcsim *dev = nfc_digital_get_drvdata(ddev);
 
        nfcsim_link_recv_cancel(dev->link_in);
 }
@@ -319,7 +319,7 @@ static int nfcsim_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
        return nfcsim_send(ddev, NULL, timeout, cb, arg);
 }
 
-static struct nfc_digital_ops nfcsim_digital_ops = {
+static const struct nfc_digital_ops nfcsim_digital_ops = {
        .in_configure_hw = nfcsim_in_configure_hw,
        .in_send_cmd = nfcsim_in_send_cmd,
 
index 2b0c723..518e2af 100644 (file)
@@ -83,7 +83,7 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
        return r;
 }
 
-static struct nci_ops nxp_nci_ops = {
+static const struct nci_ops nxp_nci_ops = {
        .open = nxp_nci_open,
        .close = nxp_nci_close,
        .send = nxp_nci_send,
index cd64bfe..2f3f3fe 100644 (file)
@@ -2623,7 +2623,7 @@ static int pn533_dev_down(struct nfc_dev *nfc_dev)
        return ret;
 }
 
-static struct nfc_ops pn533_nfc_ops = {
+static const struct nfc_ops pn533_nfc_ops = {
        .dev_up = pn533_dev_up,
        .dev_down = pn533_dev_down,
        .dep_link_up = pn533_dep_link_up,
index de59e43..37d26f0 100644 (file)
@@ -515,7 +515,7 @@ static irqreturn_t pn544_hci_i2c_irq_thread_fn(int irq, void *phy_id)
        return IRQ_HANDLED;
 }
 
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
        .write = pn544_hci_i2c_write,
        .enable = pn544_hci_i2c_enable,
        .disable = pn544_hci_i2c_disable,
index b788870..32a61a1 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/nfc.h>
 #include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
 
 #include "pn544.h"
 
@@ -86,7 +85,7 @@ enum pn544_state {
 #define PN544_HCI_CMD_ATTREQUEST               0x12
 #define PN544_HCI_CMD_CONTINUE_ACTIVATION      0x13
 
-static struct nfc_hci_gate pn544_gates[] = {
+static const struct nfc_hci_gate pn544_gates[] = {
        {NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE},
        {NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE},
        {NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE},
@@ -108,7 +107,7 @@ static struct nfc_hci_gate pn544_gates[] = {
 #define PN544_CMDS_HEADROOM    2
 
 struct pn544_hci_info {
-       struct nfc_phy_ops *phy_ops;
+       const struct nfc_phy_ops *phy_ops;
        void *phy_id;
 
        struct nfc_hci_dev *hdev;
@@ -809,7 +808,7 @@ static int pn544_hci_discover_se(struct nfc_hci_dev *hdev)
 #define PN544_SE_MODE_ON       0x01
 static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx)
 {
-       struct nfc_se *se;
+       const struct nfc_se *se;
        u8 enable = PN544_SE_MODE_ON;
        static struct uicc_gatelist {
                u8 head;
@@ -864,7 +863,7 @@ static int pn544_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx)
 
 static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx)
 {
-       struct nfc_se *se;
+       const struct nfc_se *se;
        u8 disable = PN544_SE_MODE_OFF;
 
        se = nfc_find_se(hdev->ndev, se_idx);
@@ -881,7 +880,7 @@ static int pn544_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx)
        }
 }
 
-static struct nfc_hci_ops pn544_hci_ops = {
+static const struct nfc_hci_ops pn544_hci_ops = {
        .open = pn544_hci_open,
        .close = pn544_hci_close,
        .hci_ready = pn544_hci_ready,
@@ -901,9 +900,10 @@ static struct nfc_hci_ops pn544_hci_ops = {
        .disable_se = pn544_hci_disable_se,
 };
 
-int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
-                   int phy_headroom, int phy_tailroom, int phy_payload,
-                   fw_download_t fw_download, struct nfc_hci_dev **hdev)
+int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+                   char *llc_name, int phy_headroom, int phy_tailroom,
+                   int phy_payload, fw_download_t fw_download,
+                   struct nfc_hci_dev **hdev)
 {
        struct pn544_hci_info *info;
        u32 protocols;
index 5634ba2..c6fe3e1 100644 (file)
 typedef int (*fw_download_t)(void *context, const char *firmware_name,
                                u8 hw_variant);
 
-int pn544_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name,
-                   int phy_headroom, int phy_tailroom, int phy_payload,
-                   fw_download_t fw_download, struct nfc_hci_dev **hdev);
+int pn544_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+                   char *llc_name, int phy_headroom, int phy_tailroom,
+                   int phy_payload, fw_download_t fw_download,
+                   struct nfc_hci_dev **hdev);
 void pn544_hci_remove(struct nfc_hci_dev *hdev);
 
 #endif /* __LOCAL_PN544_H_ */
index 4df926c..517376c 100644 (file)
@@ -217,7 +217,7 @@ struct port100_protocol {
        u8 value;
 } __packed;
 
-static struct port100_protocol
+static const struct port100_protocol
 in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = {
        [NFC_DIGITAL_FRAMING_NFCA_SHORT] = {
                { PORT100_IN_PROT_INITIAL_GUARD_TIME,      6 },
@@ -391,7 +391,7 @@ in_protocols[][PORT100_IN_MAX_NUM_PROTOCOLS + 1] = {
        },
 };
 
-static struct port100_protocol
+static const struct port100_protocol
 tg_protocols[][PORT100_TG_MAX_NUM_PROTOCOLS + 1] = {
        [NFC_DIGITAL_FRAMING_NFCA_SHORT] = {
                { PORT100_TG_PROT_END, 0 },
@@ -526,7 +526,7 @@ static inline u8 port100_checksum(u16 value)
 }
 
 /* The rule: sum(data elements) + checksum = 0 */
-static u8 port100_data_checksum(u8 *data, int datalen)
+static u8 port100_data_checksum(const u8 *data, int datalen)
 {
        u8 sum = 0;
        int i;
@@ -568,10 +568,10 @@ static void port100_tx_update_payload_len(void *_frame, int len)
        le16_add_cpu(&frame->datalen, len);
 }
 
-static bool port100_rx_frame_is_valid(void *_frame)
+static bool port100_rx_frame_is_valid(const void *_frame)
 {
        u8 checksum;
-       struct port100_frame *frame = _frame;
+       const struct port100_frame *frame = _frame;
 
        if (frame->start_frame != cpu_to_be16(PORT100_FRAME_SOF) ||
            frame->extended_frame != cpu_to_be16(PORT100_FRAME_EXT))
@@ -589,23 +589,24 @@ static bool port100_rx_frame_is_valid(void *_frame)
        return true;
 }
 
-static bool port100_rx_frame_is_ack(struct port100_ack_frame *frame)
+static bool port100_rx_frame_is_ack(const struct port100_ack_frame *frame)
 {
        return (frame->start_frame == cpu_to_be16(PORT100_FRAME_SOF) &&
                frame->ack_frame == cpu_to_be16(PORT100_FRAME_ACK));
 }
 
-static inline int port100_rx_frame_size(void *frame)
+static inline int port100_rx_frame_size(const void *frame)
 {
-       struct port100_frame *f = frame;
+       const struct port100_frame *f = frame;
 
        return sizeof(struct port100_frame) + le16_to_cpu(f->datalen) +
               PORT100_FRAME_TAIL_LEN;
 }
 
-static bool port100_rx_frame_is_cmd_response(struct port100 *dev, void *frame)
+static bool port100_rx_frame_is_cmd_response(const struct port100 *dev,
+                                            const void *frame)
 {
-       struct port100_frame *f = frame;
+       const struct port100_frame *f = frame;
 
        return (PORT100_FRAME_CMD(f) == PORT100_CMD_RESPONSE(dev->cmd->code));
 }
@@ -655,7 +656,8 @@ sched_wq:
        schedule_work(&dev->cmd_complete_work);
 }
 
-static int port100_submit_urb_for_response(struct port100 *dev, gfp_t flags)
+static int port100_submit_urb_for_response(const struct port100 *dev,
+                                          gfp_t flags)
 {
        dev->in_urb->complete = port100_recv_response;
 
@@ -666,7 +668,7 @@ static void port100_recv_ack(struct urb *urb)
 {
        struct port100 *dev = urb->context;
        struct port100_cmd *cmd = dev->cmd;
-       struct port100_ack_frame *in_frame;
+       const struct port100_ack_frame *in_frame;
        int rc;
 
        cmd->status = urb->status;
@@ -708,7 +710,7 @@ sched_wq:
        schedule_work(&dev->cmd_complete_work);
 }
 
-static int port100_submit_urb_for_ack(struct port100 *dev, gfp_t flags)
+static int port100_submit_urb_for_ack(const struct port100 *dev, gfp_t flags)
 {
        dev->in_urb->complete = port100_recv_ack;
 
@@ -753,8 +755,9 @@ static int port100_send_ack(struct port100 *dev)
        return rc;
 }
 
-static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out,
-                                   struct sk_buff *in, int in_len)
+static int port100_send_frame_async(struct port100 *dev,
+                                   const struct sk_buff *out,
+                                   const struct sk_buff *in, int in_len)
 {
        int rc;
 
@@ -960,7 +963,7 @@ static void port100_abort_cmd(struct nfc_digital_dev *ddev)
        usb_kill_urb(dev->in_urb);
 }
 
-static struct sk_buff *port100_alloc_skb(struct port100 *dev, unsigned int size)
+static struct sk_buff *port100_alloc_skb(const struct port100 *dev, unsigned int size)
 {
        struct sk_buff *skb;
 
@@ -1098,7 +1101,7 @@ static int port100_in_set_rf(struct nfc_digital_dev *ddev, u8 rf)
 static int port100_in_set_framing(struct nfc_digital_dev *ddev, int param)
 {
        struct port100 *dev = nfc_digital_get_drvdata(ddev);
-       struct port100_protocol *protocols;
+       const struct port100_protocol *protocols;
        struct sk_buff *skb;
        struct sk_buff *resp;
        int num_protocols;
@@ -1152,7 +1155,7 @@ static int port100_in_configure_hw(struct nfc_digital_dev *ddev, int type,
 static void port100_in_comm_rf_complete(struct port100 *dev, void *arg,
                                       struct sk_buff *resp)
 {
-       struct port100_cb_arg *cb_arg = arg;
+       const struct port100_cb_arg *cb_arg = arg;
        nfc_digital_cmd_complete_t cb = cb_arg->complete_cb;
        u32 status;
        int rc;
@@ -1255,7 +1258,7 @@ static int port100_tg_set_rf(struct nfc_digital_dev *ddev, u8 rf)
 static int port100_tg_set_framing(struct nfc_digital_dev *ddev, int param)
 {
        struct port100 *dev = nfc_digital_get_drvdata(ddev);
-       struct port100_protocol *protocols;
+       const struct port100_protocol *protocols;
        struct sk_buff *skb;
        struct sk_buff *resp;
        int rc;
@@ -1330,7 +1333,7 @@ static void port100_tg_comm_rf_complete(struct port100 *dev, void *arg,
                                        struct sk_buff *resp)
 {
        u32 status;
-       struct port100_cb_arg *cb_arg = arg;
+       const struct port100_cb_arg *cb_arg = arg;
        nfc_digital_cmd_complete_t cb = cb_arg->complete_cb;
        struct port100_tg_comm_rf_res *hdr;
 
@@ -1453,7 +1456,7 @@ static int port100_listen_mdaa(struct nfc_digital_dev *ddev,
 static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout,
                          nfc_digital_cmd_complete_t cb, void *arg)
 {
-       struct port100 *dev = nfc_digital_get_drvdata(ddev);
+       const struct port100 *dev = nfc_digital_get_drvdata(ddev);
        struct sk_buff *skb;
 
        skb = port100_alloc_skb(dev, 0);
@@ -1463,7 +1466,7 @@ static int port100_listen(struct nfc_digital_dev *ddev, u16 timeout,
        return port100_tg_send_cmd(ddev, skb, timeout, cb, arg);
 }
 
-static struct nfc_digital_ops port100_digital_ops = {
+static const struct nfc_digital_ops port100_digital_ops = {
        .in_configure_hw = port100_in_configure_hw,
        .in_send_cmd = port100_in_send_cmd,
 
index 865d3e3..1c41200 100644 (file)
@@ -143,11 +143,13 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev)
        return nci_core_init(info->ndev);
 }
 
-static struct nci_ops s3fwrn5_nci_ops = {
+static const struct nci_ops s3fwrn5_nci_ops = {
        .open = s3fwrn5_nci_open,
        .close = s3fwrn5_nci_close,
        .send = s3fwrn5_nci_send,
        .post_setup = s3fwrn5_nci_post_setup,
+       .prop_ops = s3fwrn5_nci_prop_ops,
+       .n_prop_ops = ARRAY_SIZE(s3fwrn5_nci_prop_ops),
 };
 
 int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev,
@@ -167,9 +169,6 @@ int s3fwrn5_probe(struct nci_dev **ndev, void *phy_id, struct device *pdev,
 
        s3fwrn5_set_mode(info, S3FWRN5_MODE_COLD);
 
-       s3fwrn5_nci_get_prop_ops(&s3fwrn5_nci_ops.prop_ops,
-               &s3fwrn5_nci_ops.n_prop_ops);
-
        info->ndev = nci_allocate_device(&s3fwrn5_nci_ops,
                S3FWRN5_NFC_PROTOCOLS, 0, 0);
        if (!info->ndev)
index e3e72b8..1af7a1e 100644 (file)
@@ -421,10 +421,9 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
 
        tfm = crypto_alloc_shash("sha1", 0, 0);
        if (IS_ERR(tfm)) {
-               ret = PTR_ERR(tfm);
                dev_err(&fw_info->ndev->nfc_dev->dev,
                        "Cannot allocate shash (code=%pe)\n", tfm);
-               goto out;
+               return PTR_ERR(tfm);
        }
 
        ret = crypto_shash_tfm_digest(tfm, fw->image, image_size, hash_data);
@@ -433,7 +432,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
        if (ret) {
                dev_err(&fw_info->ndev->nfc_dev->dev,
                        "Cannot compute hash (code=%d)\n", ret);
-               goto out;
+               return ret;
        }
 
        /* Firmware update process */
@@ -446,7 +445,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
        if (ret < 0) {
                dev_err(&fw_info->ndev->nfc_dev->dev,
                        "Unable to enter update mode\n");
-               goto out;
+               return ret;
        }
 
        for (off = 0; off < image_size; off += fw_info->sector_size) {
@@ -455,7 +454,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
                if (ret < 0) {
                        dev_err(&fw_info->ndev->nfc_dev->dev,
                                "Firmware update error (code=%d)\n", ret);
-                       goto out;
+                       return ret;
                }
        }
 
@@ -463,13 +462,12 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
        if (ret < 0) {
                dev_err(&fw_info->ndev->nfc_dev->dev,
                        "Unable to complete update mode\n");
-               goto out;
+               return ret;
        }
 
        dev_info(&fw_info->ndev->nfc_dev->dev,
                "Firmware update: success\n");
 
-out:
        return ret;
 }
 
index f042d3e..e374e67 100644 (file)
@@ -20,7 +20,7 @@ static int s3fwrn5_nci_prop_rsp(struct nci_dev *ndev, struct sk_buff *skb)
        return 0;
 }
 
-static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = {
+const struct nci_driver_ops s3fwrn5_nci_prop_ops[4] = {
        {
                .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY,
                                NCI_PROP_SET_RFREG),
@@ -43,12 +43,6 @@ static struct nci_driver_ops s3fwrn5_nci_prop_ops[] = {
        },
 };
 
-void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n)
-{
-       *ops = s3fwrn5_nci_prop_ops;
-       *n = ARRAY_SIZE(s3fwrn5_nci_prop_ops);
-}
-
 #define S3FWRN5_RFREG_SECTION_SIZE 252
 
 int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name)
index a80f0fb..c2d9065 100644 (file)
@@ -50,7 +50,7 @@ struct nci_prop_fw_cfg_rsp {
        __u8 status;
 };
 
-void s3fwrn5_nci_get_prop_ops(struct nci_driver_ops **ops, size_t *n);
+extern const struct nci_driver_ops s3fwrn5_nci_prop_ops[4];
 int s3fwrn5_nci_rf_configure(struct s3fwrn5_info *info, const char *fw_name);
 
 #endif /* __LOCAL_S3FWRN5_NCI_H_ */
index 110ff12..a367136 100644 (file)
@@ -9,8 +9,6 @@
 #include <linux/nfc.h>
 #include <net/nfc/nci.h>
 #include <net/nfc/nci_core.h>
-#include <linux/gpio.h>
-#include <linux/delay.h>
 
 #include "st-nci.h"
 
@@ -86,7 +84,7 @@ static int st_nci_prop_rsp_packet(struct nci_dev *ndev,
        return 0;
 }
 
-static struct nci_driver_ops st_nci_prop_ops[] = {
+static const struct nci_driver_ops st_nci_prop_ops[] = {
        {
                .opcode = nci_opcode_pack(NCI_GID_PROPRIETARY,
                                          ST_NCI_CORE_PROP),
@@ -94,7 +92,7 @@ static struct nci_driver_ops st_nci_prop_ops[] = {
        },
 };
 
-static struct nci_ops st_nci_ops = {
+static const struct nci_ops st_nci_ops = {
        .init = st_nci_init,
        .open = st_nci_open,
        .close = st_nci_close,
@@ -131,6 +129,7 @@ int st_nci_probe(struct llt_ndlc *ndlc, int phy_headroom,
                | NFC_PROTO_ISO15693_MASK
                | NFC_PROTO_NFC_DEP_MASK;
 
+       BUILD_BUG_ON(ARRAY_SIZE(st_nci_prop_ops) > NCI_MAX_PROPRIETARY_CMD);
        ndlc->ndev = nci_allocate_device(&st_nci_ops, protocols,
                                        phy_headroom, phy_tailroom);
        if (!ndlc->ndev) {
index 4698140..ccf6152 100644 (file)
@@ -186,7 +186,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id)
        return IRQ_HANDLED;
 }
 
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
        .write = st_nci_i2c_write,
        .enable = st_nci_i2c_enable,
        .disable = st_nci_i2c_disable,
index 5d74c67..e9dc313 100644 (file)
@@ -253,9 +253,9 @@ static void ndlc_t2_timeout(struct timer_list *t)
        schedule_work(&ndlc->sm_work);
 }
 
-int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev,
-              int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id,
-              struct st_nci_se_status *se_status)
+int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+              struct device *dev, int phy_headroom, int phy_tailroom,
+              struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status)
 {
        struct llt_ndlc *ndlc;
 
index 066e2fd..c24ce9b 100644 (file)
@@ -16,7 +16,7 @@ struct st_nci_se_status;
 /* Low Level Transport description */
 struct llt_ndlc {
        struct nci_dev *ndev;
-       struct nfc_phy_ops *ops;
+       const struct nfc_phy_ops *ops;
        void *phy_id;
 
        struct timer_list t1_timer;
@@ -45,8 +45,8 @@ int ndlc_open(struct llt_ndlc *ndlc);
 void ndlc_close(struct llt_ndlc *ndlc);
 int ndlc_send(struct llt_ndlc *ndlc, struct sk_buff *skb);
 void ndlc_recv(struct llt_ndlc *ndlc, struct sk_buff *skb);
-int ndlc_probe(void *phy_id, struct nfc_phy_ops *phy_ops, struct device *dev,
-              int phy_headroom, int phy_tailroom, struct llt_ndlc **ndlc_id,
-              struct st_nci_se_status *se_status);
+int ndlc_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
+              struct device *dev, int phy_headroom, int phy_tailroom,
+              struct llt_ndlc **ndlc_id, struct st_nci_se_status *se_status);
 void ndlc_remove(struct llt_ndlc *ndlc);
 #endif /* __LOCAL_NDLC_H__ */
index 250d56f..a620c34 100644 (file)
@@ -198,7 +198,7 @@ static irqreturn_t st_nci_irq_thread_fn(int irq, void *phy_id)
        return IRQ_HANDLED;
 }
 
-static struct nfc_phy_ops spi_phy_ops = {
+static const struct nfc_phy_ops spi_phy_ops = {
        .write = st_nci_spi_write,
        .enable = st_nci_spi_enable,
        .disable = st_nci_spi_disable,
index 94b6000..30d2912 100644 (file)
@@ -371,7 +371,7 @@ static int st_nci_manufacturer_specific(struct nfc_dev *dev, void *data,
        return nfc_vendor_cmd_reply(msg);
 }
 
-static struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
+static const struct nfc_vendor_cmd st_nci_vendor_cmds[] = {
        {
                .vendor_id = ST_NCI_VENDOR_OUI,
                .subcmd = FACTORY_MODE,
index 6ca0d2f..161caf2 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/module.h>
 #include <linux/nfc.h>
 #include <net/nfc/hci.h>
-#include <net/nfc/llc.h>
 
 #include "st21nfca.h"
 
@@ -72,7 +71,7 @@
 
 static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES);
 
-static struct nfc_hci_gate st21nfca_gates[] = {
+static const struct nfc_hci_gate st21nfca_gates[] = {
        {NFC_HCI_ADMIN_GATE, NFC_HCI_ADMIN_PIPE},
        {NFC_HCI_LINK_MGMT_GATE, NFC_HCI_LINK_MGMT_PIPE},
        {ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE},
@@ -912,7 +911,7 @@ static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe,
        }
 }
 
-static struct nfc_hci_ops st21nfca_hci_ops = {
+static const struct nfc_hci_ops st21nfca_hci_ops = {
        .open = st21nfca_hci_open,
        .close = st21nfca_hci_close,
        .load_session = st21nfca_hci_load_session,
@@ -935,7 +934,7 @@ static struct nfc_hci_ops st21nfca_hci_ops = {
        .se_io = st21nfca_hci_se_io,
 };
 
-int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
                       char *llc_name, int phy_headroom, int phy_tailroom,
                       int phy_payload, struct nfc_hci_dev **hdev,
                           struct st21nfca_se_status *se_status)
index 7a9f4d7..279d881 100644 (file)
@@ -18,8 +18,6 @@
 #include <linux/nfc.h>
 #include <linux/firmware.h>
 
-#include <asm/unaligned.h>
-
 #include <net/nfc/hci.h>
 #include <net/nfc/llc.h>
 #include <net/nfc/nfc.h>
@@ -76,8 +74,8 @@ struct st21nfca_i2c_phy {
        struct mutex phy_lock;
 };
 
-static u8 len_seq[] = { 16, 24, 12, 29 };
-static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+static const u8 len_seq[] = { 16, 24, 12, 29 };
+static const u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
 
 #define I2C_DUMP_SKB(info, skb)                                        \
 do {                                                           \
@@ -482,7 +480,7 @@ static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id)
        return IRQ_HANDLED;
 }
 
-static struct nfc_phy_ops i2c_phy_ops = {
+static const struct nfc_phy_ops i2c_phy_ops = {
        .write = st21nfca_hci_i2c_write,
        .enable = st21nfca_hci_i2c_enable,
        .disable = st21nfca_hci_i2c_disable,
index 5e0de0f..cb6ad91 100644 (file)
@@ -144,7 +144,7 @@ struct st21nfca_se_info {
 };
 
 struct st21nfca_hci_info {
-       struct nfc_phy_ops *phy_ops;
+       const struct nfc_phy_ops *phy_ops;
        void *phy_id;
 
        struct nfc_hci_dev *hdev;
@@ -163,7 +163,7 @@ struct st21nfca_hci_info {
        struct st21nfca_vendor_info vendor_info;
 };
 
-int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+int st21nfca_hci_probe(void *phy_id, const struct nfc_phy_ops *phy_ops,
                       char *llc_name, int phy_headroom, int phy_tailroom,
                       int phy_payload, struct nfc_hci_dev **hdev,
                       struct st21nfca_se_status *se_status);
index 62332ca..7488286 100644 (file)
@@ -295,7 +295,7 @@ exit:
        return r;
 }
 
-static struct nfc_vendor_cmd st21nfca_vendor_cmds[] = {
+static const struct nfc_vendor_cmd st21nfca_vendor_cmds[] = {
        {
                .vendor_id = ST21NFCA_VENDOR_OUI,
                .subcmd = FACTORY_MODE,
index 2dc788c..d16cf3f 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/nfc.h>
 #include <linux/of_gpio.h>
 #include <linux/of.h>
-#include <linux/of_irq.h>
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <linux/wait.h>
@@ -1037,7 +1036,7 @@ static void st95hf_abort_cmd(struct nfc_digital_dev *ddev)
 {
 }
 
-static struct nfc_digital_ops st95hf_nfc_digital_ops = {
+static const struct nfc_digital_ops st95hf_nfc_digital_ops = {
        .in_configure_hw = st95hf_in_configure_hw,
        .in_send_cmd = st95hf_in_send_cmd,
 
index 3397802..8890fcd 100644 (file)
@@ -643,7 +643,7 @@ static void trf7970a_send_err_upstream(struct trf7970a *trf, int errno)
 }
 
 static int trf7970a_transmit(struct trf7970a *trf, struct sk_buff *skb,
-                            unsigned int len, u8 *prefix,
+                            unsigned int len, const u8 *prefix,
                             unsigned int prefix_len)
 {
        struct spi_transfer t[2];
@@ -1387,9 +1387,10 @@ static int trf7970a_is_iso15693_write_or_lock(u8 cmd)
        }
 }
 
-static int trf7970a_per_cmd_config(struct trf7970a *trf, struct sk_buff *skb)
+static int trf7970a_per_cmd_config(struct trf7970a *trf,
+                                  const struct sk_buff *skb)
 {
-       u8 *req = skb->data;
+       const u8 *req = skb->data;
        u8 special_fcn_reg1, iso_ctrl;
        int ret;
 
@@ -1791,7 +1792,7 @@ out_err:
 static int trf7970a_tg_listen(struct nfc_digital_dev *ddev, u16 timeout,
                              nfc_digital_cmd_complete_t cb, void *arg)
 {
-       struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+       const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 
        dev_dbg(trf->dev, "Listen - state: %d, timeout: %d ms\n",
                trf->state, timeout);
@@ -1803,7 +1804,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
                                 u16 timeout, nfc_digital_cmd_complete_t cb,
                                 void *arg)
 {
-       struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+       const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
        int ret;
 
        dev_dbg(trf->dev, "Listen MD - state: %d, timeout: %d ms\n",
@@ -1824,7 +1825,7 @@ static int trf7970a_tg_listen_md(struct nfc_digital_dev *ddev,
 
 static int trf7970a_tg_get_rf_tech(struct nfc_digital_dev *ddev, u8 *rf_tech)
 {
-       struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
+       const struct trf7970a *trf = nfc_digital_get_drvdata(ddev);
 
        dev_dbg(trf->dev, "Get RF Tech - state: %d, rf_tech: %d\n",
                trf->state, trf->md_rf_tech);
@@ -1861,7 +1862,7 @@ static void trf7970a_abort_cmd(struct nfc_digital_dev *ddev)
        mutex_unlock(&trf->lock);
 }
 
-static struct nfc_digital_ops trf7970a_nfc_ops = {
+static const struct nfc_digital_ops trf7970a_nfc_ops = {
        .in_configure_hw        = trf7970a_in_configure_hw,
        .in_send_cmd            = trf7970a_send_cmd,
        .tg_configure_hw        = trf7970a_tg_configure_hw,
@@ -1974,7 +1975,7 @@ static void trf7970a_shutdown(struct trf7970a *trf)
        trf7970a_power_down(trf);
 }
 
-static int trf7970a_get_autosuspend_delay(struct device_node *np)
+static int trf7970a_get_autosuspend_delay(const struct device_node *np)
 {
        int autosuspend_delay, ret;
 
@@ -1987,7 +1988,7 @@ static int trf7970a_get_autosuspend_delay(struct device_node *np)
 
 static int trf7970a_probe(struct spi_device *spi)
 {
-       struct device_node *np = spi->dev.of_node;
+       const struct device_node *np = spi->dev.of_node;
        struct trf7970a *trf;
        int uvolts, autosuspend_delay, ret;
        u32 clk_freq = TRF7970A_13MHZ_CLOCK_FREQUENCY;
index f73ee0b..221fa3b 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
+#include <linux/wait.h>
 #include <net/nfc/nci_core.h>
 
 enum virtual_ncidev_mode {
@@ -27,6 +28,7 @@ enum virtual_ncidev_mode {
                                 NFC_PROTO_ISO15693_MASK)
 
 static enum virtual_ncidev_mode state;
+static DECLARE_WAIT_QUEUE_HEAD(wq);
 static struct miscdevice miscdev;
 static struct sk_buff *send_buff;
 static struct nci_dev *ndev;
@@ -61,11 +63,12 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
        }
        send_buff = skb_copy(skb, GFP_KERNEL);
        mutex_unlock(&nci_mutex);
+       wake_up_interruptible(&wq);
 
        return 0;
 }
 
-static struct nci_ops virtual_nci_ops = {
+static const struct nci_ops virtual_nci_ops = {
        .open = virtual_nci_open,
        .close = virtual_nci_close,
        .send = virtual_nci_send
@@ -77,9 +80,11 @@ static ssize_t virtual_ncidev_read(struct file *file, char __user *buf,
        size_t actual_len;
 
        mutex_lock(&nci_mutex);
-       if (!send_buff) {
+       while (!send_buff) {
                mutex_unlock(&nci_mutex);
-               return 0;
+               if (wait_event_interruptible(wq, send_buff))
+                       return -EFAULT;
+               mutex_lock(&nci_mutex);
        }
 
        actual_len = min_t(size_t, count, send_buff->len);
@@ -170,7 +175,7 @@ static int virtual_ncidev_close(struct inode *inode, struct file *file)
 static long virtual_ncidev_ioctl(struct file *flip, unsigned int cmd,
                                 unsigned long arg)
 {
-       struct nfc_dev *nfc_dev = ndev->nfc_dev;
+       const struct nfc_dev *nfc_dev = ndev->nfc_dev;
        void __user *p = (void __user *)arg;
 
        if (cmd != IOCTL_GET_NCIDEV_IDX)
index 93dcdd4..2f52110 100644 (file)
@@ -597,11 +597,8 @@ static inline void pcie_ecrc_get_policy(char *str) { }
 
 #ifdef CONFIG_PCIE_PTM
 void pci_ptm_init(struct pci_dev *dev);
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
 #else
 static inline void pci_ptm_init(struct pci_dev *dev) { }
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
-{ return -EINVAL; }
 #endif
 
 struct pci_dev_reset_methods {
index 95d4eef..8a4ad97 100644 (file)
@@ -204,3 +204,12 @@ int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
        return 0;
 }
 EXPORT_SYMBOL(pci_enable_ptm);
+
+bool pcie_ptm_enabled(struct pci_dev *dev)
+{
+       if (!dev)
+               return false;
+
+       return dev->ptm_enabled;
+}
+EXPORT_SYMBOL(pcie_ptm_enabled);
index 810f25a..6781488 100644 (file)
 
 #define COMPHY_FW_MODE_SATA                    0x1
 #define COMPHY_FW_MODE_SGMII                   0x2
-#define COMPHY_FW_MODE_HS_SGMII                        0x3
+#define COMPHY_FW_MODE_2500BASEX               0x3
 #define COMPHY_FW_MODE_USB3H                   0x4
 #define COMPHY_FW_MODE_USB3D                   0x5
 #define COMPHY_FW_MODE_PCIE                    0x6
-#define COMPHY_FW_MODE_RXAUI                   0x7
-#define COMPHY_FW_MODE_XFI                     0x8
-#define COMPHY_FW_MODE_SFI                     0x9
 #define COMPHY_FW_MODE_USB3                    0xa
 
 #define COMPHY_FW_SPEED_1_25G                  0 /* SGMII 1G */
 #define COMPHY_FW_SPEED_2_5G                   1
-#define COMPHY_FW_SPEED_3_125G                 2 /* SGMII 2.5G */
+#define COMPHY_FW_SPEED_3_125G                 2 /* 2500BASE-X */
 #define COMPHY_FW_SPEED_5G                     3
-#define COMPHY_FW_SPEED_5_15625G               4 /* XFI 5G */
-#define COMPHY_FW_SPEED_6G                     5
-#define COMPHY_FW_SPEED_10_3125G               6 /* XFI 10G */
 #define COMPHY_FW_SPEED_MAX                    0x3F
 
 #define COMPHY_FW_MODE(mode)                   ((mode) << 12)
@@ -84,14 +78,14 @@ static const struct mvebu_a3700_comphy_conf mvebu_a3700_comphy_modes[] = {
        MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_SGMII, 1,
                                    COMPHY_FW_MODE_SGMII),
        MVEBU_A3700_COMPHY_CONF_ETH(0, PHY_INTERFACE_MODE_2500BASEX, 1,
-                                   COMPHY_FW_MODE_HS_SGMII),
+                                   COMPHY_FW_MODE_2500BASEX),
        /* lane 1 */
        MVEBU_A3700_COMPHY_CONF_GEN(1, PHY_MODE_PCIE, 0,
                                    COMPHY_FW_MODE_PCIE),
        MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_SGMII, 0,
                                    COMPHY_FW_MODE_SGMII),
        MVEBU_A3700_COMPHY_CONF_ETH(1, PHY_INTERFACE_MODE_2500BASEX, 0,
-                                   COMPHY_FW_MODE_HS_SGMII),
+                                   COMPHY_FW_MODE_2500BASEX),
        /* lane 2 */
        MVEBU_A3700_COMPHY_CONF_GEN(2, PHY_MODE_SATA, 0,
                                    COMPHY_FW_MODE_SATA),
@@ -205,7 +199,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy)
                                                 COMPHY_FW_SPEED_1_25G);
                        break;
                case PHY_INTERFACE_MODE_2500BASEX:
-                       dev_dbg(lane->dev, "set lane %d to HS SGMII mode\n",
+                       dev_dbg(lane->dev, "set lane %d to 2500BASEX mode\n",
                                lane->id);
                        fw_param = COMPHY_FW_NET(fw_mode, lane->port,
                                                 COMPHY_FW_SPEED_3_125G);
index 53ad127..bbd6f2a 100644 (file)
 
 #define COMPHY_FW_MODE_SATA            0x1
 #define COMPHY_FW_MODE_SGMII           0x2 /* SGMII 1G */
-#define COMPHY_FW_MODE_HS_SGMII                0x3 /* SGMII 2.5G */
+#define COMPHY_FW_MODE_2500BASEX       0x3 /* 2500BASE-X */
 #define COMPHY_FW_MODE_USB3H           0x4
 #define COMPHY_FW_MODE_USB3D           0x5
 #define COMPHY_FW_MODE_PCIE            0x6
@@ -207,7 +207,7 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
        /* lane 0 */
        GEN_CONF(0, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
        ETH_CONF(0, 1, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(0, 1, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(0, 1, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
        GEN_CONF(0, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
        /* lane 1 */
        GEN_CONF(1, 0, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
@@ -215,10 +215,10 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
        GEN_CONF(1, 0, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
        GEN_CONF(1, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
        ETH_CONF(1, 2, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(1, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(1, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
        /* lane 2 */
        ETH_CONF(2, 0, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(2, 0, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(2, 0, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
        ETH_CONF(2, 0, PHY_INTERFACE_MODE_RXAUI, 0x1, COMPHY_FW_MODE_RXAUI),
        ETH_CONF(2, 0, PHY_INTERFACE_MODE_10GBASER, 0x1, COMPHY_FW_MODE_XFI),
        GEN_CONF(2, 0, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
@@ -227,26 +227,26 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
        /* lane 3 */
        GEN_CONF(3, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
        ETH_CONF(3, 1, PHY_INTERFACE_MODE_SGMII, 0x2, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(3, 1, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(3, 1, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_2500BASEX),
        ETH_CONF(3, 1, PHY_INTERFACE_MODE_RXAUI, 0x1, COMPHY_FW_MODE_RXAUI),
        GEN_CONF(3, 1, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
        GEN_CONF(3, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
        /* lane 4 */
        ETH_CONF(4, 0, PHY_INTERFACE_MODE_SGMII, 0x2, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(4, 0, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(4, 0, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_2500BASEX),
        ETH_CONF(4, 0, PHY_INTERFACE_MODE_10GBASER, 0x2, COMPHY_FW_MODE_XFI),
        ETH_CONF(4, 0, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
        GEN_CONF(4, 0, PHY_MODE_USB_DEVICE_SS, COMPHY_FW_MODE_USB3D),
        GEN_CONF(4, 1, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
        GEN_CONF(4, 1, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
        ETH_CONF(4, 1, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(4, 1, PHY_INTERFACE_MODE_2500BASEX, -1, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(4, 1, PHY_INTERFACE_MODE_2500BASEX, -1, COMPHY_FW_MODE_2500BASEX),
        ETH_CONF(4, 1, PHY_INTERFACE_MODE_10GBASER, -1, COMPHY_FW_MODE_XFI),
        /* lane 5 */
        ETH_CONF(5, 1, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
        GEN_CONF(5, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
        ETH_CONF(5, 2, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
-       ETH_CONF(5, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
+       ETH_CONF(5, 2, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_2500BASEX),
        GEN_CONF(5, 2, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
 };
 
index e085c25..f02bedf 100644 (file)
@@ -8,6 +8,7 @@ menu "PTP clock support"
 config PTP_1588_CLOCK
        tristate "PTP clock support"
        depends on NET && POSIX_TIMERS
+       default ETHERNET
        select PPS
        select NET_PTP_CLASSIFY
        help
@@ -26,6 +27,18 @@ config PTP_1588_CLOCK
          To compile this driver as a module, choose M here: the module
          will be called ptp.
 
+config PTP_1588_CLOCK_OPTIONAL
+       tristate
+       default y if PTP_1588_CLOCK=n
+       default PTP_1588_CLOCK
+       help
+         Drivers that can optionally use the PTP_1588_CLOCK framework
+         should depend on this symbol to prevent them from being built
+         into vmlinux while the PTP support itself is in a loadable
+         module.
+         If PTP support is disabled, this dependency will still be
+         met, and drivers refer to dummy helpers.
+
 config PTP_1588_CLOCK_DTE
        tristate "Broadcom DTE as PTP clock"
        depends on PTP_1588_CLOCK
@@ -92,7 +105,7 @@ config PTP_1588_CLOCK_PCH
        depends on X86_32 || COMPILE_TEST
        depends on HAS_IOMEM && PCI
        depends on NET
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK
        help
          This driver adds support for using the PCH EG20T as a PTP
          clock. The hardware supports time stamping of PTP packets
@@ -158,7 +171,10 @@ config PTP_1588_CLOCK_OCP
        tristate "OpenCompute TimeCard as PTP clock"
        depends on PTP_1588_CLOCK
        depends on HAS_IOMEM && PCI
-       default n
+       depends on I2C && MTD
+       depends on SERIAL_8250
+       depends on !S390
+       select NET_DEVLINK
        help
          This driver adds support for an OpenCompute time card.
 
index 0d1034e..caf9b37 100644 (file)
@@ -6,15 +6,29 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/xilinx_spi.h>
+#include <net/devlink.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
 
-static const struct pci_device_id ptp_ocp_pcidev_id[] = {
-       { PCI_DEVICE(0x1d9b, 0x0400) },
-       { 0 }
-};
-MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+#ifndef PCI_VENDOR_ID_FACEBOOK
+#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
+#endif
 
-#define OCP_REGISTER_OFFSET    0x01000000
+#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD
+#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
+#endif
+
+static struct class timecard_class = {
+       .owner          = THIS_MODULE,
+       .name           = "timecard",
+};
 
 struct ocp_reg {
        u32     ctrl;
@@ -29,18 +43,29 @@ struct ocp_reg {
        u32     __pad1[2];
        u32     offset_ns;
        u32     offset_window_ns;
+       u32     __pad2[2];
+       u32     drift_ns;
+       u32     drift_window_ns;
+       u32     __pad3[6];
+       u32     servo_offset_p;
+       u32     servo_offset_i;
+       u32     servo_drift_p;
+       u32     servo_drift_i;
 };
 
 #define OCP_CTRL_ENABLE                BIT(0)
 #define OCP_CTRL_ADJUST_TIME   BIT(1)
 #define OCP_CTRL_ADJUST_OFFSET BIT(2)
+#define OCP_CTRL_ADJUST_DRIFT  BIT(3)
+#define OCP_CTRL_ADJUST_SERVO  BIT(8)
 #define OCP_CTRL_READ_TIME_REQ BIT(30)
 #define OCP_CTRL_READ_TIME_DONE        BIT(31)
 
 #define OCP_STATUS_IN_SYNC     BIT(0)
+#define OCP_STATUS_IN_HOLDOVER BIT(1)
 
 #define OCP_SELECT_CLK_NONE    0
-#define OCP_SELECT_CLK_REG     6
+#define OCP_SELECT_CLK_REG     0xfe
 
 struct tod_reg {
        u32     ctrl;
@@ -55,8 +80,6 @@ struct tod_reg {
        u32     leap;
 };
 
-#define TOD_REGISTER_OFFSET    0x01050000
-
 #define TOD_CTRL_PROTOCOL      BIT(28)
 #define TOD_CTRL_DISABLE_FMT_A BIT(17)
 #define TOD_CTRL_DISABLE_FMT_B BIT(16)
@@ -68,16 +91,264 @@ struct tod_reg {
 #define TOD_STATUS_UTC_VALID   BIT(8)
 #define TOD_STATUS_LEAP_VALID  BIT(16)
 
+struct ts_reg {
+       u32     enable;
+       u32     error;
+       u32     polarity;
+       u32     version;
+       u32     __pad0[4];
+       u32     cable_delay;
+       u32     __pad1[3];
+       u32     intr;
+       u32     intr_mask;
+       u32     event_count;
+       u32     __pad2[1];
+       u32     ts_count;
+       u32     time_ns;
+       u32     time_sec;
+       u32     data_width;
+       u32     data;
+};
+
+struct pps_reg {
+       u32     ctrl;
+       u32     status;
+       u32     __pad0[6];
+       u32     cable_delay;
+};
+
+#define PPS_STATUS_FILTER_ERR  BIT(0)
+#define PPS_STATUS_SUPERV_ERR  BIT(1)
+
+struct img_reg {
+       u32     version;
+};
+
+struct ptp_ocp_flash_info {
+       const char *name;
+       int pci_offset;
+       int data_size;
+       void *data;
+};
+
+struct ptp_ocp_ext_info {
+       const char *name;
+       int index;
+       irqreturn_t (*irq_fcn)(int irq, void *priv);
+       int (*enable)(void *priv, bool enable);
+};
+
+struct ptp_ocp_ext_src {
+       void __iomem            *mem;
+       struct ptp_ocp          *bp;
+       struct ptp_ocp_ext_info *info;
+       int                     irq_vec;
+};
+
 struct ptp_ocp {
        struct pci_dev          *pdev;
+       struct device           dev;
        spinlock_t              lock;
-       void __iomem            *base;
        struct ocp_reg __iomem  *reg;
        struct tod_reg __iomem  *tod;
+       struct pps_reg __iomem  *pps_to_ext;
+       struct pps_reg __iomem  *pps_to_clk;
+       struct ptp_ocp_ext_src  *pps;
+       struct ptp_ocp_ext_src  *ts0;
+       struct ptp_ocp_ext_src  *ts1;
+       struct img_reg __iomem  *image;
        struct ptp_clock        *ptp;
        struct ptp_clock_info   ptp_info;
+       struct platform_device  *i2c_ctrl;
+       struct platform_device  *spi_flash;
+       struct clk_hw           *i2c_clk;
+       struct timer_list       watchdog;
+       time64_t                gnss_lost;
+       int                     id;
+       int                     n_irqs;
+       int                     gnss_port;
+       int                     mac_port;       /* miniature atomic clock */
+       u8                      serial[6];
+       int                     flash_start;
+       bool                    has_serial;
 };
 
+struct ocp_resource {
+       unsigned long offset;
+       int size;
+       int irq_vec;
+       int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r);
+       void *extra;
+       unsigned long bp_offset;
+};
+
+static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv);
+static int ptp_ocp_ts_enable(void *priv, bool enable);
+
+#define bp_assign_entry(bp, res, val) ({                               \
+       uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset;            \
+       *(typeof(val) *)addr = val;                                     \
+})
+
+#define OCP_RES_LOCATION(member) \
+       .bp_offset = offsetof(struct ptp_ocp, member)
+
+#define OCP_MEM_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem
+
+#define OCP_SERIAL_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial
+
+#define OCP_I2C_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c
+
+#define OCP_SPI_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi
+
+#define OCP_EXT_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext
+
+/* This is the MSI vector mapping used.
+ * 0: N/C
+ * 1: TS0
+ * 2: TS1
+ * 3: GPS
+ * 4: GPS2 (n/c)
+ * 5: MAC
+ * 6: SPI IMU (inertial measurement unit)
+ * 7: I2C oscillator
+ * 8: HWICAP
+ * 9: SPI Flash
+ */
+
+static struct ocp_resource ocp_fb_resource[] = {
+       {
+               OCP_MEM_RESOURCE(reg),
+               .offset = 0x01000000, .size = 0x10000,
+       },
+       {
+               OCP_EXT_RESOURCE(ts0),
+               .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts0", .index = 0,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_EXT_RESOURCE(ts1),
+               .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts1", .index = 1,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_ext),
+               .offset = 0x01030000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_clk),
+               .offset = 0x01040000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(tod),
+               .offset = 0x01050000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(image),
+               .offset = 0x00020000, .size = 0x1000,
+       },
+       {
+               OCP_I2C_RESOURCE(i2c_ctrl),
+               .offset = 0x00150000, .size = 0x10000, .irq_vec = 7,
+       },
+       {
+               OCP_SERIAL_RESOURCE(gnss_port),
+               .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+       },
+       {
+               OCP_SERIAL_RESOURCE(mac_port),
+               .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+       },
+       {
+               OCP_SPI_RESOURCE(spi_flash),
+               .offset = 0x00310000, .size = 0x10000, .irq_vec = 9,
+               .extra = &(struct ptp_ocp_flash_info) {
+                       .name = "xilinx_spi", .pci_offset = 0,
+                       .data_size = sizeof(struct xspi_platform_data),
+                       .data = &(struct xspi_platform_data) {
+                               .num_chipselect = 1,
+                               .bits_per_word = 8,
+                               .num_devices = 1,
+                               .devices = &(struct spi_board_info) {
+                                       .modalias = "spi-nor",
+                               },
+                       },
+               },
+       },
+       {
+               .setup = ptp_ocp_fb_board_init,
+       },
+       { }
+};
+
+static const struct pci_device_id ptp_ocp_pcidev_id[] = {
+       { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+
+static DEFINE_MUTEX(ptp_ocp_lock);
+static DEFINE_IDR(ptp_ocp_idr);
+
+static struct {
+       const char *name;
+       int value;
+} ptp_ocp_clock[] = {
+       { .name = "NONE",       .value = 0 },
+       { .name = "TOD",        .value = 1 },
+       { .name = "IRIG",       .value = 2 },
+       { .name = "PPS",        .value = 3 },
+       { .name = "PTP",        .value = 4 },
+       { .name = "RTC",        .value = 5 },
+       { .name = "DCF",        .value = 6 },
+       { .name = "REGS",       .value = 0xfe },
+       { .name = "EXT",        .value = 0xff },
+};
+
+static const char *
+ptp_ocp_clock_name_from_val(int val)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++)
+               if (ptp_ocp_clock[i].value == val)
+                       return ptp_ocp_clock[i].name;
+       return NULL;
+}
+
+static int
+ptp_ocp_clock_val_from_name(const char *name)
+{
+       const char *clk;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               if (!strncasecmp(name, clk, strlen(clk)))
+                       return ptp_ocp_clock[i].value;
+       }
+       return -EINVAL;
+}
+
 static int
 __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts,
                         struct ptp_system_timestamp *sts)
@@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
        return -EOPNOTSUPP;
 }
 
+static int
+ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns)
+{
+       return -EOPNOTSUPP;
+}
+
+static int
+ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq,
+              int on)
+{
+       struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info);
+       struct ptp_ocp_ext_src *ext = NULL;
+       int err;
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               switch (rq->extts.index) {
+               case 0:
+                       ext = bp->ts0;
+                       break;
+               case 1:
+                       ext = bp->ts1;
+                       break;
+               }
+               break;
+       case PTP_CLK_REQ_PPS:
+               ext = bp->pps;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = -ENXIO;
+       if (ext)
+               err = ext->info->enable(ext, on);
+
+       return err;
+}
+
 static const struct ptp_clock_info ptp_ocp_clock_info = {
        .owner          = THIS_MODULE,
        .name           = KBUILD_MODNAME,
@@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = {
        .settime64      = ptp_ocp_settime,
        .adjtime        = ptp_ocp_adjtime,
        .adjfine        = ptp_ocp_null_adjfine,
+       .adjphase       = ptp_ocp_adjphase,
+       .enable         = ptp_ocp_enable,
+       .pps            = true,
+       .n_ext_ts       = 2,
 };
 
+static void
+__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp)
+{
+       u32 ctrl, select;
+
+       select = ioread32(&bp->reg->select);
+       iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
+
+       iowrite32(0, &bp->reg->drift_ns);
+
+       ctrl = ioread32(&bp->reg->ctrl);
+       ctrl |= OCP_CTRL_ADJUST_DRIFT;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
+       /* restore clock selection */
+       iowrite32(select >> 16, &bp->reg->select);
+}
+
+static void
+ptp_ocp_watchdog(struct timer_list *t)
+{
+       struct ptp_ocp *bp = from_timer(bp, t, watchdog);
+       unsigned long flags;
+       u32 status;
+
+       status = ioread32(&bp->pps_to_clk->status);
+
+       if (status & PPS_STATUS_SUPERV_ERR) {
+               iowrite32(status, &bp->pps_to_clk->status);
+               if (!bp->gnss_lost) {
+                       spin_lock_irqsave(&bp->lock, flags);
+                       __ptp_ocp_clear_drift_locked(bp);
+                       spin_unlock_irqrestore(&bp->lock, flags);
+                       bp->gnss_lost = ktime_get_real_seconds();
+               }
+
+       } else if (bp->gnss_lost) {
+               bp->gnss_lost = 0;
+       }
+
+       mod_timer(&bp->watchdog, jiffies + HZ);
+}
+
 static int
-ptp_ocp_check_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp)
 {
        struct timespec64 ts;
        bool sync;
@@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
        ctrl |= OCP_CTRL_ENABLE;
        iowrite32(ctrl, &bp->reg->ctrl);
 
+       /* NO DRIFT Correction */
+       /* offset_p:i 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */
+       iowrite32(0x2000, &bp->reg->servo_offset_p);
+       iowrite32(0x1000, &bp->reg->servo_offset_i);
+       iowrite32(0,      &bp->reg->servo_drift_p);
+       iowrite32(0,      &bp->reg->servo_drift_i);
+
+       /* latch servo values */
+       ctrl |= OCP_CTRL_ADJUST_SERVO;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
        if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) {
                dev_err(&bp->pdev->dev, "clock not enabled\n");
                return -ENODEV;
@@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
                         ts.tv_sec, ts.tv_nsec,
                         sync ? "in-sync" : "UNSYNCED");
 
+       timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0);
+       mod_timer(&bp->watchdog, jiffies + HZ);
+
        return 0;
 }
 
@@ -278,82 +649,840 @@ ptp_ocp_tod_info(struct ptp_ocp *bp)
                 reg & TOD_STATUS_LEAP_VALID ? 1 : 0);
 }
 
+static int
+ptp_ocp_firstchild(struct device *dev, void *data)
+{
+       return 1;
+}
+
+static int
+ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data)
+{
+       struct i2c_msg msgs[2] = {
+               {
+                       .addr = addr,
+                       .len = 1,
+                       .buf = &reg,
+               },
+               {
+                       .addr = addr,
+                       .flags = I2C_M_RD,
+                       .len = 2,
+                       .buf = data,
+               },
+       };
+       int err;
+       u8 len;
+
+       /* xiic-i2c for some stupid reason only does 2 byte reads. */
+       while (sz) {
+               len = min_t(u8, sz, 2);
+               msgs[1].len = len;
+               err = i2c_transfer(adap, msgs, 2);
+               if (err != msgs[1].len)
+                       return err;
+               msgs[1].buf += len;
+               reg += len;
+               sz -= len;
+       }
+       return 0;
+}
+
+static void
+ptp_ocp_get_serial_number(struct ptp_ocp *bp)
+{
+       struct i2c_adapter *adap;
+       struct device *dev;
+       int err;
+
+       dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find I2C adapter\n");
+               return;
+       }
+
+       adap = i2c_verify_adapter(dev);
+       if (!adap) {
+               dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n",
+                       dev_name(dev));
+               goto out;
+       }
+
+       err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial);
+       if (err) {
+               dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err);
+               goto out;
+       }
+
+       bp->has_serial = true;
+
+out:
+       put_device(dev);
+}
+
 static void
 ptp_ocp_info(struct ptp_ocp *bp)
 {
-       static const char * const clock_name[] = {
-               "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT"
-       };
        u32 version, select;
 
        version = ioread32(&bp->reg->version);
        select = ioread32(&bp->reg->select);
        dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n",
                 version >> 24, (version >> 16) & 0xff, version & 0xffff,
-                clock_name[select & 7],
+                ptp_ocp_clock_name_from_val(select >> 16),
                 ptp_clock_index(bp->ptp));
 
        ptp_ocp_tod_info(bp);
 }
 
+static struct device *
+ptp_ocp_find_flash(struct ptp_ocp *bp)
+{
+       struct device *dev, *last;
+
+       last = NULL;
+       dev = &bp->spi_flash->dev;
+
+       while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) {
+               if (!strcmp("mtd", dev_bus_name(dev)))
+                       break;
+               put_device(last);
+               last = dev;
+       }
+       put_device(last);
+
+       return dev;
+}
+
 static int
-ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev,
+                     const struct firmware *fw)
 {
-       struct ptp_ocp *bp;
+       struct mtd_info *mtd = dev_get_drvdata(dev);
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       size_t off, len, resid, wrote;
+       struct erase_info erase;
+       size_t base, blksz;
+       int err = 0;
+
+       off = 0;
+       base = bp->flash_start;
+       blksz = 4096;
+       resid = fw->size;
+
+       while (resid) {
+               devlink_flash_update_status_notify(devlink, "Flashing",
+                                                  NULL, off, fw->size);
+
+               len = min_t(size_t, resid, blksz);
+               erase.addr = base + off;
+               erase.len = blksz;
+
+               err = mtd_erase(mtd, &erase);
+               if (err)
+                       goto out;
+
+               err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]);
+               if (err)
+                       goto out;
+
+               off += blksz;
+               resid -= len;
+       }
+out:
+       return err;
+}
+
+static int
+ptp_ocp_devlink_flash_update(struct devlink *devlink,
+                            struct devlink_flash_update_params *params,
+                            struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       struct device *dev;
+       const char *msg;
+       int err;
+
+       dev = ptp_ocp_find_flash(bp);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n");
+               return -ENODEV;
+       }
+
+       devlink_flash_update_status_notify(devlink, "Preparing to flash",
+                                          NULL, 0, 0);
+
+       err = ptp_ocp_devlink_flash(devlink, dev, params->fw);
+
+       msg = err ? "Flash error" : "Flash complete";
+       devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0);
+
+       put_device(dev);
+       return err;
+}
+
+static int
+ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+                        struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       char buf[32];
+       int err;
+
+       err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+       if (err)
+               return err;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               if (ver & 0xffff) {
+                       sprintf(buf, "%d", ver);
+                       err = devlink_info_version_running_put(req,
+                                                              "fw",
+                                                              buf);
+               } else {
+                       sprintf(buf, "%d", ver >> 16);
+                       err = devlink_info_version_running_put(req,
+                                                              "loader",
+                                                              buf);
+               }
+               if (err)
+                       return err;
+       }
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       if (bp->has_serial) {
+               sprintf(buf, "%pM", bp->serial);
+               err = devlink_info_serial_number_put(req, buf);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static const struct devlink_ops ptp_ocp_devlink_ops = {
+       .flash_update = ptp_ocp_devlink_flash_update,
+       .info_get = ptp_ocp_devlink_info_get,
+};
+
+static void __iomem *
+__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size)
+{
+       struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp");
+
+       return devm_ioremap_resource(&bp->pdev->dev, &res);
+}
+
+static void __iomem *
+ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       unsigned long start;
+
+       start = pci_resource_start(bp->pdev, 0) + r->offset;
+       return __ptp_ocp_get_mem(bp, start, r->size);
+}
+
+static void
+ptp_ocp_set_irq_resource(struct resource *res, int irq)
+{
+       struct resource r = DEFINE_RES_IRQ(irq);
+       *res = r;
+}
+
+static void
+ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size)
+{
+       struct resource r = DEFINE_RES_MEM(start, size);
+       *res = r;
+}
+
+static int
+ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct ptp_ocp_flash_info *info;
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct resource res[2];
+       unsigned long start;
+       int id;
+
+       /* XXX hack to work around old FPGA */
+       if (bp->n_irqs < 10) {
+               dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n");
+               return 0;
+       }
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "spi device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       info = r->extra;
+       id = pci_dev_id(pdev) << 1;
+       id += info->pci_offset;
+
+       p = platform_device_register_resndata(&pdev->dev, info->name, id,
+                                             res, 2, info->data,
+                                             info->data_size);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static struct platform_device *
+ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id)
+{
+       struct resource res[2];
+       unsigned long start;
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       return platform_device_register_resndata(&pdev->dev, "xiic-i2c",
+                                                id, res, 2, NULL, 0);
+}
+
+static int
+ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct clk_hw *clk;
+       char buf[32];
+       int id;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       id = pci_dev_id(bp->pdev);
+
+       sprintf(buf, "AXI.%d", id);
+       clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+       bp->i2c_clk = clk;
+
+       sprintf(buf, "xiic-i2c.%d", id);
+       devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf);
+       p = ptp_ocp_i2c_bus(bp->pdev, r, id);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static irqreturn_t
+ptp_ocp_ts_irq(int irq, void *priv)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+       struct ptp_clock_event ev;
+       u32 sec, nsec;
+
+       /* XXX should fix API - this converts s/ns -> ts -> s/ns */
+       sec = ioread32(&reg->time_sec);
+       nsec = ioread32(&reg->time_ns);
+
+       ev.type = PTP_CLOCK_EXTTS;
+       ev.index = ext->info->index;
+       ev.timestamp = sec * 1000000000ULL + nsec;
+
+       ptp_clock_event(ext->bp->ptp, &ev);
+
+       iowrite32(1, &reg->intr);       /* write 1 to ack */
+
+       return IRQ_HANDLED;
+}
+
+static int
+ptp_ocp_ts_enable(void *priv, bool enable)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+
+       if (enable) {
+               iowrite32(1, &reg->enable);
+               iowrite32(1, &reg->intr_mask);
+               iowrite32(1, &reg->intr);
+       } else {
+               iowrite32(0, &reg->intr_mask);
+               iowrite32(0, &reg->enable);
+       }
+
+       return 0;
+}
+
+static void
+ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext)
+{
+       ext->info->enable(ext, false);
+       pci_free_irq(ext->bp->pdev, ext->irq_vec, ext);
+       kfree(ext);
+}
+
+static int
+ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct ptp_ocp_ext_src *ext;
        int err;
 
-       bp = kzalloc(sizeof(*bp), GFP_KERNEL);
-       if (!bp)
+       ext = kzalloc(sizeof(*ext), GFP_KERNEL);
+       if (!ext)
                return -ENOMEM;
+
+       err = -EINVAL;
+       ext->mem = ptp_ocp_get_mem(bp, r);
+       if (!ext->mem)
+               goto out;
+
+       ext->bp = bp;
+       ext->info = r->extra;
+       ext->irq_vec = r->irq_vec;
+
+       err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL,
+                             ext, "ocp%d.%s", bp->id, ext->info->name);
+       if (err) {
+               dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec);
+               goto out;
+       }
+
+       bp_assign_entry(bp, r, ext);
+
+       return 0;
+
+out:
+       kfree(ext);
+       return err;
+}
+
+static int
+ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct uart_8250_port uart;
+
+       /* Setting UPF_IOREMAP and leaving port.membase unspecified lets
+        * the serial port device claim and release the pci resource.
+        */
+       memset(&uart, 0, sizeof(uart));
+       uart.port.dev = &pdev->dev;
+       uart.port.iotype = UPIO_MEM;
+       uart.port.regshift = 2;
+       uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset;
+       uart.port.irq = pci_irq_vector(pdev, r->irq_vec);
+       uart.port.uartclk = 50000000;
+       uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP;
+       uart.port.type = PORT_16550A;
+
+       return serial8250_register_8250_port(&uart);
+}
+
+static int
+ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       int port;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "serial device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       port = ptp_ocp_serial_line(bp, r);
+       if (port < 0)
+               return port;
+
+       bp_assign_entry(bp, r, port);
+
+       return 0;
+}
+
+static int
+ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       void __iomem *mem;
+
+       mem = ptp_ocp_get_mem(bp, r);
+       if (!mem)
+               return -EINVAL;
+
+       bp_assign_entry(bp, r, mem);
+
+       return 0;
+}
+
+/* FB specific board initializers; last "resource" registered. */
+static int
+ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       bp->flash_start = 1024 * 4096;
+
+       return ptp_ocp_init_clock(bp);
+}
+
+static int
+ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
+{
+       struct ocp_resource *r, *table;
+       int err = 0;
+
+       table = (struct ocp_resource *)driver_data;
+       for (r = table; r->setup; r++) {
+               err = r->setup(bp, r);
+               if (err)
+                       break;
+       }
+       return err;
+}
+
+static ssize_t
+serialnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       return sysfs_emit(buf, "%pM\n", bp->serial);
+}
+static DEVICE_ATTR_RO(serialnum);
+
+static ssize_t
+gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       ssize_t ret;
+
+       if (bp->gnss_lost)
+               ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost);
+       else
+               ret = sysfs_emit(buf, "SYNC\n");
+
+       return ret;
+}
+static DEVICE_ATTR_RO(gnss_sync);
+
+static ssize_t
+clock_source_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       const char *p;
+       u32 select;
+
+       select = ioread32(&bp->reg->select);
+       p = ptp_ocp_clock_name_from_val(select >> 16);
+
+       return sysfs_emit(buf, "%s\n", p);
+}
+
+static ssize_t
+clock_source_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       unsigned long flags;
+       int val;
+
+       val = ptp_ocp_clock_val_from_name(buf);
+       if (val < 0)
+               return val;
+
+       spin_lock_irqsave(&bp->lock, flags);
+       iowrite32(val, &bp->reg->select);
+       spin_unlock_irqrestore(&bp->lock, flags);
+
+       return count;
+}
+static DEVICE_ATTR_RW(clock_source);
+
+static ssize_t
+available_clock_sources_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       const char *clk;
+       ssize_t count;
+       int i;
+
+       count = 0;
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               count += sysfs_emit_at(buf, count, "%s ", clk);
+       }
+       if (count)
+               count--;
+       count += sysfs_emit_at(buf, count, "\n");
+       return count;
+}
+static DEVICE_ATTR_RO(available_clock_sources);
+
+static struct attribute *timecard_attrs[] = {
+       &dev_attr_serialnum.attr,
+       &dev_attr_gnss_sync.attr,
+       &dev_attr_clock_source.attr,
+       &dev_attr_available_clock_sources.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(timecard);
+
+static void
+ptp_ocp_dev_release(struct device *dev)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       mutex_lock(&ptp_ocp_lock);
+       idr_remove(&ptp_ocp_idr, bp->id);
+       mutex_unlock(&ptp_ocp_lock);
+}
+
+static int
+ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
+{
+       int err;
+
+       mutex_lock(&ptp_ocp_lock);
+       err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL);
+       mutex_unlock(&ptp_ocp_lock);
+       if (err < 0) {
+               dev_err(&pdev->dev, "idr_alloc failed: %d\n", err);
+               return err;
+       }
+       bp->id = err;
+
+       bp->ptp_info = ptp_ocp_clock_info;
+       spin_lock_init(&bp->lock);
+       bp->gnss_port = -1;
+       bp->mac_port = -1;
        bp->pdev = pdev;
+
+       device_initialize(&bp->dev);
+       dev_set_name(&bp->dev, "ocp%d", bp->id);
+       bp->dev.class = &timecard_class;
+       bp->dev.parent = &pdev->dev;
+       bp->dev.release = ptp_ocp_dev_release;
+       dev_set_drvdata(&bp->dev, bp);
+
+       err = device_add(&bp->dev);
+       if (err) {
+               dev_err(&bp->dev, "device add failed: %d\n", err);
+               goto out;
+       }
+
        pci_set_drvdata(pdev, bp);
 
+       return 0;
+
+out:
+       ptp_ocp_dev_release(&bp->dev);
+       put_device(&bp->dev);
+       return err;
+}
+
+static void
+ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link)
+{
+       struct device *dev = &bp->dev;
+
+       if (sysfs_create_link(&dev->kobj, &child->kobj, link))
+               dev_err(dev, "%s symlink failed\n", link);
+}
+
+static void
+ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link)
+{
+       struct device *dev, *child;
+
+       dev = &bp->pdev->dev;
+
+       child = device_find_child_by_name(dev, name);
+       if (!child) {
+               dev_err(dev, "Could not find device %s\n", name);
+               return;
+       }
+
+       ptp_ocp_symlink(bp, child, link);
+       put_device(child);
+}
+
+static int
+ptp_ocp_complete(struct ptp_ocp *bp)
+{
+       struct pps_device *pps;
+       char buf[32];
+
+       if (bp->gnss_port != -1) {
+               sprintf(buf, "ttyS%d", bp->gnss_port);
+               ptp_ocp_link_child(bp, buf, "ttyGNSS");
+       }
+       if (bp->mac_port != -1) {
+               sprintf(buf, "ttyS%d", bp->mac_port);
+               ptp_ocp_link_child(bp, buf, "ttyMAC");
+       }
+       sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp));
+       ptp_ocp_link_child(bp, buf, "ptp");
+
+       pps = pps_lookup_dev(bp->ptp);
+       if (pps)
+               ptp_ocp_symlink(bp, pps->dev, "pps");
+
+       if (device_add_groups(&bp->dev, timecard_groups))
+               pr_err("device add groups failed\n");
+
+       return 0;
+}
+
+static void
+ptp_ocp_resource_summary(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->pdev->dev;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               dev_info(dev, "version %x\n", ver);
+               if (ver & 0xffff)
+                       dev_info(dev, "regular image, version %d\n",
+                                ver & 0xffff);
+               else
+                       dev_info(dev, "golden image, version %d\n",
+                                ver >> 16);
+       }
+       if (bp->gnss_port != -1)
+               dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port);
+       if (bp->mac_port != -1)
+               dev_info(dev, "MAC @ /dev/ttyS%d   57600\n", bp->mac_port);
+}
+
+static void
+ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->dev;
+
+       sysfs_remove_link(&dev->kobj, "ttyGNSS");
+       sysfs_remove_link(&dev->kobj, "ttyMAC");
+       sysfs_remove_link(&dev->kobj, "ptp");
+       sysfs_remove_link(&dev->kobj, "pps");
+       device_remove_groups(dev, timecard_groups);
+}
+
+static void
+ptp_ocp_detach(struct ptp_ocp *bp)
+{
+       ptp_ocp_detach_sysfs(bp);
+       if (timer_pending(&bp->watchdog))
+               del_timer_sync(&bp->watchdog);
+       if (bp->ts0)
+               ptp_ocp_unregister_ext(bp->ts0);
+       if (bp->ts1)
+               ptp_ocp_unregister_ext(bp->ts1);
+       if (bp->pps)
+               ptp_ocp_unregister_ext(bp->pps);
+       if (bp->gnss_port != -1)
+               serial8250_unregister_port(bp->gnss_port);
+       if (bp->mac_port != -1)
+               serial8250_unregister_port(bp->mac_port);
+       if (bp->spi_flash)
+               platform_device_unregister(bp->spi_flash);
+       if (bp->i2c_ctrl)
+               platform_device_unregister(bp->i2c_ctrl);
+       if (bp->i2c_clk)
+               clk_hw_unregister_fixed_rate(bp->i2c_clk);
+       if (bp->n_irqs)
+               pci_free_irq_vectors(bp->pdev);
+       if (bp->ptp)
+               ptp_clock_unregister(bp->ptp);
+       device_unregister(&bp->dev);
+}
+
+static int
+ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct devlink *devlink;
+       struct ptp_ocp *bp;
+       int err;
+
+       devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
+       if (!devlink) {
+               dev_err(&pdev->dev, "devlink_alloc failed\n");
+               return -ENOMEM;
+       }
+
+       err = devlink_register(devlink);
+       if (err)
+               goto out_free;
+
        err = pci_enable_device(pdev);
        if (err) {
                dev_err(&pdev->dev, "pci_enable_device\n");
-               goto out_free;
+               goto out_unregister;
        }
 
-       err = pci_request_regions(pdev, KBUILD_MODNAME);
-       if (err) {
-               dev_err(&pdev->dev, "pci_request_region\n");
+       bp = devlink_priv(devlink);
+       err = ptp_ocp_device_init(bp, pdev);
+       if (err)
                goto out_disable;
-       }
 
-       bp->base = pci_ioremap_bar(pdev, 0);
-       if (!bp->base) {
-               dev_err(&pdev->dev, "io_remap bar0\n");
-               err = -ENOMEM;
-               goto out_release_regions;
+       /* compat mode.
+        * Older FPGA firmware only returns 2 irq's.
+        * allow this - if not all of the IRQ's are returned, skip the
+        * extra devices and just register the clock.
+        */
+       err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+       if (err < 0) {
+               dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err);
+               goto out;
        }
-       bp->reg = bp->base + OCP_REGISTER_OFFSET;
-       bp->tod = bp->base + TOD_REGISTER_OFFSET;
-       bp->ptp_info = ptp_ocp_clock_info;
-       spin_lock_init(&bp->lock);
+       bp->n_irqs = err;
+       pci_set_master(pdev);
 
-       err = ptp_ocp_check_clock(bp);
+       err = ptp_ocp_register_resources(bp, id->driver_data);
        if (err)
                goto out;
 
        bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev);
        if (IS_ERR(bp->ptp)) {
-               dev_err(&pdev->dev, "ptp_clock_register\n");
                err = PTR_ERR(bp->ptp);
+               dev_err(&pdev->dev, "ptp_clock_register: %d\n", err);
+               bp->ptp = NULL;
                goto out;
        }
 
+       err = ptp_ocp_complete(bp);
+       if (err)
+               goto out;
+
        ptp_ocp_info(bp);
+       ptp_ocp_resource_summary(bp);
 
        return 0;
 
 out:
-       pci_iounmap(pdev, bp->base);
-out_release_regions:
-       pci_release_regions(pdev);
+       ptp_ocp_detach(bp);
+       pci_set_drvdata(pdev, NULL);
 out_disable:
        pci_disable_device(pdev);
+out_unregister:
+       devlink_unregister(devlink);
 out_free:
-       kfree(bp);
+       devlink_free(devlink);
 
        return err;
 }
@@ -362,13 +1491,14 @@ static void
 ptp_ocp_remove(struct pci_dev *pdev)
 {
        struct ptp_ocp *bp = pci_get_drvdata(pdev);
+       struct devlink *devlink = priv_to_devlink(bp);
 
-       ptp_clock_unregister(bp->ptp);
-       pci_iounmap(pdev, bp->base);
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
+       ptp_ocp_detach(bp);
        pci_set_drvdata(pdev, NULL);
-       kfree(bp);
+       pci_disable_device(pdev);
+
+       devlink_unregister(devlink);
+       devlink_free(devlink);
 }
 
 static struct pci_driver ptp_ocp_driver = {
@@ -378,19 +1508,84 @@ static struct pci_driver ptp_ocp_driver = {
        .remove         = ptp_ocp_remove,
 };
 
+static int
+ptp_ocp_i2c_notifier_call(struct notifier_block *nb,
+                         unsigned long action, void *data)
+{
+       struct device *dev, *child = data;
+       struct ptp_ocp *bp;
+       bool add;
+
+       switch (action) {
+       case BUS_NOTIFY_ADD_DEVICE:
+       case BUS_NOTIFY_DEL_DEVICE:
+               add = action == BUS_NOTIFY_ADD_DEVICE;
+               break;
+       default:
+               return 0;
+       }
+
+       if (!i2c_verify_adapter(child))
+               return 0;
+
+       dev = child;
+       while ((dev = dev->parent))
+               if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME))
+                       goto found;
+       return 0;
+
+found:
+       bp = dev_get_drvdata(dev);
+       if (add)
+               ptp_ocp_symlink(bp, child, "i2c");
+       else
+               sysfs_remove_link(&bp->dev.kobj, "i2c");
+
+       return 0;
+}
+
+static struct notifier_block ptp_ocp_i2c_notifier = {
+       .notifier_call = ptp_ocp_i2c_notifier_call,
+};
+
 static int __init
 ptp_ocp_init(void)
 {
+       const char *what;
        int err;
 
+       what = "timecard class";
+       err = class_register(&timecard_class);
+       if (err)
+               goto out;
+
+       what = "i2c notifier";
+       err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+       if (err)
+               goto out_notifier;
+
+       what = "ptp_ocp driver";
        err = pci_register_driver(&ptp_ocp_driver);
+       if (err)
+               goto out_register;
+
+       return 0;
+
+out_register:
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+out_notifier:
+       class_unregister(&timecard_class);
+out:
+       pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err);
        return err;
 }
 
 static void __exit
 ptp_ocp_fini(void)
 {
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
        pci_unregister_driver(&ptp_ocp_driver);
+       class_unregister(&timecard_class);
 }
 
 module_init(ptp_ocp_init);
index e0f87c5..baee037 100644 (file)
@@ -149,6 +149,7 @@ void ptp_vclock_unregister(struct ptp_vclock *vclock)
        kfree(vclock);
 }
 
+#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK)
 int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
 {
        char name[PTP_CLOCK_NAME_LEN] = "";
@@ -217,3 +218,4 @@ void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
        hwtstamps->hwtstamp = ns_to_ktime(ns);
 }
 EXPORT_SYMBOL(ptp_convert_timestamp);
+#endif
index 9748165..acbe76a 100644 (file)
@@ -503,28 +503,6 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver)
 }
 EXPORT_SYMBOL(ccwgroup_driver_unregister);
 
-/**
- * get_ccwgroupdev_by_busid() - obtain device from a bus id
- * @gdrv: driver the device is owned by
- * @bus_id: bus id of the device to be searched
- *
- * This function searches all devices owned by @gdrv for a device with a bus
- * id matching @bus_id.
- * Returns:
- *  If a match is found, its reference count of the found device is increased
- *  and it is returned; else %NULL is returned.
- */
-struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
-                                                char *bus_id)
-{
-       struct device *dev;
-
-       dev = driver_find_device_by_name(&gdrv->driver, bus_id);
-
-       return dev ? to_ccwgroupdev(dev) : NULL;
-}
-EXPORT_SYMBOL_GPL(get_ccwgroupdev_by_busid);
-
 /**
  * ccwgroup_probe_ccwdev() - probe function for slave devices
  * @cdev: ccw device to be probed
index bf236d4..9c67b97 100644 (file)
@@ -74,6 +74,7 @@ config QETH_L2
        def_tristate y
        prompt "qeth layer 2 device support"
        depends on QETH
+       depends on BRIDGE || BRIDGE=n
        help
          Select this option to be able to run qeth devices in layer 2 mode.
          To compile as a module, choose M. The module name is qeth_l2.
@@ -88,15 +89,6 @@ config QETH_L3
          To compile as a module choose M. The module name is qeth_l3.
          If unsure, choose Y.
 
-config QETH_OSN
-       def_bool !HAVE_MARCH_Z14_FEATURES
-       prompt "qeth OSN device support"
-       depends on QETH
-       help
-         This enables the qeth driver to support devices in OSN mode.
-         This feature will be removed in 2021.
-         If unsure, choose N.
-
 config QETH_OSX
        def_bool !HAVE_MARCH_Z15_FEATURES
        prompt "qeth OSX device support"
index 377e368..06281a0 100644 (file)
@@ -1444,7 +1444,7 @@ again:
                        if (do_debug_ccw)
                        ctcmpc_dumpit((char *)&ch->ccw[0],
                                        sizeof(struct ccw1) * 3);
-               dolock = !in_irq();
+               dolock = !in_hardirq();
                if (dolock)
                        spin_lock_irqsave(
                                get_ccwdev_lock(ch->cdev), saveflags);
index 19ee91a..f0436f5 100644 (file)
@@ -1773,7 +1773,7 @@ static void mpc_action_side_xid(fsm_instance *fsm, void *arg, int side)
        CTCM_D3_DUMP((char *)ch->xid, XID2_LENGTH);
        CTCM_D3_DUMP((char *)ch->xid_id, 4);
 
-       if (!in_irq()) {
+       if (!in_hardirq()) {
                         /* Such conditional locking is a known problem for
                          * sparse because its static undeterministic.
                          * Warnings should be ignored here. */
index f4d554e..535a60b 100644 (file)
@@ -259,22 +259,10 @@ struct qeth_hdr_layer2 {
        __u8 reserved2[16];
 } __attribute__ ((packed));
 
-struct qeth_hdr_osn {
-       __u8 id;
-       __u8 reserved;
-       __u16 seq_no;
-       __u16 reserved2;
-       __u16 control_flags;
-       __u16 pdu_length;
-       __u8 reserved3[18];
-       __u32 ccid;
-} __attribute__ ((packed));
-
 struct qeth_hdr {
        union {
                struct qeth_hdr_layer2 l2;
                struct qeth_hdr_layer3 l3;
-               struct qeth_hdr_osn    osn;
        } hdr;
 } __attribute__ ((packed));
 
@@ -341,7 +329,6 @@ enum qeth_header_ids {
        QETH_HEADER_TYPE_LAYER3 = 0x01,
        QETH_HEADER_TYPE_LAYER2 = 0x02,
        QETH_HEADER_TYPE_L3_TSO = 0x03,
-       QETH_HEADER_TYPE_OSN    = 0x04,
        QETH_HEADER_TYPE_L2_TSO = 0x06,
        QETH_HEADER_MASK_INVAL  = 0x80,
 };
@@ -779,18 +766,13 @@ enum qeth_threads {
        QETH_RECOVER_THREAD = 1,
 };
 
-struct qeth_osn_info {
-       int (*assist_cb)(struct net_device *dev, void *data);
-       int (*data_cb)(struct sk_buff *skb);
-};
-
 struct qeth_discipline {
-       const struct device_type *devtype;
        int (*setup) (struct ccwgroup_device *);
        void (*remove) (struct ccwgroup_device *);
        int (*set_online)(struct qeth_card *card, bool carrier_ok);
        void (*set_offline)(struct qeth_card *card);
-       int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd);
+       int (*do_ioctl)(struct net_device *dev, struct ifreq *rq,
+                       void __user *data, int cmd);
        int (*control_event_handler)(struct qeth_card *card,
                                        struct qeth_ipa_cmd *cmd);
 };
@@ -865,7 +847,6 @@ struct qeth_card {
        /* QDIO buffer handling */
        struct qeth_qdio_info qdio;
        int read_or_write_problem;
-       struct qeth_osn_info osn_info;
        const struct qeth_discipline *discipline;
        atomic_t force_alloc_skb;
        struct service_level qeth_service_level;
@@ -1058,10 +1039,7 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb);
 extern const struct qeth_discipline qeth_l2_discipline;
 extern const struct qeth_discipline qeth_l3_discipline;
 extern const struct ethtool_ops qeth_ethtool_ops;
-extern const struct ethtool_ops qeth_osn_ethtool_ops;
 extern const struct attribute_group *qeth_dev_groups[];
-extern const struct attribute_group *qeth_osn_dev_groups[];
-extern const struct device_type qeth_generic_devtype;
 
 const char *qeth_get_cardname_short(struct qeth_card *);
 int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count);
@@ -1069,11 +1047,9 @@ int qeth_setup_discipline(struct qeth_card *card, enum qeth_discipline_id disc);
 void qeth_remove_discipline(struct qeth_card *card);
 
 /* exports for qeth discipline device drivers */
-extern struct kmem_cache *qeth_core_header_cache;
 extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
 
 struct net_device *qeth_clone_netdev(struct net_device *orig);
-struct qeth_card *qeth_get_card_by_busid(char *bus_id);
 void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads,
                              int clear_start_mask);
 int qeth_threads_running(struct qeth_card *, unsigned long);
@@ -1088,9 +1064,6 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
                                           enum qeth_ipa_cmds cmd_code,
                                           enum qeth_prot_versions prot,
                                           unsigned int data_length);
-struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
-                                      unsigned int length, unsigned int ccws,
-                                      long timeout);
 struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
                                                 enum qeth_ipa_funcs ipa_func,
                                                 u16 cmd_code,
@@ -1099,18 +1072,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
 struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
                                          enum qeth_diags_cmds sub_cmd,
                                          unsigned int data_length);
-void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
-void qeth_put_cmd(struct qeth_cmd_buffer *iob);
 
 int qeth_schedule_recovery(struct qeth_card *card);
 int qeth_poll(struct napi_struct *napi, int budget);
 void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable);
 int qeth_setadpparms_change_macaddr(struct qeth_card *);
 void qeth_tx_timeout(struct net_device *, unsigned int txqueue);
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
-                         u16 cmd_length,
-                         bool (*match)(struct qeth_cmd_buffer *iob,
-                                       struct qeth_cmd_buffer *reply));
 int qeth_query_switch_attributes(struct qeth_card *card,
                                  struct qeth_switch_info *sw_info);
 int qeth_query_card_info(struct qeth_card *card,
@@ -1118,12 +1085,9 @@ int qeth_query_card_info(struct qeth_card *card,
 int qeth_setadpparms_set_access_ctrl(struct qeth_card *card,
                                     enum qeth_ipa_isolation_modes mode);
 
-unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset);
-int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
-                       struct sk_buff *skb, struct qeth_hdr *hdr,
-                       unsigned int offset, unsigned int hd_len,
-                       int elements_needed);
 int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                       void __user *data, int cmd);
 void qeth_dbf_longtext(debug_info_t *id, int level, char *text, ...);
 int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
@@ -1148,11 +1112,4 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
                                  struct qeth_hdr *hdr, struct sk_buff *skb,
                                  __be16 proto, unsigned int data_len));
 
-/* exports for OSN */
-int qeth_osn_assist(struct net_device *, void *, int);
-int qeth_osn_register(unsigned char *read_dev_no, struct net_device **,
-               int (*assist_cb)(struct net_device *, void *),
-               int (*data_cb)(struct sk_buff *));
-void qeth_osn_deregister(struct net_device *);
-
 #endif /* __QETH_CORE_H__ */
index 62f88cc..5b973f3 100644 (file)
@@ -57,8 +57,7 @@ struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = {
 };
 EXPORT_SYMBOL_GPL(qeth_dbf);
 
-struct kmem_cache *qeth_core_header_cache;
-EXPORT_SYMBOL_GPL(qeth_core_header_cache);
+static struct kmem_cache *qeth_core_header_cache;
 static struct kmem_cache *qeth_qdio_outbuf_cache;
 
 static struct device *qeth_core_root_dev;
@@ -101,8 +100,6 @@ static const char *qeth_get_cardname(struct qeth_card *card)
                        return " OSD Express";
                case QETH_CARD_TYPE_IQD:
                        return " HiperSockets";
-               case QETH_CARD_TYPE_OSN:
-                       return " OSN QDIO";
                case QETH_CARD_TYPE_OSM:
                        return " OSM QDIO";
                case QETH_CARD_TYPE_OSX:
@@ -157,8 +154,6 @@ const char *qeth_get_cardname_short(struct qeth_card *card)
                        }
                case QETH_CARD_TYPE_IQD:
                        return "HiperSockets";
-               case QETH_CARD_TYPE_OSN:
-                       return "OSN";
                case QETH_CARD_TYPE_OSM:
                        return "OSM_1000";
                case QETH_CARD_TYPE_OSX:
@@ -431,6 +426,13 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
        return n;
 }
 
+static void qeth_put_cmd(struct qeth_cmd_buffer *iob)
+{
+       if (refcount_dec_and_test(&iob->ref_count)) {
+               kfree(iob->data);
+               kfree(iob);
+       }
+}
 static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len,
                           void *data)
 {
@@ -499,12 +501,11 @@ static void qeth_dequeue_cmd(struct qeth_card *card,
        spin_unlock_irq(&card->lock);
 }
 
-void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason)
+static void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason)
 {
        iob->rc = reason;
        complete(&iob->done);
 }
-EXPORT_SYMBOL_GPL(qeth_notify_cmd);
 
 static void qeth_flush_local_addrs4(struct qeth_card *card)
 {
@@ -781,10 +782,7 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
        QETH_CARD_TEXT(card, 5, "chkipad");
 
        if (IS_IPA_REPLY(cmd)) {
-               if (cmd->hdr.command != IPA_CMD_SETCCID &&
-                   cmd->hdr.command != IPA_CMD_DELCCID &&
-                   cmd->hdr.command != IPA_CMD_MODCCID &&
-                   cmd->hdr.command != IPA_CMD_SET_DIAG_ASS)
+               if (cmd->hdr.command != IPA_CMD_SET_DIAG_ASS)
                        qeth_issue_ipa_msg(cmd, cmd->hdr.return_code, card);
                return cmd;
        }
@@ -819,8 +817,6 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
                if (card->discipline->control_event_handler(card, cmd))
                        return cmd;
                return NULL;
-       case IPA_CMD_MODCCID:
-               return cmd;
        case IPA_CMD_REGISTER_LOCAL_ADDR:
                if (cmd->hdr.prot_version == QETH_PROT_IPV4)
                        qeth_add_local_addrs4(card, &cmd->data.local_addrs4);
@@ -877,15 +873,6 @@ static int qeth_check_idx_response(struct qeth_card *card,
        return 0;
 }
 
-void qeth_put_cmd(struct qeth_cmd_buffer *iob)
-{
-       if (refcount_dec_and_test(&iob->ref_count)) {
-               kfree(iob->data);
-               kfree(iob);
-       }
-}
-EXPORT_SYMBOL_GPL(qeth_put_cmd);
-
 static void qeth_release_buffer_cb(struct qeth_card *card,
                                   struct qeth_cmd_buffer *iob,
                                   unsigned int data_length)
@@ -899,9 +886,9 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, int rc)
        qeth_put_cmd(iob);
 }
 
-struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
-                                      unsigned int length, unsigned int ccws,
-                                      long timeout)
+static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+                                             unsigned int length,
+                                             unsigned int ccws, long timeout)
 {
        struct qeth_cmd_buffer *iob;
 
@@ -927,7 +914,6 @@ struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
        iob->length = length;
        return iob;
 }
-EXPORT_SYMBOL_GPL(qeth_alloc_cmd);
 
 static void qeth_issue_next_read_cb(struct qeth_card *card,
                                    struct qeth_cmd_buffer *iob,
@@ -958,11 +944,6 @@ static void qeth_issue_next_read_cb(struct qeth_card *card,
                cmd = qeth_check_ipa_data(card, cmd);
                if (!cmd)
                        goto out;
-               if (IS_OSN(card) && card->osn_info.assist_cb &&
-                   cmd->hdr.command != IPA_CMD_STARTLAN) {
-                       card->osn_info.assist_cb(card->dev, cmd);
-                       goto out;
-               }
        }
 
        /* match against pending cmd requests */
@@ -1835,7 +1816,7 @@ static enum qeth_discipline_id qeth_enforce_discipline(struct qeth_card *card)
 {
        enum qeth_discipline_id disc = QETH_DISCIPLINE_UNDETERMINED;
 
-       if (IS_OSM(card) || IS_OSN(card))
+       if (IS_OSM(card))
                disc = QETH_DISCIPLINE_LAYER2;
        else if (IS_VM_NIC(card))
                disc = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 :
@@ -1885,7 +1866,6 @@ static void qeth_idx_init(struct qeth_card *card)
                card->info.func_level = QETH_IDX_FUNC_LEVEL_IQD;
                break;
        case QETH_CARD_TYPE_OSD:
-       case QETH_CARD_TYPE_OSN:
                card->info.func_level = QETH_IDX_FUNC_LEVEL_OSD;
                break;
        default:
@@ -2442,9 +2422,7 @@ static int qeth_ulp_enable_cb(struct qeth_card *card, struct qeth_reply *reply,
 
 static u8 qeth_mpc_select_prot_type(struct qeth_card *card)
 {
-       if (IS_OSN(card))
-               return QETH_PROT_OSN2;
-       return IS_LAYER2(card) ? QETH_PROT_LAYER2 : QETH_PROT_TCPIP;
+       return IS_LAYER2(card) ? QETH_MPC_PROT_L2 : QETH_MPC_PROT_L3;
 }
 
 static int qeth_ulp_enable(struct qeth_card *card)
@@ -3000,10 +2978,8 @@ static void qeth_ipa_finalize_cmd(struct qeth_card *card,
        __ipa_cmd(iob)->hdr.seqno = card->seqno.ipa++;
 }
 
-void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
-                         u16 cmd_length,
-                         bool (*match)(struct qeth_cmd_buffer *iob,
-                                       struct qeth_cmd_buffer *reply))
+static void qeth_prepare_ipa_cmd(struct qeth_card *card,
+                                struct qeth_cmd_buffer *iob, u16 cmd_length)
 {
        u8 prot_type = qeth_mpc_select_prot_type(card);
        u16 total_length = iob->length;
@@ -3011,7 +2987,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
        qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, total_length,
                       iob->data);
        iob->finalize = qeth_ipa_finalize_cmd;
-       iob->match = match;
 
        memcpy(iob->data, IPA_PDU_HEADER, IPA_PDU_HEADER_SIZE);
        memcpy(QETH_IPA_PDU_LEN_TOTAL(iob->data), &total_length, 2);
@@ -3022,7 +2997,6 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
               &card->token.ulp_connection_r, QETH_MPC_TOKEN_LENGTH);
        memcpy(QETH_IPA_PDU_LEN_PDU3(iob->data), &cmd_length, 2);
 }
-EXPORT_SYMBOL_GPL(qeth_prepare_ipa_cmd);
 
 static bool qeth_ipa_match_reply(struct qeth_cmd_buffer *iob,
                                 struct qeth_cmd_buffer *reply)
@@ -3046,7 +3020,8 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
        if (!iob)
                return NULL;
 
-       qeth_prepare_ipa_cmd(card, iob, data_length, qeth_ipa_match_reply);
+       qeth_prepare_ipa_cmd(card, iob, data_length);
+       iob->match = qeth_ipa_match_reply;
 
        hdr = &__ipa_cmd(iob)->hdr;
        hdr->command = cmd_code;
@@ -3894,7 +3869,8 @@ static int qeth_get_elements_for_frags(struct sk_buff *skb)
  * Returns the number of pages, and thus QDIO buffer elements, needed to map the
  * skb's data (both its linear part and paged fragments).
  */
-unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset)
+static unsigned int qeth_count_elements(struct sk_buff *skb,
+                                       unsigned int data_offset)
 {
        unsigned int elements = qeth_get_elements_for_frags(skb);
        addr_t end = (addr_t)skb->data + skb_headlen(skb);
@@ -3904,7 +3880,6 @@ unsigned int qeth_count_elements(struct sk_buff *skb, unsigned int data_offset)
                elements += qeth_get_elements_for_range(start, end);
        return elements;
 }
-EXPORT_SYMBOL_GPL(qeth_count_elements);
 
 #define QETH_HDR_CACHE_OBJ_SIZE                (sizeof(struct qeth_hdr_tso) + \
                                         MAX_TCP_HEADER)
@@ -4192,10 +4167,11 @@ static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
        return 0;
 }
 
-int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
-                       struct sk_buff *skb, struct qeth_hdr *hdr,
-                       unsigned int offset, unsigned int hd_len,
-                       int elements_needed)
+static int qeth_do_send_packet(struct qeth_card *card,
+                              struct qeth_qdio_out_q *queue,
+                              struct sk_buff *skb, struct qeth_hdr *hdr,
+                              unsigned int offset, unsigned int hd_len,
+                              unsigned int elements_needed)
 {
        unsigned int start_index = queue->next_buf_to_fill;
        struct qeth_qdio_out_buffer *buffer;
@@ -4275,7 +4251,6 @@ out:
                netif_tx_start_queue(txq);
        return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_do_send_packet);
 
 static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
                              unsigned int payload_len, struct sk_buff *skb,
@@ -4554,7 +4529,6 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
        case MII_BMCR: /* Basic mode control register */
                rc = BMCR_FULLDPLX;
                if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) &&
-                   (card->info.link_type != QETH_LINK_TYPE_OSN) &&
                    (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) &&
                    (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH))
                        rc |= BMCR_SPEED100;
@@ -5266,10 +5240,6 @@ static struct ccw_device_id qeth_ids[] = {
                                        .driver_info = QETH_CARD_TYPE_OSD},
        {CCW_DEVICE_DEVTYPE(0x1731, 0x05, 0x1732, 0x05),
                                        .driver_info = QETH_CARD_TYPE_IQD},
-#ifdef CONFIG_QETH_OSN
-       {CCW_DEVICE_DEVTYPE(0x1731, 0x06, 0x1732, 0x06),
-                                       .driver_info = QETH_CARD_TYPE_OSN},
-#endif
        {CCW_DEVICE_DEVTYPE(0x1731, 0x02, 0x1732, 0x03),
                                        .driver_info = QETH_CARD_TYPE_OSM},
 #ifdef CONFIG_QETH_OSX
@@ -5628,14 +5598,6 @@ static void qeth_receive_skb(struct qeth_card *card, struct sk_buff *skb,
        bool is_cso;
 
        switch (hdr->hdr.l2.id) {
-       case QETH_HEADER_TYPE_OSN:
-               skb_push(skb, sizeof(*hdr));
-               skb_copy_to_linear_data(skb, hdr, sizeof(*hdr));
-               QETH_CARD_STAT_ADD(card, rx_bytes, skb->len);
-               QETH_CARD_STAT_INC(card, rx_packets);
-
-               card->osn_info.data_cb(skb);
-               return;
 #if IS_ENABLED(CONFIG_QETH_L3)
        case QETH_HEADER_TYPE_LAYER3:
                qeth_l3_rebuild_skb(card, skb, hdr);
@@ -5750,16 +5712,6 @@ next_packet:
                        linear_len = sizeof(struct iphdr);
                headroom = ETH_HLEN;
                break;
-       case QETH_HEADER_TYPE_OSN:
-               skb_len = hdr->hdr.osn.pdu_length;
-               if (!IS_OSN(card)) {
-                       QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
-                       goto walk_packet;
-               }
-
-               linear_len = skb_len;
-               headroom = sizeof(struct qeth_hdr);
-               break;
        default:
                if (hdr->hdr.l2.id & QETH_HEADER_MASK_INVAL)
                        QETH_CARD_STAT_INC(card, rx_frame_errors);
@@ -5777,8 +5729,7 @@ next_packet:
 
        use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) ||
                    (skb_len > READ_ONCE(priv->rx_copybreak) &&
-                    !atomic_read(&card->force_alloc_skb) &&
-                    !IS_OSN(card));
+                    !atomic_read(&card->force_alloc_skb));
 
        if (use_rx_sg) {
                /* QETH_CQ_ENABLED only: */
@@ -6335,14 +6286,9 @@ void qeth_remove_discipline(struct qeth_card *card)
        card->discipline = NULL;
 }
 
-const struct device_type qeth_generic_devtype = {
+static const struct device_type qeth_generic_devtype = {
        .name = "qeth_generic",
 };
-EXPORT_SYMBOL_GPL(qeth_generic_devtype);
-
-static const struct device_type qeth_osn_devtype = {
-       .name = "qeth_osn",
-};
 
 #define DBF_NAME_LEN   20
 
@@ -6425,10 +6371,6 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
        case QETH_CARD_TYPE_OSM:
                dev = alloc_etherdev(sizeof(*priv));
                break;
-       case QETH_CARD_TYPE_OSN:
-               dev = alloc_netdev(sizeof(*priv), "osn%d", NET_NAME_UNKNOWN,
-                                  ether_setup);
-               break;
        default:
                dev = alloc_etherdev_mqs(sizeof(*priv), QETH_MAX_OUT_QUEUES, 1);
        }
@@ -6442,23 +6384,19 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
 
        dev->ml_priv = card;
        dev->watchdog_timeo = QETH_TX_TIMEOUT;
-       dev->min_mtu = IS_OSN(card) ? 64 : 576;
+       dev->min_mtu = 576;
         /* initialized when device first goes online: */
        dev->max_mtu = 0;
        dev->mtu = 0;
        SET_NETDEV_DEV(dev, &card->gdev->dev);
        netif_carrier_off(dev);
 
-       if (IS_OSN(card)) {
-               dev->ethtool_ops = &qeth_osn_ethtool_ops;
-       } else {
-               dev->ethtool_ops = &qeth_ethtool_ops;
-               dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-               dev->hw_features |= NETIF_F_SG;
-               dev->vlan_features |= NETIF_F_SG;
-               if (IS_IQD(card))
-                       dev->features |= NETIF_F_SG;
-       }
+       dev->ethtool_ops = &qeth_ethtool_ops;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->hw_features |= NETIF_F_SG;
+       dev->vlan_features |= NETIF_F_SG;
+       if (IS_IQD(card))
+               dev->features |= NETIF_F_SG;
 
        return dev;
 }
@@ -6521,10 +6459,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
        if (rc)
                goto err_chp_desc;
 
-       if (IS_OSN(card))
-               gdev->dev.groups = qeth_osn_dev_groups;
-       else
-               gdev->dev.groups = qeth_dev_groups;
+       gdev->dev.groups = qeth_dev_groups;
 
        enforced_disc = qeth_enforce_discipline(card);
        switch (enforced_disc) {
@@ -6538,8 +6473,6 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
                if (rc)
                        goto err_setup_disc;
 
-               gdev->dev.type = IS_OSN(card) ? &qeth_osn_devtype :
-                                               card->discipline->devtype;
                break;
        }
 
@@ -6657,36 +6590,42 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
        .shutdown = qeth_core_shutdown,
 };
 
-struct qeth_card *qeth_get_card_by_busid(char *bus_id)
-{
-       struct ccwgroup_device *gdev;
-       struct qeth_card *card;
-
-       gdev = get_ccwgroupdev_by_busid(&qeth_core_ccwgroup_driver, bus_id);
-       if (!gdev)
-               return NULL;
-
-       card = dev_get_drvdata(&gdev->dev);
-       put_device(&gdev->dev);
-       return card;
-}
-EXPORT_SYMBOL_GPL(qeth_get_card_by_busid);
-
-int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int qeth_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
        struct qeth_card *card = dev->ml_priv;
-       struct mii_ioctl_data *mii_data;
        int rc = 0;
 
        switch (cmd) {
        case SIOC_QETH_ADP_SET_SNMP_CONTROL:
-               rc = qeth_snmp_command(card, rq->ifr_ifru.ifru_data);
+               rc = qeth_snmp_command(card, data);
                break;
        case SIOC_QETH_GET_CARD_TYPE:
                if ((IS_OSD(card) || IS_OSM(card) || IS_OSX(card)) &&
                    !IS_VM_NIC(card))
                        return 1;
                return 0;
+       case SIOC_QETH_QUERY_OAT:
+               rc = qeth_query_oat_command(card, data);
+               break;
+       default:
+               if (card->discipline->do_ioctl)
+                       rc = card->discipline->do_ioctl(dev, rq, data, cmd);
+               else
+                       rc = -EOPNOTSUPP;
+       }
+       if (rc)
+               QETH_CARD_TEXT_(card, 2, "ioce%x", rc);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(qeth_siocdevprivate);
+
+int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+       struct qeth_card *card = dev->ml_priv;
+       struct mii_ioctl_data *mii_data;
+       int rc = 0;
+
+       switch (cmd) {
        case SIOCGMIIPHY:
                mii_data = if_mii(rq);
                mii_data->phy_id = 0;
@@ -6699,14 +6638,8 @@ int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        mii_data->val_out = qeth_mdio_read(dev,
                                mii_data->phy_id, mii_data->reg_num);
                break;
-       case SIOC_QETH_QUERY_OAT:
-               rc = qeth_query_oat_command(card, rq->ifr_ifru.ifru_data);
-               break;
        default:
-               if (card->discipline->do_ioctl)
-                       rc = card->discipline->do_ioctl(dev, rq, cmd);
-               else
-                       rc = -EOPNOTSUPP;
+               return -EOPNOTSUPP;
        }
        if (rc)
                QETH_CARD_TEXT_(card, 2, "ioce%x", rc);
index 68c2588..d9266f7 100644 (file)
@@ -232,9 +232,6 @@ static const struct ipa_cmd_names qeth_ipa_cmd_names[] = {
        {IPA_CMD_DELVLAN,       "delvlan"},
        {IPA_CMD_VNICC,         "vnic_characteristics"},
        {IPA_CMD_SETBRIDGEPORT_OSA,     "set_bridge_port(osa)"},
-       {IPA_CMD_SETCCID,       "setccid"},
-       {IPA_CMD_DELCCID,       "delccid"},
-       {IPA_CMD_MODCCID,       "modccid"},
        {IPA_CMD_SETIP,         "setip"},
        {IPA_CMD_QIPASSIST,     "qipassist"},
        {IPA_CMD_SETASSPARMS,   "setassparms"},
index e4bde7d..6257f00 100644 (file)
@@ -34,8 +34,6 @@ extern const unsigned char IPA_PDU_HEADER[];
 /*****************************************************************************/
 #define IPA_CMD_INITIATOR_HOST  0x00
 #define IPA_CMD_INITIATOR_OSA   0x01
-#define IPA_CMD_INITIATOR_HOST_REPLY  0x80
-#define IPA_CMD_INITIATOR_OSA_REPLY   0x81
 #define IPA_CMD_PRIM_VERSION_NO 0x01
 
 struct qeth_ipa_caps {
@@ -66,7 +64,6 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask)
 enum qeth_card_types {
        QETH_CARD_TYPE_OSD     = 1,
        QETH_CARD_TYPE_IQD     = 5,
-       QETH_CARD_TYPE_OSN     = 6,
        QETH_CARD_TYPE_OSM     = 3,
        QETH_CARD_TYPE_OSX     = 2,
 };
@@ -75,12 +72,6 @@ enum qeth_card_types {
 #define IS_OSD(card)   ((card)->info.type == QETH_CARD_TYPE_OSD)
 #define IS_OSM(card)   ((card)->info.type == QETH_CARD_TYPE_OSM)
 
-#ifdef CONFIG_QETH_OSN
-#define IS_OSN(card)   ((card)->info.type == QETH_CARD_TYPE_OSN)
-#else
-#define IS_OSN(card)   false
-#endif
-
 #ifdef CONFIG_QETH_OSX
 #define IS_OSX(card)   ((card)->info.type == QETH_CARD_TYPE_OSX)
 #else
@@ -95,7 +86,6 @@ enum qeth_link_types {
        QETH_LINK_TYPE_FAST_ETH     = 0x01,
        QETH_LINK_TYPE_HSTR         = 0x02,
        QETH_LINK_TYPE_GBIT_ETH     = 0x03,
-       QETH_LINK_TYPE_OSN          = 0x04,
        QETH_LINK_TYPE_10GBIT_ETH   = 0x10,
        QETH_LINK_TYPE_25GBIT_ETH   = 0x12,
        QETH_LINK_TYPE_LANE_ETH100  = 0x81,
@@ -126,9 +116,6 @@ enum qeth_ipa_cmds {
        IPA_CMD_DELVLAN                 = 0x26,
        IPA_CMD_VNICC                   = 0x2a,
        IPA_CMD_SETBRIDGEPORT_OSA       = 0x2b,
-       IPA_CMD_SETCCID                 = 0x41,
-       IPA_CMD_DELCCID                 = 0x42,
-       IPA_CMD_MODCCID                 = 0x43,
        IPA_CMD_SETIP                   = 0xb1,
        IPA_CMD_QIPASSIST               = 0xb2,
        IPA_CMD_SETASSPARMS             = 0xb3,
@@ -879,8 +866,7 @@ extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc);
 extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd);
 
 /* Helper functions */
-#define IS_IPA_REPLY(cmd) ((cmd->hdr.initiator == IPA_CMD_INITIATOR_HOST) || \
-                          (cmd->hdr.initiator == IPA_CMD_INITIATOR_OSA_REPLY))
+#define IS_IPA_REPLY(cmd) ((cmd)->hdr.initiator == IPA_CMD_INITIATOR_HOST)
 
 /*****************************************************************************/
 /* END OF   IP Assist related definitions                                    */
@@ -919,10 +905,9 @@ extern const unsigned char ULP_ENABLE[];
                (PDU_ENCAPSULATION(buffer) + 0x17)
 #define QETH_ULP_ENABLE_RESP_LINK_TYPE(buffer) \
                (PDU_ENCAPSULATION(buffer) + 0x2b)
-/* Layer 2 definitions */
-#define QETH_PROT_LAYER2 0x08
-#define QETH_PROT_TCPIP  0x03
-#define QETH_PROT_OSN2   0x0a
+
+#define QETH_MPC_PROT_L2       0x08
+#define QETH_MPC_PROT_L3       0x03
 #define QETH_ULP_ENABLE_PROT_TYPE(buffer) (buffer + 0x50)
 #define QETH_IPA_CMD_PROT_TYPE(buffer) (buffer + 0x19)
 
index 5815114..406be16 100644 (file)
@@ -671,11 +671,6 @@ static const struct attribute_group qeth_dev_group = {
        .attrs = qeth_dev_attrs,
 };
 
-const struct attribute_group *qeth_osn_dev_groups[] = {
-       &qeth_dev_group,
-       NULL,
-};
-
 const struct attribute_group *qeth_dev_groups[] = {
        &qeth_dev_group,
        &qeth_dev_extended_group,
index 2c4cb30..46d0fe0 100644 (file)
@@ -123,7 +123,9 @@ static void __qeth_set_coalesce(struct net_device *dev,
 }
 
 static int qeth_set_coalesce(struct net_device *dev,
-                            struct ethtool_coalesce *coal)
+                            struct ethtool_coalesce *coal,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct qeth_card *card = dev->ml_priv;
        struct qeth_qdio_out_q *queue;
@@ -469,10 +471,3 @@ const struct ethtool_ops qeth_ethtool_ops = {
        .set_per_queue_coalesce = qeth_set_per_queue_coalesce,
        .get_link_ksettings = qeth_get_link_ksettings,
 };
-
-const struct ethtool_ops qeth_osn_ethtool_ops = {
-       .get_strings = qeth_get_strings,
-       .get_ethtool_stats = qeth_get_ethtool_stats,
-       .get_sset_count = qeth_get_sset_count,
-       .get_drvinfo = qeth_get_drvinfo,
-};
index d7cdd9c..72e84ff 100644 (file)
@@ -309,17 +309,16 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
                /* fall back to alternative mechanism: */
        }
 
-       if (!IS_OSN(card)) {
-               rc = qeth_setadpparms_change_macaddr(card);
-               if (!rc)
-                       goto out;
-               QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
-                                CARD_DEVID(card), rc);
-               QETH_CARD_TEXT_(card, 2, "1err%04x", rc);
-               /* fall back once more: */
-       }
+       rc = qeth_setadpparms_change_macaddr(card);
+       if (!rc)
+               goto out;
+       QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
+                        CARD_DEVID(card), rc);
+       QETH_CARD_TEXT_(card, 2, "1err%04x", rc);
 
-       /* some devices don't support a custom MAC address: */
+       /* Fall back once more, but some devices don't support a custom MAC
+        * address:
+        */
        if (IS_OSM(card) || IS_OSX(card))
                return (rc) ? rc : -EADDRNOTAVAIL;
        eth_hw_addr_random(card->dev);
@@ -334,7 +333,7 @@ static void qeth_l2_register_dev_addr(struct qeth_card *card)
        if (!is_valid_ether_addr(card->dev->dev_addr))
                qeth_l2_request_initial_mac(card);
 
-       if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr))
+       if (!qeth_l2_send_setmac(card, card->dev->dev_addr))
                card->info.dev_addr_is_registered = 1;
        else
                card->info.dev_addr_is_registered = 0;
@@ -496,44 +495,6 @@ static void qeth_l2_rx_mode_work(struct work_struct *work)
        qeth_l2_set_promisc_mode(card);
 }
 
-static int qeth_l2_xmit_osn(struct qeth_card *card, struct sk_buff *skb,
-                           struct qeth_qdio_out_q *queue)
-{
-       gfp_t gfp = GFP_ATOMIC | (skb_pfmemalloc(skb) ? __GFP_MEMALLOC : 0);
-       struct qeth_hdr *hdr = (struct qeth_hdr *)skb->data;
-       addr_t end = (addr_t)(skb->data + sizeof(*hdr));
-       addr_t start = (addr_t)skb->data;
-       unsigned int elements = 0;
-       unsigned int hd_len = 0;
-       int rc;
-
-       if (skb->protocol == htons(ETH_P_IPV6))
-               return -EPROTONOSUPPORT;
-
-       if (qeth_get_elements_for_range(start, end) > 1) {
-               /* Misaligned HW header, move it to its own buffer element. */
-               hdr = kmem_cache_alloc(qeth_core_header_cache, gfp);
-               if (!hdr)
-                       return -ENOMEM;
-               hd_len = sizeof(*hdr);
-               skb_copy_from_linear_data(skb, (char *)hdr, hd_len);
-               elements++;
-       }
-
-       elements += qeth_count_elements(skb, hd_len);
-       if (elements > queue->max_elements) {
-               rc = -E2BIG;
-               goto out;
-       }
-
-       rc = qeth_do_send_packet(card, queue, skb, hdr, hd_len, hd_len,
-                                elements);
-out:
-       if (rc && hd_len)
-               kmem_cache_free(qeth_core_header_cache, hdr);
-       return rc;
-}
-
 static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
                                           struct net_device *dev)
 {
@@ -548,12 +509,8 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb,
                txq = qeth_iqd_translate_txq(dev, txq);
        queue = card->qdio.out_qs[txq];
 
-       if (IS_OSN(card))
-               rc = qeth_l2_xmit_osn(card, skb, queue);
-       else
-               rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb),
-                              qeth_l2_fill_header);
-
+       rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb),
+                      qeth_l2_fill_header);
        if (!rc)
                return NETDEV_TX_OK;
 
@@ -760,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable)
        return rc;
 }
 
+struct qeth_l2_br2dev_event_work {
+       struct work_struct work;
+       struct net_device *br_dev;
+       struct net_device *lsync_dev;
+       struct net_device *dst_dev;
+       unsigned long event;
+       unsigned char addr[ETH_ALEN];
+};
+
+static const struct net_device_ops qeth_l2_netdev_ops;
+
+static bool qeth_l2_must_learn(struct net_device *netdev,
+                              struct net_device *dstdev)
+{
+       struct qeth_priv *priv;
+
+       priv = netdev_priv(netdev);
+       return (netdev != dstdev &&
+               (priv->brport_features & BR_LEARNING_SYNC) &&
+               !(br_port_flag_is_set(netdev, BR_ISOLATED) &&
+                 br_port_flag_is_set(dstdev, BR_ISOLATED)) &&
+               netdev->netdev_ops == &qeth_l2_netdev_ops);
+}
+
+/**
+ *     qeth_l2_br2dev_worker() - update local MACs
+ *     @work: bridge to device FDB update
+ *
+ *     Update local MACs of a learning_sync bridgeport so it can receive
+ *     messages for a destination port.
+ *     In case of an isolated learning_sync port, also update its isolated
+ *     siblings.
+ */
+static void qeth_l2_br2dev_worker(struct work_struct *work)
+{
+       struct qeth_l2_br2dev_event_work *br2dev_event_work =
+               container_of(work, struct qeth_l2_br2dev_event_work, work);
+       struct net_device *lsyncdev = br2dev_event_work->lsync_dev;
+       struct net_device *dstdev = br2dev_event_work->dst_dev;
+       struct net_device *brdev = br2dev_event_work->br_dev;
+       unsigned long event = br2dev_event_work->event;
+       unsigned char *addr = br2dev_event_work->addr;
+       struct qeth_card *card = lsyncdev->ml_priv;
+       struct net_device *lowerdev;
+       struct list_head *iter;
+       int err = 0;
+
+       kfree(br2dev_event_work);
+       QETH_CARD_TEXT_(card, 4, "b2dw%04x", event);
+       QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr));
+
+       rcu_read_lock();
+       /* Verify preconditions are still valid: */
+       if (!netif_is_bridge_port(lsyncdev) ||
+           brdev != netdev_master_upper_dev_get_rcu(lsyncdev))
+               goto unlock;
+       if (!qeth_l2_must_learn(lsyncdev, dstdev))
+               goto unlock;
+
+       if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) {
+               /* Update lsyncdev and its isolated sibling(s): */
+               iter = &brdev->adj_list.lower;
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               while (lowerdev) {
+                       if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) {
+                               switch (event) {
+                               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                                       err = dev_uc_add(lowerdev, addr);
+                                       break;
+                               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                                       err = dev_uc_del(lowerdev, addr);
+                                       break;
+                               default:
+                                       break;
+                               }
+                               if (err) {
+                                       QETH_CARD_TEXT(card, 2, "b2derris");
+                                       QETH_CARD_TEXT_(card, 2,
+                                                       "err%02x%03d", event,
+                                                       lowerdev->ifindex);
+                               }
+                       }
+                       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               }
+       } else {
+               switch (event) {
+               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                       err = dev_uc_add(lsyncdev, addr);
+                       break;
+               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                       err = dev_uc_del(lsyncdev, addr);
+                       break;
+               default:
+                       break;
+               }
+               if (err)
+                       QETH_CARD_TEXT_(card, 2, "b2derr%02x", event);
+       }
+
+unlock:
+       rcu_read_unlock();
+       dev_put(brdev);
+       dev_put(lsyncdev);
+       dev_put(dstdev);
+}
+
+static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+                                    struct net_device *lsyncdev,
+                                    struct net_device *dstdev,
+                                    unsigned long event,
+                                    const unsigned char *addr)
+{
+       struct qeth_l2_br2dev_event_work *worker_data;
+       struct qeth_card *card;
+
+       worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC);
+       if (!worker_data)
+               return -ENOMEM;
+       INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker);
+       worker_data->br_dev = brdev;
+       worker_data->lsync_dev = lsyncdev;
+       worker_data->dst_dev = dstdev;
+       worker_data->event = event;
+       ether_addr_copy(worker_data->addr, addr);
+
+       card = lsyncdev->ml_priv;
+       /* Take a reference on the sw port devices and the bridge */
+       dev_hold(brdev);
+       dev_hold(lsyncdev);
+       dev_hold(dstdev);
+       queue_work(card->event_wq, &worker_data->work);
+       return 0;
+}
+
+/* Called under rtnl_lock */
+static int qeth_l2_switchdev_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr)
+{
+       struct net_device *dstdev, *brdev, *lowerdev;
+       struct switchdev_notifier_fdb_info *fdb_info;
+       struct switchdev_notifier_info *info = ptr;
+       struct list_head *iter;
+       struct qeth_card *card;
+       int rc;
+
+       if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE ||
+             event == SWITCHDEV_FDB_DEL_TO_DEVICE))
+               return NOTIFY_DONE;
+
+       dstdev = switchdev_notifier_info_to_dev(info);
+       brdev = netdev_master_upper_dev_get_rcu(dstdev);
+       if (!brdev || !netif_is_bridge_master(brdev))
+               return NOTIFY_DONE;
+       fdb_info = container_of(info,
+                               struct switchdev_notifier_fdb_info,
+                               info);
+       iter = &brdev->adj_list.lower;
+       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       while (lowerdev) {
+               if (qeth_l2_must_learn(lowerdev, dstdev)) {
+                       card = lowerdev->ml_priv;
+                       QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
+                       rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+                                                      dstdev, event,
+                                                      fdb_info->addr);
+                       if (rc) {
+                               QETH_CARD_TEXT(card, 2, "b2dqwerr");
+                               return NOTIFY_BAD;
+                       }
+               }
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block qeth_l2_sw_notifier = {
+               .notifier_call = qeth_l2_switchdev_event,
+};
+
+static refcount_t qeth_l2_switchdev_notify_refcnt;
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_get(void)
+{
+       int rc;
+
+       if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = register_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to register qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       refcount_set(&qeth_l2_switchdev_notify_refcnt, 1);
+                       QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_put(void)
+{
+       int rc;
+
+       if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to unregister qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       QETH_DBF_MESSAGE(2,
+                                        "qeth_l2_sw_notifier unregistered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
 static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                  struct net_device *dev, u32 filter_mask,
                                  int nlflags)
@@ -853,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
        } else if (enable) {
                qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO);
                rc = qeth_l2_dev2br_an_set(card, true);
-               if (rc)
+               if (rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
-               else
+               } else {
                        priv->brport_features |= BR_LEARNING_SYNC;
+                       qeth_l2_br2dev_get();
+               }
        } else {
                rc = qeth_l2_dev2br_an_set(card, false);
                if (!rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
                        priv->brport_features ^= BR_LEARNING_SYNC;
                        qeth_l2_dev2br_fdb_flush(card);
+                       qeth_l2_br2dev_put();
                }
        }
        mutex_unlock(&card->sbp_lock);
@@ -879,7 +1060,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_select_queue       = qeth_l2_select_queue,
        .ndo_validate_addr      = qeth_l2_validate_addr,
        .ndo_set_rx_mode        = qeth_l2_set_rx_mode,
-       .ndo_do_ioctl           = qeth_do_ioctl,
+       .ndo_eth_ioctl          = qeth_do_ioctl,
+       .ndo_siocdevprivate     = qeth_siocdevprivate,
        .ndo_set_mac_address    = qeth_l2_set_mac_address,
        .ndo_vlan_rx_add_vid    = qeth_l2_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = qeth_l2_vlan_rx_kill_vid,
@@ -890,23 +1072,8 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
        .ndo_bridge_setlink     = qeth_l2_bridge_setlink,
 };
 
-static const struct net_device_ops qeth_osn_netdev_ops = {
-       .ndo_open               = qeth_open,
-       .ndo_stop               = qeth_stop,
-       .ndo_get_stats64        = qeth_get_stats64,
-       .ndo_start_xmit         = qeth_l2_hard_start_xmit,
-       .ndo_validate_addr      = eth_validate_addr,
-       .ndo_tx_timeout         = qeth_tx_timeout,
-};
-
 static int qeth_l2_setup_netdev(struct qeth_card *card)
 {
-       if (IS_OSN(card)) {
-               card->dev->netdev_ops = &qeth_osn_netdev_ops;
-               card->dev->flags |= IFF_NOARP;
-               goto add_napi;
-       }
-
        card->dev->needed_headroom = sizeof(struct qeth_hdr);
        card->dev->netdev_ops = &qeth_l2_netdev_ops;
        card->dev->priv_flags |= IFF_UNICAST_FLT;
@@ -952,7 +1119,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
                                       PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1));
        }
 
-add_napi:
        netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
        return register_netdev(card->dev);
 }
@@ -1044,84 +1210,6 @@ static void qeth_l2_enable_brport_features(struct qeth_card *card)
        }
 }
 
-#ifdef CONFIG_QETH_OSN
-static void qeth_osn_assist_cb(struct qeth_card *card,
-                              struct qeth_cmd_buffer *iob,
-                              unsigned int data_length)
-{
-       qeth_notify_cmd(iob, 0);
-       qeth_put_cmd(iob);
-}
-
-int qeth_osn_assist(struct net_device *dev, void *data, int data_len)
-{
-       struct qeth_cmd_buffer *iob;
-       struct qeth_card *card;
-
-       if (data_len < 0)
-               return -EINVAL;
-       if (!dev)
-               return -ENODEV;
-       card = dev->ml_priv;
-       if (!card)
-               return -ENODEV;
-       QETH_CARD_TEXT(card, 2, "osnsdmc");
-       if (!qeth_card_hw_is_reachable(card))
-               return -ENODEV;
-
-       iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data_len, 1,
-                            QETH_IPA_TIMEOUT);
-       if (!iob)
-               return -ENOMEM;
-
-       qeth_prepare_ipa_cmd(card, iob, (u16) data_len, NULL);
-
-       memcpy(__ipa_cmd(iob), data, data_len);
-       iob->callback = qeth_osn_assist_cb;
-       return qeth_send_ipa_cmd(card, iob, NULL, NULL);
-}
-EXPORT_SYMBOL(qeth_osn_assist);
-
-int qeth_osn_register(unsigned char *read_dev_no, struct net_device **dev,
-                 int (*assist_cb)(struct net_device *, void *),
-                 int (*data_cb)(struct sk_buff *))
-{
-       struct qeth_card *card;
-       char bus_id[16];
-       u16 devno;
-
-       memcpy(&devno, read_dev_no, 2);
-       sprintf(bus_id, "0.0.%04x", devno);
-       card = qeth_get_card_by_busid(bus_id);
-       if (!card || !IS_OSN(card))
-               return -ENODEV;
-       *dev = card->dev;
-
-       QETH_CARD_TEXT(card, 2, "osnreg");
-       if ((assist_cb == NULL) || (data_cb == NULL))
-               return -EINVAL;
-       card->osn_info.assist_cb = assist_cb;
-       card->osn_info.data_cb = data_cb;
-       return 0;
-}
-EXPORT_SYMBOL(qeth_osn_register);
-
-void qeth_osn_deregister(struct net_device *dev)
-{
-       struct qeth_card *card;
-
-       if (!dev)
-               return;
-       card = dev->ml_priv;
-       if (!card)
-               return;
-       QETH_CARD_TEXT(card, 2, "osndereg");
-       card->osn_info.assist_cb = NULL;
-       card->osn_info.data_cb = NULL;
-}
-EXPORT_SYMBOL(qeth_osn_deregister);
-#endif
-
 /* SETBRIDGEPORT support, async notifications */
 
 enum qeth_an_event_type {anev_reg_unreg, anev_abort, anev_reset};
@@ -2190,16 +2278,15 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
        int rc;
 
-       if (IS_OSN(card))
-               dev_notice(&gdev->dev, "OSN support will be dropped in 2021\n");
-
        qeth_l2_vnicc_set_defaults(card);
        mutex_init(&card->sbp_lock);
 
-       if (gdev->dev.type == &qeth_generic_devtype) {
+       if (gdev->dev.type) {
                rc = device_add_groups(&gdev->dev, qeth_l2_attr_groups);
                if (rc)
                        return rc;
+       } else {
+               gdev->dev.type = &qeth_l2_devtype;
        }
 
        INIT_WORK(&card->rx_mode_work, qeth_l2_rx_mode_work);
@@ -2209,9 +2296,11 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+       struct qeth_priv *priv;
 
-       if (gdev->dev.type == &qeth_generic_devtype)
+       if (gdev->dev.type != &qeth_l2_devtype)
                device_remove_groups(&gdev->dev, qeth_l2_attr_groups);
+
        qeth_set_allowed_threads(card, 0, 1);
        wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
@@ -2219,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
                qeth_set_offline(card, card->discipline, false);
 
        cancel_work_sync(&card->close_dev_work);
-       if (card->dev->reg_state == NETREG_REGISTERED)
+       if (card->dev->reg_state == NETREG_REGISTERED) {
+               priv = netdev_priv(card->dev);
+               if (priv->brport_features & BR_LEARNING_SYNC) {
+                       rtnl_lock();
+                       qeth_l2_br2dev_put();
+                       rtnl_unlock();
+               }
                unregister_netdev(card->dev);
+       }
 }
 
 static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
@@ -2331,7 +2427,6 @@ static int qeth_l2_control_event(struct qeth_card *card,
 }
 
 const struct qeth_discipline qeth_l2_discipline = {
-       .devtype = &qeth_l2_devtype,
        .setup = qeth_l2_probe_device,
        .remove = qeth_l2_remove_device,
        .set_online = qeth_l2_set_online,
@@ -2344,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline);
 static int __init qeth_l2_init(void)
 {
        pr_info("register layer 2 discipline\n");
+       refcount_set(&qeth_l2_switchdev_notify_refcnt, 0);
        return 0;
 }
 
index f0d6f20..3a523e7 100644 (file)
@@ -1512,7 +1512,7 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
        return rc;
 }
 
-static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
        struct qeth_card *card = dev->ml_priv;
        struct qeth_arp_cache_entry arp_entry;
@@ -1532,13 +1532,13 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        rc = -EPERM;
                        break;
                }
-               rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data);
+               rc = qeth_l3_arp_query(card, data);
                break;
        case SIOC_QETH_ARP_ADD_ENTRY:
        case SIOC_QETH_ARP_REMOVE_ENTRY:
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
-               if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry)))
+               if (copy_from_user(&arp_entry, data, sizeof(arp_entry)))
                        return -EFAULT;
 
                arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ?
@@ -1841,7 +1841,8 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
        .ndo_select_queue       = qeth_l3_iqd_select_queue,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = qeth_l3_set_rx_mode,
-       .ndo_do_ioctl           = qeth_do_ioctl,
+       .ndo_eth_ioctl          = qeth_do_ioctl,
+       .ndo_siocdevprivate     = qeth_siocdevprivate,
        .ndo_fix_features       = qeth_fix_features,
        .ndo_set_features       = qeth_set_features,
        .ndo_tx_timeout         = qeth_tx_timeout,
@@ -1856,7 +1857,8 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
        .ndo_select_queue       = qeth_l3_osa_select_queue,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_rx_mode        = qeth_l3_set_rx_mode,
-       .ndo_do_ioctl           = qeth_do_ioctl,
+       .ndo_eth_ioctl          = qeth_do_ioctl,
+       .ndo_siocdevprivate     = qeth_siocdevprivate,
        .ndo_fix_features       = qeth_fix_features,
        .ndo_set_features       = qeth_set_features,
        .ndo_tx_timeout         = qeth_tx_timeout,
@@ -1940,12 +1942,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev)
        if (!card->cmd_wq)
                return -ENOMEM;
 
-       if (gdev->dev.type == &qeth_generic_devtype) {
+       if (gdev->dev.type) {
                rc = device_add_groups(&gdev->dev, qeth_l3_attr_groups);
                if (rc) {
                        destroy_workqueue(card->cmd_wq);
                        return rc;
                }
+       } else {
+               gdev->dev.type = &qeth_l3_devtype;
        }
 
        INIT_WORK(&card->rx_mode_work, qeth_l3_rx_mode_work);
@@ -1956,7 +1960,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 {
        struct qeth_card *card = dev_get_drvdata(&cgdev->dev);
 
-       if (cgdev->dev.type == &qeth_generic_devtype)
+       if (cgdev->dev.type != &qeth_l3_devtype)
                device_remove_groups(&cgdev->dev, qeth_l3_attr_groups);
 
        qeth_set_allowed_threads(card, 0, 1);
@@ -2065,7 +2069,6 @@ static int qeth_l3_control_event(struct qeth_card *card,
 }
 
 const struct qeth_discipline qeth_l3_discipline = {
-       .devtype = &qeth_l3_devtype,
        .setup = qeth_l3_probe_device,
        .remove = qeth_l3_remove_device,
        .set_online = qeth_l3_set_online,
index 8b0deec..63c8a0f 100644 (file)
@@ -2,6 +2,7 @@
 config SCSI_CXGB4_ISCSI
        tristate "Chelsio T4 iSCSI support"
        depends on PCI && INET && (IPV6 || IPV6=n)
+       depends on PTP_1588_CLOCK_OPTIONAL
        depends on THERMAL || !THERMAL
        depends on ETHERNET
        depends on TLS || TLS=n
index dcbba96..5d24c1b 100644 (file)
@@ -524,7 +524,7 @@ static const struct net_device_ops cvm_oct_npi_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -540,7 +540,7 @@ static const struct net_device_ops cvm_oct_xaui_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -556,7 +556,7 @@ static const struct net_device_ops cvm_oct_sgmii_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -570,7 +570,7 @@ static const struct net_device_ops cvm_oct_spi_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -586,7 +586,7 @@ static const struct net_device_ops cvm_oct_rgmii_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -599,7 +599,7 @@ static const struct net_device_ops cvm_oct_pow_netdev_ops = {
        .ndo_start_xmit         = cvm_oct_xmit_pow,
        .ndo_set_rx_mode        = cvm_oct_common_set_multicast_list,
        .ndo_set_mac_address    = cvm_oct_common_set_mac_address,
-       .ndo_do_ioctl           = cvm_oct_ioctl,
+       .ndo_eth_ioctl          = cvm_oct_ioctl,
        .ndo_change_mtu         = cvm_oct_common_change_mtu,
        .ndo_get_stats          = cvm_oct_common_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
index 87d6011..12efcd1 100644 (file)
@@ -621,7 +621,10 @@ static void qlge_get_regs(struct net_device *ndev,
                regs->len = sizeof(struct qlge_reg_dump);
 }
 
-static int qlge_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *c)
+static int qlge_get_coalesce(struct net_device *ndev,
+                            struct ethtool_coalesce *c,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct qlge_adapter *qdev = netdev_to_qdev(ndev);
 
@@ -644,7 +647,10 @@ static int qlge_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *c
        return 0;
 }
 
-static int qlge_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *c)
+static int qlge_set_coalesce(struct net_device *ndev,
+                            struct ethtool_coalesce *c,
+                            struct kernel_ethtool_coalesce *kernel_coal,
+                            struct netlink_ext_ack *extack)
 {
        struct qlge_adapter *qdev = netdev_to_qdev(ndev);
 
index 19a02e9..8fcdf89 100644 (file)
@@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev,
        static int cards_found;
        int err;
 
-       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev,
                goto netdev_free;
        }
 
-       err = devlink_register(devlink, &pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto netdev_free;
 
index 5012b91..34decb0 100644 (file)
@@ -22,6 +22,8 @@ void rtw_stop_drv_threads(struct adapter *padapter);
 void rtw_cancel_all_timer(struct adapter *padapter);
 
 int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int rtw_android_priv_cmd(struct net_device *dev, struct ifreq *rq,
+                        void __user *data, int cmd);
 
 struct net_device *rtw_init_netdev(void);
 u16 rtw_recv_select_queue(struct sk_buff *skb);
index 2c26993..3018fc1 100644 (file)
@@ -45,6 +45,7 @@ enum ANDROID_WIFI_CMD {
        ANDROID_WIFI_CMD_MAX
 };
 
-int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd);
+int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr,
+                        void __user *data, int cmd);
 
 #endif /* __RTW_ANDROID_H__ */
index b958a8d..193a3dd 100644 (file)
@@ -2769,9 +2769,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                ret = rtw_hostapd_ioctl(dev, &wrq->u.data);
                break;
 #endif /*  CONFIG_88EU_AP_MODE */
-       case (SIOCDEVPRIVATE + 1):
-               ret = rtw_android_priv_cmd(dev, rq, cmd);
-               break;
        default:
                ret = -EOPNOTSUPP;
                break;
index 423c382..596e03e 100644 (file)
@@ -288,6 +288,7 @@ static const struct net_device_ops rtw_netdev_ops = {
        .ndo_set_mac_address = rtw_net_set_mac_address,
        .ndo_get_stats = rtw_net_get_stats,
        .ndo_do_ioctl = rtw_ioctl,
+       .ndo_siocdevprivate = rtw_android_priv_cmd,
 };
 
 static const struct device_type wlan_type = {
index 3c54469..a13df38 100644 (file)
@@ -5,6 +5,7 @@
  *
  ******************************************************************************/
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 
@@ -116,7 +117,8 @@ static int android_get_p2p_addr(struct net_device *net, char *command,
        return ETH_ALEN;
 }
 
-int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd)
+int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr,
+                        void __user *data, int cmd)
 {
        int ret = 0;
        char *command;
@@ -124,9 +126,15 @@ int rtw_android_priv_cmd(struct net_device *net, struct ifreq *ifr, int cmd)
        int bytes_written = 0;
        struct android_wifi_priv_cmd priv_cmd;
 
-       if (!ifr->ifr_data)
+       if (cmd != SIOCDEVPRIVATE)
+               return -EOPNOTSUPP;
+
+       if (in_compat_syscall()) /* to be implemented */
+               return -EOPNOTSUPP;
+
+       if (!data)
                return -EINVAL;
-       if (copy_from_user(&priv_cmd, ifr->ifr_data, sizeof(priv_cmd)))
+       if (copy_from_user(&priv_cmd, data, sizeof(priv_cmd)))
                return -EFAULT;
        if (priv_cmd.total_len < 1)
                return -EINVAL;
index 111e017..5badd44 100644 (file)
@@ -48,6 +48,8 @@ void rtw_stop_drv_threads(struct adapter *padapter);
 void rtw_cancel_all_timer(struct adapter *padapter);
 
 int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                      void __user *data, int cmd);
 
 int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname);
 struct net_device *rtw_init_netdev(struct adapter *padapter);
index f95000d..aa7bd76 100644 (file)
@@ -4485,6 +4485,21 @@ exit:
        return err;
 }
 
+int rtw_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                      void __user *data, int cmd)
+{
+       struct iwreq *wrq = (struct iwreq *)rq;
+
+       /* little hope of fixing this, better remove the whole function */
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
+
+       if (cmd != SIOCDEVPRIVATE)
+               return -EOPNOTSUPP;
+
+       return rtw_ioctl_wext_private(dev, &wrq->u);
+}
+
 int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
        struct iwreq *wrq = (struct iwreq *)rq;
@@ -4497,9 +4512,6 @@ int rtw_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        case RTL_IOCTL_HOSTAPD:
                ret = rtw_hostapd_ioctl(dev, &wrq->u.data);
                break;
-       case SIOCDEVPRIVATE:
-               ret = rtw_ioctl_wext_private(dev, &wrq->u);
-               break;
        default:
                ret = -EOPNOTSUPP;
                break;
index 648456b..9e38b53 100644 (file)
@@ -459,6 +459,7 @@ static const struct net_device_ops rtw_netdev_ops = {
        .ndo_set_mac_address = rtw_net_set_mac_address,
        .ndo_get_stats = rtw_net_get_stats,
        .ndo_do_ioctl = rtw_ioctl,
+       .ndo_siocdevprivate = rtw_siocdevprivate,
 };
 
 int rtw_init_netdev_name(struct net_device *pnetdev, const char *ifname)
index 6f470e7..1c62130 100644 (file)
@@ -98,8 +98,8 @@ static int p80211knetdev_stop(struct net_device *netdev);
 static netdev_tx_t p80211knetdev_hard_start_xmit(struct sk_buff *skb,
                                                 struct net_device *netdev);
 static void p80211knetdev_set_multicast_list(struct net_device *dev);
-static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr,
-                                 int cmd);
+static int p80211knetdev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                                       void __user *data, int cmd);
 static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr);
 static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue);
 static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc);
@@ -461,56 +461,8 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev)
                wlandev->set_multicast_list(wlandev, dev);
 }
 
-#ifdef SIOCETHTOOL
-
-static int p80211netdev_ethtool(struct wlandevice *wlandev,
-                               void __user *useraddr)
-{
-       u32 ethcmd;
-       struct ethtool_drvinfo info;
-       struct ethtool_value edata;
-
-       memset(&info, 0, sizeof(info));
-       memset(&edata, 0, sizeof(edata));
-
-       if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
-               return -EFAULT;
-
-       switch (ethcmd) {
-       case ETHTOOL_GDRVINFO:
-               info.cmd = ethcmd;
-               snprintf(info.driver, sizeof(info.driver), "p80211_%s",
-                        wlandev->nsdname);
-               snprintf(info.version, sizeof(info.version), "%s",
-                        WLAN_RELEASE);
-
-               if (copy_to_user(useraddr, &info, sizeof(info)))
-                       return -EFAULT;
-               return 0;
-#ifdef ETHTOOL_GLINK
-       case ETHTOOL_GLINK:
-               edata.cmd = ethcmd;
-
-               if (wlandev->linkstatus &&
-                   (wlandev->macmode != WLAN_MACMODE_NONE)) {
-                       edata.data = 1;
-               } else {
-                       edata.data = 0;
-               }
-
-               if (copy_to_user(useraddr, &edata, sizeof(edata)))
-                       return -EFAULT;
-               return 0;
-#endif
-       }
-
-       return -EOPNOTSUPP;
-}
-
-#endif
-
 /*----------------------------------------------------------------
- * p80211knetdev_do_ioctl
+ * p80211knetdev_siocdevprivate
  *
  * Handle an ioctl call on one of our devices.  Everything Linux
  * ioctl specific is done here.  Then we pass the contents of the
@@ -537,8 +489,9 @@ static int p80211netdev_ethtool(struct wlandevice *wlandev,
  *     locks.
  *----------------------------------------------------------------
  */
-static int p80211knetdev_do_ioctl(struct net_device *dev,
-                                 struct ifreq *ifr, int cmd)
+static int p80211knetdev_siocdevprivate(struct net_device *dev,
+                                       struct ifreq *ifr,
+                                       void __user *data, int cmd)
 {
        int result = 0;
        struct p80211ioctl_req *req = (struct p80211ioctl_req *)ifr;
@@ -547,13 +500,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
 
        netdev_dbg(dev, "rx'd ioctl, cmd=%d, len=%d\n", cmd, req->len);
 
-#ifdef SIOCETHTOOL
-       if (cmd == SIOCETHTOOL) {
-               result =
-                   p80211netdev_ethtool(wlandev, (void __user *)ifr->ifr_data);
-               goto bail;
-       }
-#endif
+       if (in_compat_syscall())
+               return -EOPNOTSUPP;
 
        /* Test the magic, assume ifr is good if it's there */
        if (req->magic != P80211_IOCTL_MAGIC) {
@@ -569,7 +517,7 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
                goto bail;
        }
 
-       msgbuf = memdup_user(req->data, req->len);
+       msgbuf = memdup_user(data, req->len);
        if (IS_ERR(msgbuf)) {
                result = PTR_ERR(msgbuf);
                goto bail;
@@ -578,10 +526,8 @@ static int p80211knetdev_do_ioctl(struct net_device *dev,
        result = p80211req_dorequest(wlandev, msgbuf);
 
        if (result == 0) {
-               if (copy_to_user
-                   (req->data, msgbuf, req->len)) {
+               if (copy_to_user(data, msgbuf, req->len))
                        result = -EFAULT;
-               }
        }
        kfree(msgbuf);
 
@@ -682,7 +628,7 @@ static const struct net_device_ops p80211_netdev_ops = {
        .ndo_stop = p80211knetdev_stop,
        .ndo_start_xmit = p80211knetdev_hard_start_xmit,
        .ndo_set_rx_mode = p80211knetdev_set_multicast_list,
-       .ndo_do_ioctl = p80211knetdev_do_ioctl,
+       .ndo_siocdevprivate = p80211knetdev_siocdevprivate,
        .ndo_set_mac_address = p80211knetdev_set_mac_address,
        .ndo_tx_timeout = p80211knetdev_tx_timeout,
        .ndo_validate_addr = eth_validate_addr,
index 5bb928b..3e3b887 100644 (file)
@@ -1524,11 +1524,11 @@ static int hdlcdev_close(struct net_device *dev)
  *
  * Return: 0 if success, otherwise error code
  */
-static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+static int hdlcdev_ioctl(struct net_device *dev, struct if_settings *ifs)
 {
        const size_t size = sizeof(sync_serial_settings);
        sync_serial_settings new_line;
-       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+       sync_serial_settings __user *line = ifs->ifs_ifsu.sync;
        struct slgt_info *info = dev_to_port(dev);
        unsigned int flags;
 
@@ -1538,17 +1538,14 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        if (info->port.count)
                return -EBUSY;
 
-       if (cmd != SIOCWANDEV)
-               return hdlc_ioctl(dev, ifr, cmd);
-
        memset(&new_line, 0, sizeof(new_line));
 
-       switch(ifr->ifr_settings.type) {
+       switch (ifs->type) {
        case IF_GET_IFACE: /* return current sync_serial_settings */
 
-               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
-               if (ifr->ifr_settings.size < size) {
-                       ifr->ifr_settings.size = size; /* data size wanted */
+               ifs->type = IF_IFACE_SYNC_SERIAL;
+               if (ifs->size < size) {
+                       ifs->size = size; /* data size wanted */
                        return -ENOBUFS;
                }
 
@@ -1615,7 +1612,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                return 0;
 
        default:
-               return hdlc_ioctl(dev, ifr, cmd);
+               return hdlc_ioctl(dev, ifs);
        }
 }
 
@@ -1688,7 +1685,7 @@ static const struct net_device_ops hdlcdev_ops = {
        .ndo_open       = hdlcdev_open,
        .ndo_stop       = hdlcdev_close,
        .ndo_start_xmit = hdlc_start_xmit,
-       .ndo_do_ioctl   = hdlcdev_ioctl,
+       .ndo_siocwandev = hdlcdev_ioctl,
        .ndo_tx_timeout = hdlcdev_tx_timeout,
 };
 
index 3cc12fc..5906cad 100644 (file)
@@ -572,7 +572,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
 
        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
index 6414bd5..3a249ee 100644 (file)
@@ -643,8 +643,6 @@ static bool tx_can_batch(struct vhost_virtqueue *vq, size_t total_len)
               !vhost_vq_avail_empty(vq->dev, vq);
 }
 
-#define SKB_FRAG_PAGE_ORDER     get_order(32768)
-
 static bool vhost_net_page_frag_refill(struct vhost_net *net, unsigned int sz,
                                       struct page_frag *pfrag, gfp_t gfp)
 {
index 30f7b18..d46c020 100644 (file)
@@ -20,7 +20,18 @@ typedef u16 compat_ushort_t;
 typedef u32 compat_uint_t;
 typedef u32 compat_ulong_t;
 typedef u32 compat_uptr_t;
+typedef u32 compat_caddr_t;
 typedef u32 compat_aio_context_t;
+typedef u32 compat_old_sigset_t;
+
+#ifndef __compat_uid32_t
+typedef u32 __compat_uid32_t;
+typedef u32 __compat_gid32_t;
+#endif
+
+#ifndef compat_mode_t
+typedef u32 compat_mode_t;
+#endif
 
 #ifdef CONFIG_COMPAT_FOR_U64_ALIGNMENT
 typedef s64 __attribute__((aligned(4))) compat_s64;
@@ -30,4 +41,10 @@ typedef s64 compat_s64;
 typedef u64 compat_u64;
 #endif
 
+#ifndef _COMPAT_NSIG
+typedef u32 compat_sigset_word;
+#define _COMPAT_NSIG _NSIG
+#define _COMPAT_NSIG_BPW 32
+#endif
+
 #endif
index 26bf15e..5e62e23 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/types.h>
 #include <linux/bits.h>
+#include <linux/typecheck.h>
 
 #include <uapi/linux/kernel.h>
 
@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
                __clear_bit(nr, addr);
 }
 
+/**
+ * __ptr_set_bit - Set bit in a pointer's value
+ * @nr: the bit to set
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *     void *p = foo();
+ *     __ptr_set_bit(bit, &p);
+ */
+#define __ptr_set_bit(nr, addr)                         \
+       ({                                              \
+               typecheck_pointer(*(addr));             \
+               __set_bit(nr, (unsigned long *)(addr)); \
+       })
+
+/**
+ * __ptr_clear_bit - Clear bit in a pointer's value
+ * @nr: the bit to clear
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *     void *p = foo();
+ *     __ptr_clear_bit(bit, &p);
+ */
+#define __ptr_clear_bit(nr, addr)                         \
+       ({                                                \
+               typecheck_pointer(*(addr));               \
+               __clear_bit(nr, (unsigned long *)(addr)); \
+       })
+
+/**
+ * __ptr_test_bit - Test bit in a pointer's value
+ * @nr: the bit to test
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *     void *p = foo();
+ *     if (__ptr_test_bit(bit, &p)) {
+ *             ...
+ *     } else {
+ *             ...
+ *     }
+ */
+#define __ptr_test_bit(nr, addr)                       \
+       ({                                             \
+               typecheck_pointer(*(addr));            \
+               test_bit(nr, (unsigned long *)(addr)); \
+       })
+
 #ifdef __KERNEL__
 
 #ifndef set_mask_bits
index 6c9b10d..2746fd8 100644 (file)
@@ -23,22 +23,73 @@ struct ctl_table_header;
 struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
+enum cgroup_bpf_attach_type {
+       CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
+       CGROUP_INET_INGRESS = 0,
+       CGROUP_INET_EGRESS,
+       CGROUP_INET_SOCK_CREATE,
+       CGROUP_SOCK_OPS,
+       CGROUP_DEVICE,
+       CGROUP_INET4_BIND,
+       CGROUP_INET6_BIND,
+       CGROUP_INET4_CONNECT,
+       CGROUP_INET6_CONNECT,
+       CGROUP_INET4_POST_BIND,
+       CGROUP_INET6_POST_BIND,
+       CGROUP_UDP4_SENDMSG,
+       CGROUP_UDP6_SENDMSG,
+       CGROUP_SYSCTL,
+       CGROUP_UDP4_RECVMSG,
+       CGROUP_UDP6_RECVMSG,
+       CGROUP_GETSOCKOPT,
+       CGROUP_SETSOCKOPT,
+       CGROUP_INET4_GETPEERNAME,
+       CGROUP_INET6_GETPEERNAME,
+       CGROUP_INET4_GETSOCKNAME,
+       CGROUP_INET6_GETSOCKNAME,
+       CGROUP_INET_SOCK_RELEASE,
+       MAX_CGROUP_BPF_ATTACH_TYPE
+};
 
-extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
-#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
+#define CGROUP_ATYPE(type) \
+       case BPF_##type: return type
 
-#define BPF_CGROUP_STORAGE_NEST_MAX    8
+static inline enum cgroup_bpf_attach_type
+to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
+{
+       switch (attach_type) {
+       CGROUP_ATYPE(CGROUP_INET_INGRESS);
+       CGROUP_ATYPE(CGROUP_INET_EGRESS);
+       CGROUP_ATYPE(CGROUP_INET_SOCK_CREATE);
+       CGROUP_ATYPE(CGROUP_SOCK_OPS);
+       CGROUP_ATYPE(CGROUP_DEVICE);
+       CGROUP_ATYPE(CGROUP_INET4_BIND);
+       CGROUP_ATYPE(CGROUP_INET6_BIND);
+       CGROUP_ATYPE(CGROUP_INET4_CONNECT);
+       CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+       CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
+       CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
+       CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
+       CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+       CGROUP_ATYPE(CGROUP_SYSCTL);
+       CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
+       CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+       CGROUP_ATYPE(CGROUP_GETSOCKOPT);
+       CGROUP_ATYPE(CGROUP_SETSOCKOPT);
+       CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
+       CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+       CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
+       CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+       CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
+       default:
+               return CGROUP_BPF_ATTACH_TYPE_INVALID;
+       }
+}
 
-struct bpf_cgroup_storage_info {
-       struct task_struct *task;
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
+#undef CGROUP_ATYPE
 
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype])
 
 #define for_each_cgroup_storage_type(stype) \
        for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -80,15 +131,15 @@ struct bpf_prog_array;
 
 struct cgroup_bpf {
        /* array of effective progs in this cgroup */
-       struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
+       struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
 
        /* attached progs to this cgroup and attach flags
         * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
         * have either zero or one element
         * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
         */
-       struct list_head progs[MAX_BPF_ATTACH_TYPE];
-       u32 flags[MAX_BPF_ATTACH_TYPE];
+       struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+       u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
 
        /* list of cgroup shared storages */
        struct list_head storages;
@@ -128,28 +179,28 @@ int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
                                struct sk_buff *skb,
-                               enum bpf_attach_type type);
+                               enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_run_filter_sk(struct sock *sk,
-                              enum bpf_attach_type type);
+                              enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
-                                     enum bpf_attach_type type,
+                                     enum cgroup_bpf_attach_type atype,
                                      void *t_ctx,
                                      u32 *flags);
 
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     struct bpf_sock_ops_kern *sock_ops,
-                                    enum bpf_attach_type type);
+                                    enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
-                                     short access, enum bpf_attach_type type);
+                                     short access, enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                                   struct ctl_table *table, int write,
                                   char **buf, size_t *pcount, loff_t *ppos,
-                                  enum bpf_attach_type type);
+                                  enum cgroup_bpf_attach_type atype);
 
 int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
                                       int *optname, char __user *optval,
@@ -172,44 +223,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
        return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
-                                        *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
-       enum bpf_cgroup_storage_type stype;
-       int i, err = 0;
-
-       preempt_disable();
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, current);
-               for_each_cgroup_storage_type(stype)
-                       this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
-                                      storage[stype]);
-               goto out;
-       }
-       err = -EBUSY;
-       WARN_ON_ONCE(1);
-
-out:
-       preempt_enable();
-       return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
-       int i;
-
-       for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) {
-               if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
-               return;
-       }
-}
-
 struct bpf_cgroup_storage *
 cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
                      void *key, bool locked);
@@ -230,9 +243,9 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                            \
 ({                                                                           \
        int __ret = 0;                                                        \
-       if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS))                      \
+       if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))                  \
                __ret = __cgroup_bpf_run_filter_skb(sk, skb,                  \
-                                                   BPF_CGROUP_INET_INGRESS); \
+                                                   CGROUP_INET_INGRESS); \
                                                                              \
        __ret;                                                                \
 })
@@ -240,54 +253,54 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)                              \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
+       if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
                typeof(sk) __sk = sk_to_full_sk(sk);                           \
                if (sk_fullsock(__sk))                                         \
                        __ret = __cgroup_bpf_run_filter_skb(__sk, skb,         \
-                                                     BPF_CGROUP_INET_EGRESS); \
+                                                     CGROUP_INET_EGRESS); \
        }                                                                      \
        __ret;                                                                 \
 })
 
-#define BPF_CGROUP_RUN_SK_PROG(sk, type)                                      \
+#define BPF_CGROUP_RUN_SK_PROG(sk, atype)                                     \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(type)) {                                        \
-               __ret = __cgroup_bpf_run_filter_sk(sk, type);                  \
+       if (cgroup_bpf_enabled(atype)) {                                               \
+               __ret = __cgroup_bpf_run_filter_sk(sk, atype);                 \
        }                                                                      \
        __ret;                                                                 \
 })
 
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)                                     \
-       BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+       BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_CREATE)
 
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk)                             \
-       BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)
+       BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_RELEASE)
 
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)                                       \
-       BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+       BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET4_POST_BIND)
 
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk)                                       \
-       BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+       BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
 
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)                                       \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype)                                      \
 ({                                                                            \
        u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(type))                                          \
-               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+       if (cgroup_bpf_enabled(atype))                                         \
+               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
                                                          NULL,                \
                                                          &__unused_flags);    \
        __ret;                                                                 \
 })
 
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)                   \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx)                  \
 ({                                                                            \
        u32 __unused_flags;                                                    \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(type))   {                                      \
+       if (cgroup_bpf_enabled(atype))  {                                      \
                lock_sock(sk);                                                 \
-               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
                                                          t_ctx,               \
                                                          &__unused_flags);    \
                release_sock(sk);                                              \
@@ -300,13 +313,13 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
  * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
  * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
  */
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags)               \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags)              \
 ({                                                                            \
        u32 __flags = 0;                                                       \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(type))   {                                      \
+       if (cgroup_bpf_enabled(atype))  {                                      \
                lock_sock(sk);                                                 \
-               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype,     \
                                                          NULL, &__flags);     \
                release_sock(sk);                                              \
                if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE)            \
@@ -316,33 +329,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 })
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)                                    \
-       ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||                      \
-         cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) &&                     \
+       ((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) ||                  \
+         cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) &&                 \
         (sk)->sk_prot->pre_connect)
 
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr)                          \
-       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
 
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr)                          \
-       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
 
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr)                     \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
 
 #define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr)                     \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
 
 #define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx)                       \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
 
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx)                       \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
 
 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr)                       \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
 
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr)                       \
-       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
 
 /* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
  * fullsock and its parent fullsock cannot be traced by
@@ -362,33 +375,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk)                  \
 ({                                                                     \
        int __ret = 0;                                                  \
-       if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS))                    \
+       if (cgroup_bpf_enabled(CGROUP_SOCK_OPS))                        \
                __ret = __cgroup_bpf_run_filter_sock_ops(sk,            \
                                                         sock_ops,      \
-                                                        BPF_CGROUP_SOCK_OPS); \
+                                                        CGROUP_SOCK_OPS); \
        __ret;                                                          \
 })
 
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops)                                \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) {       \
+       if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && (sock_ops)->sk) {       \
                typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk);               \
                if (__sk && sk_fullsock(__sk))                                 \
                        __ret = __cgroup_bpf_run_filter_sock_ops(__sk,         \
                                                                 sock_ops,     \
-                                                        BPF_CGROUP_SOCK_OPS); \
+                                                        CGROUP_SOCK_OPS); \
        }                                                                      \
        __ret;                                                                 \
 })
 
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access)        \
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access)       \
 ({                                                                           \
        int __ret = 0;                                                        \
-       if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE))                            \
-               __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
+       if (cgroup_bpf_enabled(CGROUP_DEVICE))                        \
+               __ret = __cgroup_bpf_check_dev_permission(atype, major, minor, \
                                                          access,             \
-                                                         BPF_CGROUP_DEVICE); \
+                                                         CGROUP_DEVICE); \
                                                                              \
        __ret;                                                                \
 })
@@ -397,10 +410,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos)  \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL))                             \
+       if (cgroup_bpf_enabled(CGROUP_SYSCTL))                         \
                __ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
                                                       buf, count, pos,        \
-                                                      BPF_CGROUP_SYSCTL);     \
+                                                      CGROUP_SYSCTL);     \
        __ret;                                                                 \
 })
 
@@ -408,7 +421,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
                                       kernel_optval)                          \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT))                         \
+       if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT))                             \
                __ret = __cgroup_bpf_run_filter_setsockopt(sock, level,        \
                                                           optname, optval,    \
                                                           optlen,             \
@@ -419,7 +432,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen)                              \
 ({                                                                            \
        int __ret = 0;                                                         \
-       if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))                         \
+       if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT))                             \
                get_user(__ret, optlen);                                       \
        __ret;                                                                 \
 })
@@ -428,7 +441,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
                                       max_optlen, retval)                     \
 ({                                                                            \
        int __ret = retval;                                                    \
-       if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))                         \
+       if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT))                             \
                if (!(sock)->sk_prot->bpf_bypass_getsockopt ||                 \
                    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
                                        tcp_bpf_bypass_getsockopt,             \
@@ -443,7 +456,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
                                            optlen, retval)                    \
 ({                                                                            \
        int __ret = retval;                                                    \
-       if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT))                         \
+       if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT))                             \
                __ret = __cgroup_bpf_run_filter_getsockopt_kern(               \
                        sock, level, optname, optval, optlen, retval);         \
        __ret;                                                                 \
@@ -487,9 +500,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
 }
 
-static inline int bpf_cgroup_storage_set(
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
@@ -505,14 +515,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
        return 0;
 }
 
-#define cgroup_bpf_enabled(type) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
+#define cgroup_bpf_enabled(atype) (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
@@ -524,7 +534,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
index e8e2b03..f4c16f1 100644 (file)
@@ -168,6 +168,7 @@ struct bpf_map {
        u32 max_entries;
        u32 map_flags;
        int spin_lock_off; /* >=0 valid offset, <0 error */
+       int timer_off; /* >=0 valid offset, <0 error */
        u32 id;
        int numa_node;
        u32 btf_key_type_id;
@@ -197,30 +198,53 @@ static inline bool map_value_has_spin_lock(const struct bpf_map *map)
        return map->spin_lock_off >= 0;
 }
 
-static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
+static inline bool map_value_has_timer(const struct bpf_map *map)
 {
-       if (likely(!map_value_has_spin_lock(map)))
-               return;
-       *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
-               (struct bpf_spin_lock){};
+       return map->timer_off >= 0;
 }
 
-/* copy everything but bpf_spin_lock */
+static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
+{
+       if (unlikely(map_value_has_spin_lock(map)))
+               *(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
+                       (struct bpf_spin_lock){};
+       if (unlikely(map_value_has_timer(map)))
+               *(struct bpf_timer *)(dst + map->timer_off) =
+                       (struct bpf_timer){};
+}
+
+/* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
 static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 {
+       u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
+
        if (unlikely(map_value_has_spin_lock(map))) {
-               u32 off = map->spin_lock_off;
+               s_off = map->spin_lock_off;
+               s_sz = sizeof(struct bpf_spin_lock);
+       } else if (unlikely(map_value_has_timer(map))) {
+               t_off = map->timer_off;
+               t_sz = sizeof(struct bpf_timer);
+       }
 
-               memcpy(dst, src, off);
-               memcpy(dst + off + sizeof(struct bpf_spin_lock),
-                      src + off + sizeof(struct bpf_spin_lock),
-                      map->value_size - off - sizeof(struct bpf_spin_lock));
+       if (unlikely(s_sz || t_sz)) {
+               if (s_off < t_off || !s_sz) {
+                       swap(s_off, t_off);
+                       swap(s_sz, t_sz);
+               }
+               memcpy(dst, src, t_off);
+               memcpy(dst + t_off + t_sz,
+                      src + t_off + t_sz,
+                      s_off - t_off - t_sz);
+               memcpy(dst + s_off + s_sz,
+                      src + s_off + s_sz,
+                      map->value_size - s_off - s_sz);
        } else {
                memcpy(dst, src, map->value_size);
        }
 }
 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
                           bool lock_src);
+void bpf_timer_cancel_and_free(void *timer);
 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
 
 struct bpf_offload_dev;
@@ -314,6 +338,7 @@ enum bpf_arg_type {
        ARG_PTR_TO_FUNC,        /* pointer to a bpf program function */
        ARG_PTR_TO_STACK_OR_NULL,       /* pointer to stack or NULL */
        ARG_PTR_TO_CONST_STR,   /* pointer to a null terminated read-only string */
+       ARG_PTR_TO_TIMER,       /* pointer to bpf_timer */
        __BPF_ARG_TYPE_MAX,
 };
 
@@ -554,6 +579,11 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_SKIP_FRAME         BIT(2)
 
+/* Store IP address of the caller on the trampoline stack,
+ * so it's available for trampoline's programs.
+ */
+#define BPF_TRAMP_F_IP_ARG             BIT(3)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
@@ -1073,7 +1103,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 /* an array of programs to be executed under rcu_lock.
  *
  * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
+ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
  *
  * the structure returned by bpf_prog_array_alloc() should be populated
  * with program pointers and the last pointer must be NULL.
@@ -1084,7 +1114,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
  */
 struct bpf_prog_array_item {
        struct bpf_prog *prog;
-       struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+       union {
+               struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+               u64 bpf_cookie;
+       };
 };
 
 struct bpf_prog_array {
@@ -1110,73 +1143,133 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
 int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *exclude_prog,
                        struct bpf_prog *include_prog,
+                       u64 bpf_cookie,
                        struct bpf_prog_array **new_array);
 
+struct bpf_run_ctx {};
+
+struct bpf_cg_run_ctx {
+       struct bpf_run_ctx run_ctx;
+       const struct bpf_prog_array_item *prog_item;
+};
+
+struct bpf_trace_run_ctx {
+       struct bpf_run_ctx run_ctx;
+       u64 bpf_cookie;
+};
+
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+       struct bpf_run_ctx *old_ctx = NULL;
+
+#ifdef CONFIG_BPF_SYSCALL
+       old_ctx = current->bpf_ctx;
+       current->bpf_ctx = new_ctx;
+#endif
+       return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+#ifdef CONFIG_BPF_SYSCALL
+       current->bpf_ctx = old_ctx;
+#endif
+}
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                   (1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN                                         (1 << 0)
 
-/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
- * if bpf_cgroup_storage_set() failed, the rest of programs
- * will not execute. This should be a really rare scenario
- * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
- * preemptions all between bpf_cgroup_storage_set() and
- * bpf_cgroup_storage_unset() on the same cpu.
- */
-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
-       ({                                                              \
-               struct bpf_prog_array_item *_item;                      \
-               struct bpf_prog *_prog;                                 \
-               struct bpf_prog_array *_array;                          \
-               u32 _ret = 1;                                           \
-               u32 func_ret;                                           \
-               migrate_disable();                                      \
-               rcu_read_lock();                                        \
-               _array = rcu_dereference(array);                        \
-               _item = &_array->items[0];                              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                               break;                                  \
-                       func_ret = func(_prog, ctx);                    \
-                       _ret &= (func_ret & 1);                         \
-                       *(ret_flags) |= (func_ret >> 1);                        \
-                       bpf_cgroup_storage_unset();                     \
-                       _item++;                                        \
-               }                                                       \
-               rcu_read_unlock();                                      \
-               migrate_enable();                                       \
-               _ret;                                                   \
-        })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \
-       ({                                              \
-               struct bpf_prog_array_item *_item;      \
-               struct bpf_prog *_prog;                 \
-               struct bpf_prog_array *_array;          \
-               u32 _ret = 1;                           \
-               migrate_disable();                      \
-               rcu_read_lock();                        \
-               _array = rcu_dereference(array);        \
-               if (unlikely(check_non_null && !_array))\
-                       goto _out;                      \
-               _item = &_array->items[0];              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (!set_cg_storage) {                  \
-                               _ret &= func(_prog, ctx);       \
-                       } else {                                \
-                               if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                                       break;                  \
-                               _ret &= func(_prog, ctx);       \
-                               bpf_cgroup_storage_unset();     \
-                       }                               \
-                       _item++;                        \
-               }                                       \
-_out:                                                  \
-               rcu_read_unlock();                      \
-               migrate_enable();                       \
-               _ret;                                   \
-        })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+                           const void *ctx, bpf_prog_run_fn run_prog,
+                           u32 *ret_flags)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_cg_run_ctx run_ctx;
+       u32 ret = 1;
+       u32 func_ret;
+
+       migrate_disable();
+       rcu_read_lock();
+       array = rcu_dereference(array_rcu);
+       item = &array->items[0];
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       while ((prog = READ_ONCE(item->prog))) {
+               run_ctx.prog_item = item;
+               func_ret = run_prog(prog, ctx);
+               ret &= (func_ret & 1);
+               *(ret_flags) |= (func_ret >> 1);
+               item++;
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+       rcu_read_unlock();
+       migrate_enable();
+       return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+                     const void *ctx, bpf_prog_run_fn run_prog)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_cg_run_ctx run_ctx;
+       u32 ret = 1;
+
+       migrate_disable();
+       rcu_read_lock();
+       array = rcu_dereference(array_rcu);
+       item = &array->items[0];
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       while ((prog = READ_ONCE(item->prog))) {
+               run_ctx.prog_item = item;
+               ret &= run_prog(prog, ctx);
+               item++;
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+       rcu_read_unlock();
+       migrate_enable();
+       return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+                  const void *ctx, bpf_prog_run_fn run_prog)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_trace_run_ctx run_ctx;
+       u32 ret = 1;
+
+       migrate_disable();
+       rcu_read_lock();
+       array = rcu_dereference(array_rcu);
+       if (unlikely(!array))
+               goto out;
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       item = &array->items[0];
+       while ((prog = READ_ONCE(item->prog))) {
+               run_ctx.bpf_cookie = item->bpf_cookie;
+               ret &= run_prog(prog, ctx);
+               item++;
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+out:
+       rcu_read_unlock();
+       migrate_enable();
+       return ret;
+}
 
 /* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
  * so BPF programs can request cwr for TCP packets.
@@ -1205,7 +1298,7 @@ _out:                                                     \
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
                if (_ret)                               \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
@@ -1214,12 +1307,6 @@ _out:                                                    \
                _ret;                                   \
        })
 
-#define BPF_PROG_RUN_ARRAY(array, ctx, func)           \
-       __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)     \
-       __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;
@@ -1398,6 +1485,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
                                        struct seq_file *seq);
 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
                                         struct bpf_link_info *info);
+typedef const struct bpf_func_proto *
+(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog);
 
 enum bpf_iter_feature {
        BPF_ITER_RESCHED        = BIT(0),
@@ -1410,6 +1500,7 @@ struct bpf_iter_reg {
        bpf_iter_detach_target_t detach_target;
        bpf_iter_show_fdinfo_t show_fdinfo;
        bpf_iter_fill_link_info_t fill_link_info;
+       bpf_iter_get_func_proto_t get_func_proto;
        u32 ctx_arg_info_size;
        u32 feature;
        struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1432,6 +1523,8 @@ struct bpf_iter__bpf_map_elem {
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
@@ -1509,12 +1602,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                           struct bpf_prog *xdp_prog, struct bpf_map *map,
                           bool exclude_ingress);
-bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
-bool cpu_map_prog_allowed(struct bpf_map *map);
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+                            struct sk_buff *skb);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1711,6 +1804,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
        return 0;
 }
 
+static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+                                          struct sk_buff *skb)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline bool cpu_map_prog_allowed(struct bpf_map *map)
 {
        return false;
@@ -1852,6 +1951,12 @@ void bpf_map_offload_map_free(struct bpf_map *map);
 int bpf_prog_test_run_syscall(struct bpf_prog *prog,
                              const union bpf_attr *kattr,
                              union bpf_attr __user *uattr);
+
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+void sock_map_unhash(struct sock *sk);
+void sock_map_close(struct sock *sk, long timeout);
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
                                        union bpf_attr *attr)
@@ -1884,24 +1989,6 @@ static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 {
        return -ENOTSUPP;
 }
-#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
-int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
-int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
-void sock_map_unhash(struct sock *sk);
-void sock_map_close(struct sock *sk, long timeout);
-
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
-                                      void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
-                                      void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
 
 #ifdef CONFIG_BPF_SYSCALL
 static inline int sock_map_get_from_fd(const union bpf_attr *attr,
@@ -1921,7 +2008,21 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
        return -EOPNOTSUPP;
 }
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+                                      void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+                                      void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
 static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
                                                     void *key, void *value)
 {
@@ -1998,9 +2099,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
 extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
-
-const struct bpf_func_proto *bpf_tracing_func_proto(
-       enum bpf_func_id func_id, const struct bpf_prog *prog);
+extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
index ae3ac3a..9c81724 100644 (file)
@@ -136,3 +136,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
 BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
 BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
 #endif
+#ifdef CONFIG_PERF_EVENTS
+BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
+#endif
index 828d08a..5424124 100644 (file)
@@ -53,7 +53,14 @@ struct bpf_reg_state {
                /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
                 *   PTR_TO_MAP_VALUE_OR_NULL
                 */
-               struct bpf_map *map_ptr;
+               struct {
+                       struct bpf_map *map_ptr;
+                       /* To distinguish map lookups from outer map
+                        * the map_uid is non-zero for registers
+                        * pointing to inner maps.
+                        */
+                       u32 map_uid;
+               };
 
                /* for PTR_TO_BTF_ID */
                struct {
@@ -201,12 +208,19 @@ struct bpf_func_state {
         * zero == main subprog
         */
        u32 subprogno;
+       /* Every bpf_timer_start will increment async_entry_cnt.
+        * It's used to distinguish:
+        * void foo(void) { for(;;); }
+        * void foo(void) { bpf_timer_set_callback(,foo); }
+        */
+       u32 async_entry_cnt;
+       bool in_callback_fn;
+       bool in_async_callback_fn;
 
        /* The following fields should be last. See copy_func_state() */
        int acquired_refs;
        struct bpf_reference_state *refs;
        int allocated_stack;
-       bool in_callback_fn;
        struct bpf_stack_state *stack;
 };
 
@@ -392,6 +406,7 @@ struct bpf_subprog_info {
        bool has_tail_call;
        bool tail_call_reachable;
        bool has_ld_abs;
+       bool is_async_cb;
 };
 
 /* single container for all structs
index 5cdeab4..546e27f 100644 (file)
@@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
        return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
 }
 
-static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
+static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
 {
-       return memdup_sockptr((sockptr_t) src, len);
+       void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
+
+       if (!p)
+               return ERR_PTR(-ENOMEM);
+       if (copy_from_bpfptr(p, src, len)) {
+               kvfree(p);
+               return ERR_PTR(-EFAULT);
+       }
+       return p;
 }
 
 static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
index 94a0c97..214fde9 100644 (file)
@@ -99,6 +99,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
                           const struct btf_member *m,
                           u32 expected_offset, u32 expected_size);
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
+int btf_find_timer(const struct btf *btf, const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
 s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
 const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
index 57890b3..47d9abf 100644 (file)
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
 #define BTF_ID_LIST_SINGLE(name, prefix, typename)     \
        BTF_ID_LIST(name) \
        BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+       BTF_ID_LIST_GLOBAL(name) \
+       BTF_ID(prefix, typename)
 
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -148,6 +151,7 @@ extern struct btf_id_set name;
 #define BTF_ID_UNUSED
 #define BTF_ID_LIST_GLOBAL(name) u32 name[1];
 #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
 #define BTF_SET_START(name) static struct btf_id_set name = { 0 };
 #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
 #define BTF_SET_END(name)
@@ -172,7 +176,8 @@ extern struct btf_id_set name;
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)          \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)                    \
        BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
-       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)                    \
+       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +189,6 @@ MAX_BTF_SOCK_TYPE,
 extern u32 btf_sock_ids[];
 #endif
 
+extern u32 btf_task_struct_ids[];
+
 #endif
index ae7a341..9de6e90 100644 (file)
@@ -37,7 +37,7 @@
  *     quanta, from when the bit is sent on the TX pin to when it is
  *     received on the RX pin of the transmitter. Possible options:
  *
- *       O: automatic mode. The controller dynamically measure @tdcv
+ *       0: automatic mode. The controller dynamically measures @tdcv
  *       for each transmitted CAN FD frame.
  *
  *       Other values: manual mode. Use the fixed provided value.
@@ -45,7 +45,7 @@
  * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
  *     quanta, defining the distance between the start of the bit
  *     reception on the RX pin of the transceiver and the SSP
- *     position such as SSP = @tdcv + @tdco.
+ *     position such that SSP = @tdcv + @tdco.
  *
  *     If @tdco is zero, then TDC is disabled and both @tdcv and
  *     @tdcf should be ignored.
index 27b275e..2413253 100644 (file)
@@ -32,6 +32,12 @@ enum can_mode {
        CAN_MODE_SLEEP
 };
 
+enum can_termination_gpio {
+       CAN_TERMINATION_GPIO_DISABLED = 0,
+       CAN_TERMINATION_GPIO_ENABLED,
+       CAN_TERMINATION_GPIO_MAX,
+};
+
 /*
  * CAN common private data
  */
@@ -55,6 +61,8 @@ struct can_priv {
        unsigned int termination_const_cnt;
        const u16 *termination_const;
        u16 termination;
+       struct gpio_desc *termination_gpio;
+       u16 termination_gpio_ohms[CAN_TERMINATION_GPIO_MAX];
 
        enum can_state state;
 
diff --git a/include/linux/can/platform/flexcan.h b/include/linux/can/platform/flexcan.h
new file mode 100644 (file)
index 0000000..1b536fb
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021  Angelo Dureghello <angelo@kernel-space.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CAN_PLATFORM_FLEXCAN_H
+#define _CAN_PLATFORM_FLEXCAN_H
+
+struct flexcan_platform_data {
+       u32 clock_frequency;
+       u8 clk_src;
+};
+
+#endif /* _CAN_PLATFORM_FLEXCAN_H */
index 40882df..c114776 100644 (file)
@@ -20,6 +20,7 @@ struct can_rx_offload {
                                        bool drop);
 
        struct sk_buff_head skb_queue;
+       struct sk_buff_head skb_irq_queue;
        u32 skb_queue_len_max;
 
        unsigned int mb_first;
@@ -48,14 +49,11 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
                                         unsigned int *frame_len_ptr);
 int can_rx_offload_queue_tail(struct can_rx_offload *offload,
                              struct sk_buff *skb);
+void can_rx_offload_irq_finish(struct can_rx_offload *offload);
+void can_rx_offload_threaded_irq_finish(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
 void can_rx_offload_enable(struct can_rx_offload *offload);
 
-static inline void can_rx_offload_schedule(struct can_rx_offload *offload)
-{
-       napi_schedule(&offload->napi);
-}
-
 static inline void can_rx_offload_disable(struct can_rx_offload *offload)
 {
        napi_disable(&offload->napi);
index c270124..8e0598c 100644 (file)
 #include <linux/unistd.h>
 
 #include <asm/compat.h>
-
-#ifdef CONFIG_COMPAT
 #include <asm/siginfo.h>
 #include <asm/signal.h>
-#endif
 
 #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 /*
@@ -95,8 +92,6 @@ struct compat_iovec {
        compat_size_t   iov_len;
 };
 
-#ifdef CONFIG_COMPAT
-
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
@@ -131,9 +126,11 @@ struct compat_tms {
 
 #define _COMPAT_NSIG_WORDS     (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
 
+#ifndef compat_sigset_t
 typedef struct {
        compat_sigset_word      sig[_COMPAT_NSIG_WORDS];
 } compat_sigset_t;
+#endif
 
 int set_compat_user_sigmask(const compat_sigset_t __user *umask,
                            size_t sigsetsize);
@@ -384,6 +381,7 @@ struct compat_keyctl_kdf_params {
        __u32 __spare[8];
 };
 
+struct compat_stat;
 struct compat_statfs;
 struct compat_statfs64;
 struct compat_old_linux_dirent;
@@ -428,7 +426,7 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
                  unsigned int size)
 {
        /* size <= sizeof(compat_sigset_t) <= sizeof(sigset_t) */
-#ifdef __BIG_ENDIAN
+#if defined(__BIG_ENDIAN) && defined(CONFIG_64BIT)
        compat_sigset_t v;
        switch (_NSIG_WORDS) {
        case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
@@ -929,17 +927,6 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
 
 #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
 
-
-/*
- * For most but not all architectures, "am I in a compat syscall?" and
- * "am I a compat task?" are the same question.  For architectures on which
- * they aren't the same question, arch code can override in_compat_syscall.
- */
-
-#ifndef in_compat_syscall
-static inline bool in_compat_syscall(void) { return is_compat_task(); }
-#endif
-
 /**
  * ns_to_old_timeval32 - Compat version of ns_to_timeval
  * @nsec:      the nanoseconds value to be converted
@@ -969,6 +956,17 @@ int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
 int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
                          struct compat_statfs64 __user * buf);
 
+#ifdef CONFIG_COMPAT
+
+/*
+ * For most but not all architectures, "am I in a compat syscall?" and
+ * "am I a compat task?" are the same question.  For architectures on which
+ * they aren't the same question, arch code can override in_compat_syscall.
+ */
+#ifndef in_compat_syscall
+static inline bool in_compat_syscall(void) { return is_compat_task(); }
+#endif
+
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
index 1587961..c7fa4a3 100644 (file)
 struct dsa_switch;
 struct sk_buff;
 struct net_device;
-struct packet_type;
-struct dsa_8021q_context;
 
-struct dsa_8021q_crosschip_link {
+struct dsa_tag_8021q_vlan {
        struct list_head list;
        int port;
-       struct dsa_8021q_context *other_ctx;
-       int other_port;
+       u16 vid;
        refcount_t refcount;
 };
 
-struct dsa_8021q_ops {
-       int (*vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags);
-       int (*vlan_del)(struct dsa_switch *ds, int port, u16 vid);
-};
-
 struct dsa_8021q_context {
-       const struct dsa_8021q_ops *ops;
        struct dsa_switch *ds;
-       struct list_head crosschip_links;
+       struct list_head vlans;
        /* EtherType of RX VID, used for filtering on master interface */
        __be16 proto;
 };
 
-#define DSA_8021Q_N_SUBVLAN                    8
-
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled);
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto);
 
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
-                                   struct dsa_8021q_context *other_ctx,
-                                   int other_port);
-
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
-                                    struct dsa_8021q_context *other_ctx,
-                                    int other_port);
+void dsa_tag_8021q_unregister(struct dsa_switch *ds);
 
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
                               u16 tpid, u16 tci);
 
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
-                  int *subvlan);
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id);
+
+int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+                                       struct net_device *br,
+                                       int bridge_num);
+
+void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+                                          struct net_device *br,
+                                          int bridge_num);
+
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num);
 
 u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
 
 u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
 
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan);
-
 int dsa_8021q_rx_switch_id(u16 vid);
 
 int dsa_8021q_rx_source_port(u16 vid);
 
-u16 dsa_8021q_rx_subvlan(u16 vid);
-
 bool vid_is_dsa_8021q_rxvlan(u16 vid);
 
 bool vid_is_dsa_8021q_txvlan(u16 vid);
index b6089b8..1711062 100644 (file)
@@ -16,6 +16,8 @@
 #define ETH_P_SJA1105_META                     0x0008
 #define ETH_P_SJA1110                          0xdadc
 
+#define SJA1105_DEFAULT_VLAN                   (VLAN_N_VID - 1)
+
 /* IEEE 802.3 Annex 57A: Slow Protocols PDUs (01:80:C2:xx:xx:xx) */
 #define SJA1105_LINKLOCAL_FILTER_A             0x0180C2000000ull
 #define SJA1105_LINKLOCAL_FILTER_A_MASK                0xFFFFFF000000ull
@@ -59,14 +61,12 @@ struct sja1105_skb_cb {
        ((struct sja1105_skb_cb *)((skb)->cb))
 
 struct sja1105_port {
-       u16 subvlan_map[DSA_8021Q_N_SUBVLAN];
        struct kthread_worker *xmit_worker;
        struct kthread_work xmit_work;
        struct sk_buff_head xmit_queue;
        struct sja1105_tagger_data *data;
        struct dsa_port *dp;
        bool hwts_tx_en;
-       u16 xmit_tpid;
 };
 
 enum sja1110_meta_tstamp {
@@ -89,4 +89,22 @@ static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
 
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
 
+#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
+
+extern const struct dsa_switch_ops sja1105_switch_ops;
+
+static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+{
+       return dp->ds->ops == &sja1105_switch_ops;
+}
+
+#else
+
+static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
+{
+       return false;
+}
+
+#endif
+
 #endif /* _NET_DSA_SJA1105_H */
index 232daae..849524b 100644 (file)
 
 #include <linux/bitmap.h>
 #include <linux/compat.h>
+#include <linux/netlink.h>
 #include <uapi/linux/ethtool.h>
 
-#ifdef CONFIG_COMPAT
-
 struct compat_ethtool_rx_flow_spec {
        u32             flow_type;
        union ethtool_flow_union h_u;
@@ -38,8 +37,6 @@ struct compat_ethtool_rxnfc {
        u32                             rule_locs[];
 };
 
-#endif /* CONFIG_COMPAT */
-
 #include <linux/rculist.h>
 
 /**
@@ -176,6 +173,11 @@ extern int
 __ethtool_get_link_ksettings(struct net_device *dev,
                             struct ethtool_link_ksettings *link_ksettings);
 
+struct kernel_ethtool_coalesce {
+       u8 use_cqe_mode_tx;
+       u8 use_cqe_mode_rx;
+};
+
 /**
  * ethtool_intersect_link_masks - Given two link masks, AND them together
  * @dst: first mask and where result is stored
@@ -215,7 +217,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 #define ETHTOOL_COALESCE_TX_USECS_HIGH         BIT(19)
 #define ETHTOOL_COALESCE_TX_MAX_FRAMES_HIGH    BIT(20)
 #define ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL  BIT(21)
-#define ETHTOOL_COALESCE_ALL_PARAMS            GENMASK(21, 0)
+#define ETHTOOL_COALESCE_USE_CQE_RX            BIT(22)
+#define ETHTOOL_COALESCE_USE_CQE_TX            BIT(23)
+#define ETHTOOL_COALESCE_ALL_PARAMS            GENMASK(23, 0)
 
 #define ETHTOOL_COALESCE_USECS                                         \
        (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS)
@@ -241,6 +245,8 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
         ETHTOOL_COALESCE_RX_USECS_LOW | ETHTOOL_COALESCE_RX_USECS_HIGH | \
         ETHTOOL_COALESCE_PKT_RATE_LOW | ETHTOOL_COALESCE_PKT_RATE_HIGH | \
         ETHTOOL_COALESCE_RATE_SAMPLE_INTERVAL)
+#define ETHTOOL_COALESCE_USE_CQE                                       \
+       (ETHTOOL_COALESCE_USE_CQE_RX | ETHTOOL_COALESCE_USE_CQE_TX)
 
 #define ETHTOOL_STAT_NOT_SET   (~0ULL)
 
@@ -606,8 +612,14 @@ struct ethtool_ops {
                              struct ethtool_eeprom *, u8 *);
        int     (*set_eeprom)(struct net_device *,
                              struct ethtool_eeprom *, u8 *);
-       int     (*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
-       int     (*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
+       int     (*get_coalesce)(struct net_device *,
+                               struct ethtool_coalesce *,
+                               struct kernel_ethtool_coalesce *,
+                               struct netlink_ext_ack *);
+       int     (*set_coalesce)(struct net_device *,
+                               struct ethtool_coalesce *,
+                               struct kernel_ethtool_coalesce *,
+                               struct netlink_ext_ack *);
        void    (*get_ringparam)(struct net_device *,
                                 struct ethtool_ringparam *);
        int     (*set_ringparam)(struct net_device *,
index 83b8960..7d24894 100644 (file)
@@ -574,7 +574,8 @@ struct bpf_prog {
                                kprobe_override:1, /* Do we override a kprobe? */
                                has_callchain_buf:1, /* callchain buffer allocated? */
                                enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
-                               call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
+                               call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
+                               call_get_func_ip:1; /* Do we call get_func_ip() */
        enum bpf_prog_type      type;           /* Type of BPF program */
        enum bpf_attach_type    expected_attach_type; /* For some prog types */
        u32                     len;            /* Number of filter blocks */
@@ -599,25 +600,38 @@ struct sk_filter {
 
 DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
-#define __BPF_PROG_RUN(prog, ctx, dfunc)       ({                      \
-       u32 __ret;                                                      \
-       cant_migrate();                                                 \
-       if (static_branch_unlikely(&bpf_stats_enabled_key)) {           \
-               struct bpf_prog_stats *__stats;                         \
-               u64 __start = sched_clock();                            \
-               __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);   \
-               __stats = this_cpu_ptr(prog->stats);                    \
-               u64_stats_update_begin(&__stats->syncp);                \
-               __stats->cnt++;                                         \
-               __stats->nsecs += sched_clock() - __start;              \
-               u64_stats_update_end(&__stats->syncp);                  \
-       } else {                                                        \
-               __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);   \
-       }                                                               \
-       __ret; })
-
-#define BPF_PROG_RUN(prog, ctx)                                                \
-       __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
+typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
+                                         const struct bpf_insn *insnsi,
+                                         unsigned int (*bpf_func)(const void *,
+                                                                  const struct bpf_insn *));
+
+static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
+                                         const void *ctx,
+                                         bpf_dispatcher_fn dfunc)
+{
+       u32 ret;
+
+       cant_migrate();
+       if (static_branch_unlikely(&bpf_stats_enabled_key)) {
+               struct bpf_prog_stats *stats;
+               u64 start = sched_clock();
+
+               ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+               stats = this_cpu_ptr(prog->stats);
+               u64_stats_update_begin(&stats->syncp);
+               stats->cnt++;
+               stats->nsecs += sched_clock() - start;
+               u64_stats_update_end(&stats->syncp);
+       } else {
+               ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+       }
+       return ret;
+}
+
+static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void *ctx)
+{
+       return __bpf_prog_run(prog, ctx, bpf_dispatcher_nop_func);
+}
 
 /*
  * Use in preemptible and therefore migratable context to make sure that
@@ -636,7 +650,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
        u32 ret;
 
        migrate_disable();
-       ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func);
+       ret = bpf_prog_run(prog, ctx);
        migrate_enable();
        return ret;
 }
@@ -709,7 +723,7 @@ static inline void bpf_restore_data_end(
        cb->data_end = saved_data_end;
 }
 
-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
 {
        /* eBPF programs may read/write skb->cb[] area to transfer meta
         * data between tail calls. Since this also needs to work with
@@ -730,8 +744,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 
 /* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
-                                        struct sk_buff *skb)
+                                        const void *ctx)
 {
+       const struct sk_buff *skb = ctx;
        u8 *cb_data = bpf_skb_cb(skb);
        u8 cb_saved[BPF_SKB_CB_LEN];
        u32 res;
@@ -741,7 +756,7 @@ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
                memset(cb_data, 0, sizeof(cb_saved));
        }
 
-       res = BPF_PROG_RUN(prog, skb);
+       res = bpf_prog_run(prog, skb);
 
        if (unlikely(prog->cb_access))
                memcpy(cb_data, cb_saved, sizeof(cb_saved));
@@ -775,6 +790,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 
 DECLARE_BPF_DISPATCHER(xdp)
 
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
                                            struct xdp_buff *xdp)
 {
@@ -782,7 +801,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
         * under local_bh_disable(), which provides the needed RCU protection
         * for accessing map entries.
         */
-       return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+       u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+       if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+               if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+                       act = xdp_master_redirect(xdp);
+       }
+
+       return act;
 }
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
@@ -1428,7 +1454,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
                };
                u32 act;
 
-               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
                if (act == SK_PASS) {
                        selected_sk = ctx.selected_sk;
                        no_reuseport = ctx.no_reuseport;
@@ -1466,7 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
                };
                u32 act;
 
-               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+               act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
                if (act == SK_PASS) {
                        selected_sk = ctx.selected_sk;
                        no_reuseport = ctx.no_reuseport;
index 63b56ab..30ece3a 100644 (file)
@@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev);
 
 void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id);
 
 extern struct bus_type fsl_mc_bus_type;
 
index bc73850..c285968 100644 (file)
@@ -8,34 +8,11 @@
 /* All generic netlink requests are serialized by a global lock.  */
 extern void genl_lock(void);
 extern void genl_unlock(void);
-#ifdef CONFIG_LOCKDEP
-extern bool lockdep_genl_is_held(void);
-#endif
 
 /* for synchronisation between af_netlink and genetlink */
 extern atomic_t genl_sk_destructing_cnt;
 extern wait_queue_head_t genl_sk_destructing_waitq;
 
-/**
- * rcu_dereference_genl - rcu_dereference with debug checking
- * @p: The pointer to read, prior to dereferencing
- *
- * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
- * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference()
- */
-#define rcu_dereference_genl(p)                                        \
-       rcu_dereference_check(p, lockdep_genl_is_held())
-
-/**
- * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex
- * @p: The pointer to read, prior to dereferencing
- *
- * Return the value of the specified RCU-protected pointer, but omit
- * the READ_ONCE(), because caller holds genl mutex.
- */
-#define genl_dereference(p)                                    \
-       rcu_dereference_protected(p, lockdep_genl_is_held())
-
 #define MODULE_ALIAS_GENL_FAMILY(family)\
  MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
 
index cacc4dd..630a388 100644 (file)
@@ -22,7 +22,7 @@ struct hdlc_proto {
        void (*start)(struct net_device *dev); /* if open & DCD */
        void (*stop)(struct net_device *dev); /* if open & !DCD */
        void (*detach)(struct net_device *dev);
-       int (*ioctl)(struct net_device *dev, struct ifreq *ifr);
+       int (*ioctl)(struct net_device *dev, struct if_settings *ifs);
        __be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev);
        int (*netif_rx)(struct sk_buff *skb);
        netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -54,7 +54,7 @@ typedef struct hdlc_device {
 /* Exported from hdlc module */
 
 /* Called by hardware driver when a user requests HDLC service */
-int hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int hdlc_ioctl(struct net_device *dev, struct if_settings *ifs);
 
 /* Must be used by hardware driver on module startup/exit */
 #define register_hdlc_device(dev)      register_netdev(dev)
index d4d633a..5d70c3f 100644 (file)
@@ -79,7 +79,7 @@ struct hdlcdrv_ops {
         */
        int (*open)(struct net_device *);
        int (*close)(struct net_device *);
-       int (*ioctl)(struct net_device *, struct ifreq *, 
+       int (*ioctl)(struct net_device *, void __user *,
                     struct hdlcdrv_ioctl *, int);
 };
 
index a673007..6942645 100644 (file)
@@ -1088,6 +1088,48 @@ struct ieee80211_ext {
        } u;
 } __packed __aligned(2);
 
+#define IEEE80211_TWT_CONTROL_NDP                      BIT(0)
+#define IEEE80211_TWT_CONTROL_RESP_MODE                        BIT(1)
+#define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST       BIT(3)
+#define IEEE80211_TWT_CONTROL_RX_DISABLED              BIT(4)
+#define IEEE80211_TWT_CONTROL_WAKE_DUR_UNIT            BIT(5)
+
+#define IEEE80211_TWT_REQTYPE_REQUEST                  BIT(0)
+#define IEEE80211_TWT_REQTYPE_SETUP_CMD                        GENMASK(3, 1)
+#define IEEE80211_TWT_REQTYPE_TRIGGER                  BIT(4)
+#define IEEE80211_TWT_REQTYPE_IMPLICIT                 BIT(5)
+#define IEEE80211_TWT_REQTYPE_FLOWTYPE                 BIT(6)
+#define IEEE80211_TWT_REQTYPE_FLOWID                   GENMASK(9, 7)
+#define IEEE80211_TWT_REQTYPE_WAKE_INT_EXP             GENMASK(14, 10)
+#define IEEE80211_TWT_REQTYPE_PROTECTION               BIT(15)
+
+enum ieee80211_twt_setup_cmd {
+       TWT_SETUP_CMD_REQUEST,
+       TWT_SETUP_CMD_SUGGEST,
+       TWT_SETUP_CMD_DEMAND,
+       TWT_SETUP_CMD_GROUPING,
+       TWT_SETUP_CMD_ACCEPT,
+       TWT_SETUP_CMD_ALTERNATE,
+       TWT_SETUP_CMD_DICTATE,
+       TWT_SETUP_CMD_REJECT,
+};
+
+struct ieee80211_twt_params {
+       __le16 req_type;
+       __le64 twt;
+       u8 min_twt_dur;
+       __le16 mantissa;
+       u8 channel;
+} __packed;
+
+struct ieee80211_twt_setup {
+       u8 dialog_token;
+       u8 element_id;
+       u8 length;
+       u8 control;
+       u8 params[];
+} __packed;
+
 struct ieee80211_mgmt {
        __le16 frame_control;
        __le16 duration;
@@ -1252,6 +1294,10 @@ struct ieee80211_mgmt {
                                        __le16 toa_error;
                                        u8 variable[0];
                                } __packed ftm;
+                               struct {
+                                       u8 action_code;
+                                       u8 variable[];
+                               } __packed s1g;
                        } u;
                } __packed action;
        } u;
@@ -2266,6 +2312,9 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR               0x40000000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED              0x80000000
 
+#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0
+#define IEEE80211_6GHZ_CTRL_REG_SP_AP  1
+
 /**
  * ieee80211_he_6ghz_oper - HE 6 GHz operation Information field
  * @primary: primary channel
@@ -2282,12 +2331,51 @@ struct ieee80211_he_6ghz_oper {
 #define                IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ     2
 #define                IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ    3
 #define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4
+#define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO   0x38
        u8 control;
        u8 ccfs0;
        u8 ccfs1;
        u8 minrate;
 } __packed;
 
+/*
+ * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021",
+ * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation
+ * subfield encoding", and two category for each type in "Table E-12-Regulatory
+ * Info subfield encoding in the United States".
+ * So it it totally max 8 Transmit Power Envelope element.
+ */
+#define IEEE80211_TPE_MAX_IE_COUNT     8
+/*
+ * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield"
+ * of "IEEE Std 802.11ax™‐2021", the max power level is 8.
+ */
+#define IEEE80211_MAX_NUM_PWR_LEVEL    8
+
+#define IEEE80211_TPE_MAX_POWER_COUNT  8
+
+/* transmit power interpretation type of transmit power envelope element */
+enum ieee80211_tx_power_intrpt_type {
+       IEEE80211_TPE_LOCAL_EIRP,
+       IEEE80211_TPE_LOCAL_EIRP_PSD,
+       IEEE80211_TPE_REG_CLIENT_EIRP,
+       IEEE80211_TPE_REG_CLIENT_EIRP_PSD,
+};
+
+/**
+ * struct ieee80211_tx_pwr_env
+ *
+ * This structure represents the "Transmit Power Envelope element"
+ */
+struct ieee80211_tx_pwr_env {
+       u8 tx_power_info;
+       s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT];
+} __packed;
+
+#define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7
+#define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38
+#define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0
+
 /*
  * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
  * @he_oper_ie: byte data of the He Operations IE, stating from the byte
@@ -2869,7 +2957,7 @@ enum ieee80211_eid {
        WLAN_EID_VHT_OPERATION = 192,
        WLAN_EID_EXTENDED_BSS_LOAD = 193,
        WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194,
-       WLAN_EID_VHT_TX_POWER_ENVELOPE = 195,
+       WLAN_EID_TX_POWER_ENVELOPE = 195,
        WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196,
        WLAN_EID_AID = 197,
        WLAN_EID_QUIET_CHANNEL = 198,
@@ -2881,6 +2969,7 @@ enum ieee80211_eid {
        WLAN_EID_AID_RESPONSE = 211,
        WLAN_EID_S1G_BCN_COMPAT = 213,
        WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
+       WLAN_EID_S1G_TWT = 216,
        WLAN_EID_S1G_CAPABILITIES = 217,
        WLAN_EID_VENDOR_SPECIFIC = 221,
        WLAN_EID_QOS_PARAMETER = 222,
@@ -2950,6 +3039,7 @@ enum ieee80211_category {
        WLAN_CATEGORY_FST = 18,
        WLAN_CATEGORY_UNPROT_DMG = 20,
        WLAN_CATEGORY_VHT = 21,
+       WLAN_CATEGORY_S1G = 22,
        WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
        WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
@@ -3023,6 +3113,20 @@ enum ieee80211_key_len {
        WLAN_KEY_LEN_BIP_GMAC_256 = 32,
 };
 
+enum ieee80211_s1g_actioncode {
+       WLAN_S1G_AID_SWITCH_REQUEST,
+       WLAN_S1G_AID_SWITCH_RESPONSE,
+       WLAN_S1G_SYNC_CONTROL,
+       WLAN_S1G_STA_INFO_ANNOUNCE,
+       WLAN_S1G_EDCA_PARAM_SET,
+       WLAN_S1G_EL_OPERATION,
+       WLAN_S1G_TWT_SETUP,
+       WLAN_S1G_TWT_TEARDOWN,
+       WLAN_S1G_SECT_GROUP_ID_LIST,
+       WLAN_S1G_SECT_ID_FEEDBACK,
+       WLAN_S1G_TWT_INFORMATION = 11,
+};
+
 #define IEEE80211_WEP_IV_LEN           4
 #define IEEE80211_WEP_ICV_LEN          4
 #define IEEE80211_CCMP_HDR_LEN         8
index b651c5e..509e18c 100644 (file)
@@ -57,10 +57,16 @@ struct br_ip_list {
 #define BR_MRP_AWARE           BIT(17)
 #define BR_MRP_LOST_CONT       BIT(18)
 #define BR_MRP_LOST_IN_CONT    BIT(19)
+#define BR_TX_FWD_OFFLOAD      BIT(20)
 
 #define BR_DEFAULT_AGEING_TIME (300 * HZ)
 
-extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+struct net_bridge;
+void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
+                            unsigned int cmd, struct ifreq *ifr,
+                            void __user *uarg));
+int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
+                 struct ifreq *ifr, void __user *uarg);
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
 int br_multicast_list_adjacent(struct net_device *dev,
@@ -70,9 +76,6 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 bool br_multicast_has_router_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
-int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
-                 const void *ctx, bool adding, struct notifier_block *nb,
-                 struct netlink_ext_ack *extack);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
                                             struct list_head *br_ip_list)
@@ -104,13 +107,6 @@ static inline bool br_multicast_router(const struct net_device *dev)
 {
        return false;
 }
-static inline int br_mdb_replay(const struct net_device *br_dev,
-                               const struct net_device *dev, const void *ctx,
-                               bool adding, struct notifier_block *nb,
-                               struct netlink_ext_ack *extack)
-{
-       return -EOPNOTSUPP;
-}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
@@ -120,9 +116,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
 int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
                     struct bridge_vlan_info *p_vinfo);
-int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
-                  const void *ctx, bool adding, struct notifier_block *nb,
-                  struct netlink_ext_ack *extack);
+int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+                        struct bridge_vlan_info *p_vinfo);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
 {
@@ -150,12 +145,10 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
        return -EINVAL;
 }
 
-static inline int br_vlan_replay(struct net_device *br_dev,
-                                struct net_device *dev, const void *ctx,
-                                bool adding, struct notifier_block *nb,
-                                struct netlink_ext_ack *extack)
+static inline int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+                                      struct bridge_vlan_info *p_vinfo)
 {
-       return -EOPNOTSUPP;
+       return -EINVAL;
 }
 #endif
 
@@ -167,8 +160,6 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
 clock_t br_get_ageing_time(const struct net_device *br_dev);
-int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
-                 const void *ctx, bool adding, struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -197,13 +188,6 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
 {
        return 0;
 }
-
-static inline int br_fdb_replay(const struct net_device *br_dev,
-                               const struct net_device *dev, const void *ctx,
-                               bool adding, struct notifier_block *nb)
-{
-       return -EOPNOTSUPP;
-}
 #endif
 
 #endif
index 64ce8cd..93c262e 100644 (file)
@@ -41,9 +41,6 @@ struct ip_sf_socklist {
        __be32                  sl_addr[];
 };
 
-#define IP_SFLSIZE(count)      (sizeof(struct ip_sf_socklist) + \
-       (count) * sizeof(__be32))
-
 #define IP_SFBLOCK     10      /* allocate this many at once */
 
 /* ip_mc_socklist is real list now. Speed is not argument;
index aaf4f1b..a038feb 100644 (file)
@@ -178,6 +178,15 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
 
 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *);
+#ifdef CONFIG_INET
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size);
+#else
+static inline int inet_gifconf(struct net_device *dev, char __user *buf,
+                              int len, int size)
+{
+       return 0;
+}
+#endif
 void devinet_init(void);
 struct in_device *inetdev_by_index(struct net *, int);
 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
diff --git a/include/linux/ioam6.h b/include/linux/ioam6.h
new file mode 100644 (file)
index 0000000..94a24b3
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ *  IPv6 IOAM
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_H
+#define _LINUX_IOAM6_H
+
+#include <uapi/linux/ioam6.h>
+
+#endif /* _LINUX_IOAM6_H */
diff --git a/include/linux/ioam6_genl.h b/include/linux/ioam6_genl.h
new file mode 100644 (file)
index 0000000..176e679
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ *  IPv6 IOAM Generic Netlink API
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_GENL_H
+#define _LINUX_IOAM6_GENL_H
+
+#include <uapi/linux/ioam6_genl.h>
+
+#endif /* _LINUX_IOAM6_GENL_H */
diff --git a/include/linux/ioam6_iptunnel.h b/include/linux/ioam6_iptunnel.h
new file mode 100644 (file)
index 0000000..07d9dfe
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ *  IPv6 IOAM Lightweight Tunnel API
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+#ifndef _LINUX_IOAM6_IPTUNNEL_H
+#define _LINUX_IOAM6_IPTUNNEL_H
+
+#include <uapi/linux/ioam6_iptunnel.h>
+
+#endif /* _LINUX_IOAM6_IPTUNNEL_H */
index 70b2ad3..ef4a698 100644 (file)
@@ -76,6 +76,9 @@ struct ipv6_devconf {
        __s32           disable_policy;
        __s32           ndisc_tclass;
        __s32           rpl_seg_enabled;
+       __u32           ioam6_id;
+       __u32           ioam6_id_wide;
+       __u8            ioam6_enabled;
 
        struct ctl_table_header *sysctl_header;
 };
index 2479792..20151c4 100644 (file)
@@ -1582,7 +1582,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 #endif /* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+                            gfp_t gfp_mask);
 void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
 #ifdef CONFIG_MEMCG
 extern struct static_key_false memcg_sockets_enabled_key;
index 944aa3a..beb9183 100644 (file)
@@ -356,6 +356,7 @@ struct mhi_controller_config {
  * @fbc_download: MHI host needs to do complete image transfer (optional)
  * @wake_set: Device wakeup set flag
  * @irq_flags: irq flags passed to request_irq (optional)
+ * @mru: the default MRU for the MHI device
  *
  * Fields marked as (required) need to be populated by the controller driver
  * before calling mhi_register_controller(). For the fields marked as (optional)
@@ -448,6 +449,7 @@ struct mhi_controller {
        bool fbc_download;
        bool wake_set;
        unsigned long irq_flags;
+       u32 mru;
 };
 
 /**
index 219b93c..12ea29e 100644 (file)
@@ -32,7 +32,7 @@ struct mii_if_info {
 
 extern int mii_link_ok (struct mii_if_info *mii);
 extern int mii_nway_restart (struct mii_if_info *mii);
-extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern void mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
 extern void mii_ethtool_get_link_ksettings(
        struct mii_if_info *mii, struct ethtool_link_ksettings *cmd);
 extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
index 0025913..66eaf0a 100644 (file)
@@ -1038,7 +1038,7 @@ enum {
 struct mlx5_mkey_seg {
        /* This is a two bit field occupying bits 31-30.
         * bit 31 is always 0,
-        * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have tanslation
+        * bit 30 is zero for regular MRs and 1 (e.g free) for UMRs that do not have translation
         */
        u8              status;
        u8              pcie_control;
@@ -1157,6 +1157,9 @@ enum mlx5_cap_mode {
        HCA_CAP_OPMOD_GET_CUR   = 1,
 };
 
+/* Any new cap addition must update mlx5_hca_caps_alloc() to allocate
+ * capability memory.
+ */
 enum mlx5_cap_type {
        MLX5_CAP_GENERAL = 0,
        MLX5_CAP_ETHERNET_OFFLOADS,
@@ -1213,55 +1216,55 @@ enum mlx5_qcam_feature_groups {
 
 /* GET Dev Caps macros */
 #define MLX5_CAP_GEN(mdev, cap) \
-       MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+       MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap)
 
 #define MLX5_CAP_GEN_64(mdev, cap) \
-       MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+       MLX5_GET64(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->cur, cap)
 
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
-       MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
+       MLX5_GET(cmd_hca_cap, mdev->caps.hca[MLX5_CAP_GENERAL]->max, cap)
 
 #define MLX5_CAP_GEN_2(mdev, cap) \
-       MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+       MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap)
 
 #define MLX5_CAP_GEN_2_64(mdev, cap) \
-       MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+       MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->cur, cap)
 
 #define MLX5_CAP_GEN_2_MAX(mdev, cap) \
-       MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_max[MLX5_CAP_GENERAL_2], cap)
+       MLX5_GET(cmd_hca_cap_2, mdev->caps.hca[MLX5_CAP_GENERAL_2]->max, cap)
 
 #define MLX5_CAP_ETH(mdev, cap) \
        MLX5_GET(per_protocol_networking_offload_caps,\
-                mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+                mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->cur, cap)
 
 #define MLX5_CAP_ETH_MAX(mdev, cap) \
        MLX5_GET(per_protocol_networking_offload_caps,\
-                mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+                mdev->caps.hca[MLX5_CAP_ETHERNET_OFFLOADS]->max, cap)
 
 #define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \
        MLX5_GET(per_protocol_networking_offload_caps,\
-                mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap)
+                mdev->caps.hca[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS]->cur, cap)
 
 #define MLX5_CAP_ROCE(mdev, cap) \
-       MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap)
+       MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->cur, cap)
 
 #define MLX5_CAP_ROCE_MAX(mdev, cap) \
-       MLX5_GET(roce_cap, mdev->caps.hca_max[MLX5_CAP_ROCE], cap)
+       MLX5_GET(roce_cap, mdev->caps.hca[MLX5_CAP_ROCE]->max, cap)
 
 #define MLX5_CAP_ATOMIC(mdev, cap) \
-       MLX5_GET(atomic_caps, mdev->caps.hca_cur[MLX5_CAP_ATOMIC], cap)
+       MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->cur, cap)
 
 #define MLX5_CAP_ATOMIC_MAX(mdev, cap) \
-       MLX5_GET(atomic_caps, mdev->caps.hca_max[MLX5_CAP_ATOMIC], cap)
+       MLX5_GET(atomic_caps, mdev->caps.hca[MLX5_CAP_ATOMIC]->max, cap)
 
 #define MLX5_CAP_FLOWTABLE(mdev, cap) \
-       MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+       MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap)
 
 #define MLX5_CAP64_FLOWTABLE(mdev, cap) \
-       MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+       MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca[MLX5_CAP_FLOW_TABLE]->cur, cap)
 
 #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
-       MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap)
+       MLX5_GET(flow_table_nic_cap, mdev->caps.hca[MLX5_CAP_FLOW_TABLE]->max, cap)
 
 #define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \
        MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap)
@@ -1301,11 +1304,11 @@ enum mlx5_qcam_feature_groups {
 
 #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
        MLX5_GET(flow_table_eswitch_cap, \
-                mdev->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+                mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
 
 #define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
        MLX5_GET(flow_table_eswitch_cap, \
-                mdev->caps.hca_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+                mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->max, cap)
 
 #define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
        MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
@@ -1327,31 +1330,31 @@ enum mlx5_qcam_feature_groups {
 
 #define MLX5_CAP_ESW(mdev, cap) \
        MLX5_GET(e_switch_cap, \
-                mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap)
+                mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap)
 
 #define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \
        MLX5_GET64(flow_table_eswitch_cap, \
-               (mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+               (mdev)->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
 
 #define MLX5_CAP_ESW_MAX(mdev, cap) \
        MLX5_GET(e_switch_cap, \
-                mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap)
+                mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap)
 
 #define MLX5_CAP_ODP(mdev, cap)\
-       MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap)
+       MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap)
 
 #define MLX5_CAP_ODP_MAX(mdev, cap)\
-       MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap)
+       MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap)
 
 #define MLX5_CAP_VECTOR_CALC(mdev, cap) \
        MLX5_GET(vector_calc_cap, \
-                mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap)
+                mdev->caps.hca[MLX5_CAP_VECTOR_CALC]->cur, cap)
 
 #define MLX5_CAP_QOS(mdev, cap)\
-       MLX5_GET(qos_cap, mdev->caps.hca_cur[MLX5_CAP_QOS], cap)
+       MLX5_GET(qos_cap, mdev->caps.hca[MLX5_CAP_QOS]->cur, cap)
 
 #define MLX5_CAP_DEBUG(mdev, cap)\
-       MLX5_GET(debug_cap, mdev->caps.hca_cur[MLX5_CAP_DEBUG], cap)
+       MLX5_GET(debug_cap, mdev->caps.hca[MLX5_CAP_DEBUG]->cur, cap)
 
 #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \
        MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld)
@@ -1387,27 +1390,27 @@ enum mlx5_qcam_feature_groups {
        MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap)
 
 #define MLX5_CAP_DEV_MEM(mdev, cap)\
-       MLX5_GET(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+       MLX5_GET(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap)
 
 #define MLX5_CAP64_DEV_MEM(mdev, cap)\
-       MLX5_GET64(device_mem_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_MEM], cap)
+       MLX5_GET64(device_mem_cap, mdev->caps.hca[MLX5_CAP_DEV_MEM]->cur, cap)
 
 #define MLX5_CAP_TLS(mdev, cap) \
-       MLX5_GET(tls_cap, (mdev)->caps.hca_cur[MLX5_CAP_TLS], cap)
+       MLX5_GET(tls_cap, (mdev)->caps.hca[MLX5_CAP_TLS]->cur, cap)
 
 #define MLX5_CAP_DEV_EVENT(mdev, cap)\
-       MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca_cur[MLX5_CAP_DEV_EVENT], cap)
+       MLX5_ADDR_OF(device_event_cap, (mdev)->caps.hca[MLX5_CAP_DEV_EVENT]->cur, cap)
 
 #define MLX5_CAP_DEV_VDPA_EMULATION(mdev, cap)\
        MLX5_GET(virtio_emulation_cap, \
-               (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
+               (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap)
 
 #define MLX5_CAP64_DEV_VDPA_EMULATION(mdev, cap)\
        MLX5_GET64(virtio_emulation_cap, \
-               (mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
+               (mdev)->caps.hca[MLX5_CAP_VDPA_EMULATION]->cur, cap)
 
 #define MLX5_CAP_IPSEC(mdev, cap)\
-       MLX5_GET(ipsec_cap, (mdev)->caps.hca_cur[MLX5_CAP_IPSEC], cap)
+       MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
 
 enum {
        MLX5_CMD_STAT_OK                        = 0x0,
index 25a8be5..e234174 100644 (file)
@@ -581,7 +581,7 @@ struct mlx5_priv {
        /* end: qp staff */
 
        /* start: alloc staff */
-       /* protect buffer alocation according to numa node */
+       /* protect buffer allocation according to numa node */
        struct mutex            alloc_mutex;
        int                     numa_node;
 
@@ -623,8 +623,7 @@ struct mlx5_priv {
 };
 
 enum mlx5_device_state {
-       MLX5_DEVICE_STATE_UNINITIALIZED,
-       MLX5_DEVICE_STATE_UP,
+       MLX5_DEVICE_STATE_UP = 1,
        MLX5_DEVICE_STATE_INTERNAL_ERROR,
 };
 
@@ -730,6 +729,11 @@ struct mlx5_profile {
        } mr_cache[MAX_MR_CACHE_ENTRIES];
 };
 
+struct mlx5_hca_cap {
+       u32 cur[MLX5_UN_SZ_DW(hca_cap_union)];
+       u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
+};
+
 struct mlx5_core_dev {
        struct device *device;
        enum mlx5_coredev_type coredev_type;
@@ -741,8 +745,7 @@ struct mlx5_core_dev {
        char                    board_id[MLX5_BOARD_ID_LEN];
        struct mlx5_cmd         cmd;
        struct {
-               u32 hca_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
-               u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
+               struct mlx5_hca_cap *hca[MLX5_CAP_NUM];
                u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
                u32 mcam[MLX5_MCAM_REGS_NUM][MLX5_ST_SZ_DW(mcam_reg)];
                u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
@@ -1110,7 +1113,7 @@ static inline u8 mlx5_mkey_variant(u32 mkey)
 }
 
 /* Async-atomic event notifier used by mlx5 core to forward FW
- * evetns recived from event queue to mlx5 consumers.
+ * evetns received from event queue to mlx5 consumers.
  * Optimise event queue dipatching.
  */
 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
@@ -1137,6 +1140,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave);
@@ -1144,6 +1149,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
index bc7db2e..4ab5c1f 100644 (file)
@@ -29,11 +29,20 @@ enum {
        REP_LOADED,
 };
 
+enum mlx5_switchdev_event {
+       MLX5_SWITCHDEV_EVENT_PAIR,
+       MLX5_SWITCHDEV_EVENT_UNPAIR,
+};
+
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_ops {
        int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
        void (*unload)(struct mlx5_eswitch_rep *rep);
        void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       int (*event)(struct mlx5_eswitch *esw,
+                    struct mlx5_eswitch_rep *rep,
+                    enum mlx5_switchdev_event event,
+                    void *data);
 };
 
 struct mlx5_eswitch_rep_data {
@@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep, u32 sqn);
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
@@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return 0;
 }
 
+static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
index 77746f7..0106c67 100644 (file)
@@ -38,6 +38,8 @@
 
 #define MLX5_FS_DEFAULT_FLOW_TAG 0x0
 
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
 enum {
        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO  = 1 << 16,
        MLX5_FLOW_CONTEXT_ACTION_ENCRYPT        = 1 << 17,
index b0009aa..f3638d0 100644 (file)
@@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits {
        u8         nic_bw_share[0x1];
        u8         nic_rate_limit[0x1];
        u8         packet_pacing_uid[0x1];
-       u8         reserved_at_c[0x14];
+       u8         log_esw_max_sched_depth[0x4];
+       u8         reserved_at_10[0x10];
 
        u8         reserved_at_20[0xb];
        u8         log_max_qos_nic_queue_group[0x5];
@@ -921,7 +922,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
        u8         scatter_fcs[0x1];
        u8         enhanced_multi_pkt_send_wqe[0x1];
        u8         tunnel_lso_const_out_ip_id[0x1];
-       u8         reserved_at_1c[0x2];
+       u8         tunnel_lro_gre[0x1];
+       u8         tunnel_lro_vxlan[0x1];
        u8         tunnel_stateless_gre[0x1];
        u8         tunnel_stateless_vxlan[0x1];
 
@@ -1651,7 +1653,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         max_geneve_tlv_option_data_len[0x5];
        u8         reserved_at_570[0x10];
 
-       u8         reserved_at_580[0x33];
+       u8         reserved_at_580[0xb];
+       u8         log_max_dci_stream_channels[0x5];
+       u8         reserved_at_590[0x3];
+       u8         log_max_dci_errored_streams[0x5];
+       u8         reserved_at_598[0x8];
+
+       u8         reserved_at_5a0[0x13];
        u8         log_max_dek[0x5];
        u8         reserved_at_5b8[0x4];
        u8         mini_cqe_resp_stride_index[0x1];
@@ -3020,10 +3028,12 @@ struct mlx5_ifc_qpc_bits {
        u8         reserved_at_3c0[0x8];
        u8         next_send_psn[0x18];
 
-       u8         reserved_at_3e0[0x8];
+       u8         reserved_at_3e0[0x3];
+       u8         log_num_dci_stream_channels[0x5];
        u8         cqn_snd[0x18];
 
-       u8         reserved_at_400[0x8];
+       u8         reserved_at_400[0x3];
+       u8         log_num_dci_errored_streams[0x5];
        u8         deth_sqpn[0x18];
 
        u8         reserved_at_420[0x20];
@@ -3911,7 +3921,7 @@ struct mlx5_ifc_cqc_bits {
        u8         status[0x4];
        u8         reserved_at_4[0x2];
        u8         dbr_umem_valid[0x1];
-       u8         apu_thread_cq[0x1];
+       u8         apu_cq[0x1];
        u8         cqe_sz[0x3];
        u8         cc[0x1];
        u8         reserved_at_c[0x1];
@@ -3937,8 +3947,7 @@ struct mlx5_ifc_cqc_bits {
        u8         cq_period[0xc];
        u8         cq_max_count[0x10];
 
-       u8         reserved_at_a0[0x18];
-       u8         c_eqn[0x8];
+       u8         c_eqn_or_apu_element[0x20];
 
        u8         reserved_at_c0[0x3];
        u8         log_page_size[0x5];
index 52bbd2b..7f8ee09 100644 (file)
@@ -103,11 +103,19 @@ struct page {
                        unsigned long pp_magic;
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
-                       /**
-                        * @dma_addr: might require a 64-bit value on
-                        * 32-bit architectures.
-                        */
-                       unsigned long dma_addr[2];
+                       unsigned long dma_addr;
+                       union {
+                               /**
+                                * dma_addr_upper: might require a 64-bit
+                                * value on 32-bit architectures.
+                                */
+                               unsigned long dma_addr_upper;
+                               /**
+                                * For frag page support, not supported in
+                                * 32-bit architectures with 64-bit DMA.
+                                */
+                               atomic_long_t pp_frag_count;
+                       };
                };
                struct {        /* slab, slob and slub */
                        union {
index 1203661..a85c9f0 100644 (file)
@@ -75,6 +75,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43364          0xa9a4
 #define SDIO_DEVICE_ID_BROADCOM_43430          0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_43455          0xa9bf
+#define SDIO_DEVICE_ID_BROADCOM_CYPRESS_43752  0xaae8
 
 #define SDIO_VENDOR_ID_MARVELL                 0x02df
 #define SDIO_DEVICE_ID_MARVELL_LIBERTAS                0x9103
index d65ce09..7c41593 100644 (file)
@@ -47,6 +47,7 @@
 #include <uapi/linux/if_bonding.h>
 #include <uapi/linux/pkt_cls.h>
 #include <linux/hashtable.h>
+#include <linux/rbtree.h>
 
 struct netpoll_info;
 struct device;
@@ -208,6 +209,7 @@ struct sk_buff;
 
 struct netdev_hw_addr {
        struct list_head        list;
+       struct rb_node          node;
        unsigned char           addr[MAX_ADDR_LEN];
        unsigned char           type;
 #define NETDEV_HW_ADDR_T_LAN           1
@@ -224,6 +226,9 @@ struct netdev_hw_addr {
 struct netdev_hw_addr_list {
        struct list_head        list;
        int                     count;
+
+       /* Auxiliary tree for faster lookup on addition and deletion */
+       struct rb_root          tree;
 };
 
 #define netdev_hw_addr_list_count(l) ((l)->count)
@@ -295,18 +300,6 @@ enum netdev_state_t {
 };
 
 
-/*
- * This structure holds boot-time configured netdevice settings. They
- * are then used in the device probing.
- */
-struct netdev_boot_setup {
-       char name[IFNAMSIZ];
-       struct ifmap map;
-};
-#define NETDEV_BOOT_SETUP_MAX 8
-
-int __init netdev_boot_setup(char *str);
-
 struct gro_list {
        struct list_head        list;
        int                     count;
@@ -734,13 +727,13 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
 
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
+       struct xdp_rxq_info             xdp_rxq;
 #ifdef CONFIG_RPS
        struct rps_map __rcu            *rps_map;
        struct rps_dev_flow_table __rcu *rps_flow_table;
 #endif
        struct kobject                  kobj;
        struct net_device               *dev;
-       struct xdp_rxq_info             xdp_rxq;
 #ifdef CONFIG_XDP_SOCKETS
        struct xsk_buff_pool            *pool;
 #endif
@@ -1086,9 +1079,18 @@ struct netdev_net_notifier {
  *     Test if Media Access Control address is valid for the device.
  *
  * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
- *     Called when a user requests an ioctl which can't be handled by
- *     the generic interface code. If not defined ioctls return
- *     not supported error code.
+ *     Old-style ioctl entry point. This is used internally by the
+ *     appletalk and ieee802154 subsystems but is no longer called by
+ *     the device ioctl handler.
+ *
+ * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd);
+ *     Used by the bonding driver for its device specific ioctls:
+ *     SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE,
+ *     SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY
+ *
+ * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd);
+ *     Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG,
+ *     SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP.
  *
  * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map);
  *     Used to set network devices bus interface parameters. This interface
@@ -1321,6 +1323,9 @@ struct netdev_net_notifier {
  *     that got dropped are freed/returned via xdp_return_frame().
  *     Returns negative number, means general error invoking ndo, meaning
  *     no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ *                                             struct xdp_buff *xdp);
+ *      Get the xmit slave of master device based on the xdp_buff.
  * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
  *      This function is used to wake up the softirq, ksoftirqd or kthread
  *     responsible for sending and/or receiving packets on a specific
@@ -1361,6 +1366,15 @@ struct net_device_ops {
        int                     (*ndo_validate_addr)(struct net_device *dev);
        int                     (*ndo_do_ioctl)(struct net_device *dev,
                                                struct ifreq *ifr, int cmd);
+       int                     (*ndo_eth_ioctl)(struct net_device *dev,
+                                                struct ifreq *ifr, int cmd);
+       int                     (*ndo_siocbond)(struct net_device *dev,
+                                               struct ifreq *ifr, int cmd);
+       int                     (*ndo_siocwandev)(struct net_device *dev,
+                                                 struct if_settings *ifs);
+       int                     (*ndo_siocdevprivate)(struct net_device *dev,
+                                                     struct ifreq *ifr,
+                                                     void __user *data, int cmd);
        int                     (*ndo_set_config)(struct net_device *dev,
                                                  struct ifmap *map);
        int                     (*ndo_change_mtu)(struct net_device *dev,
@@ -1539,6 +1553,8 @@ struct net_device_ops {
        int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
                                                struct xdp_frame **xdp,
                                                u32 flags);
+       struct net_device *     (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+                                                         struct xdp_buff *xdp);
        int                     (*ndo_xsk_wakeup)(struct net_device *dev,
                                                  u32 queue_id, u32 flags);
        struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
@@ -1805,6 +1821,7 @@ enum netdev_ml_priv_type {
  *     @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network
  *                      device struct
  *     @mpls_ptr:      mpls_dev struct pointer
+ *     @mctp_ptr:      MCTP specific data
  *
  *     @dev_addr:      Hw address (before bcast,
  *                     because most packets are unicast)
@@ -2092,6 +2109,9 @@ struct net_device {
 #if IS_ENABLED(CONFIG_MPLS_ROUTING)
        struct mpls_dev __rcu   *mpls_ptr;
 #endif
+#if IS_ENABLED(CONFIG_MCTP)
+       struct mctp_dev __rcu   *mctp_ptr;
+#endif
 
 /*
  * Cache lines mostly used on receive path (including eth_type_trans())
@@ -2917,7 +2937,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 }
 
 int netdev_boot_setup_check(struct net_device *dev);
-unsigned long netdev_boot_base(const char *prefix, int unit);
 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
                                       const char *hwaddr);
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -3289,14 +3308,6 @@ static inline bool dev_has_header(const struct net_device *dev)
        return dev->header_ops && dev->header_ops->create;
 }
 
-typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
-                          int len, int size);
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
-static inline int unregister_gifconf(unsigned int family)
-{
-       return register_gifconf(family, NULL);
-}
-
 #ifdef CONFIG_NET_FLOW_LIMIT
 #define FLOW_LIMIT_HISTORY     (1 << 7)  /* must be ^2 and !overflow buckets */
 struct sd_flow_limit {
@@ -3915,6 +3926,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
        return 0;
 }
 #endif
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq);
 
 static inline struct netdev_rx_queue *
 __netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
@@ -3948,7 +3961,7 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason);
 /*
  * It is not allowed to call kfree_skb() or consume_skb() from hardware
  * interrupt context or with hardware interrupts being disabled.
- * (in_irq() || irqs_disabled())
+ * (in_hardirq() || irqs_disabled())
  *
  * We provide four helpers that can be used in following contexts :
  *
@@ -3984,6 +3997,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
        __dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
 }
 
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+                            struct bpf_prog *xdp_prog);
 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
 int netif_rx(struct sk_buff *skb);
@@ -4016,10 +4031,12 @@ static inline bool is_socket_ioctl_cmd(unsigned int cmd)
 {
        return _IOC_TYPE(cmd) == SOCK_IOC_TYPE;
 }
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg);
+int put_user_ifreq(struct ifreq *ifr, void __user *arg);
 int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
-               bool *need_copyout);
-int dev_ifconf(struct net *net, struct ifconf *, int);
-int dev_ethtool(struct net *net, struct ifreq *);
+               void __user *data, bool *need_copyout);
+int dev_ifconf(struct net *net, struct ifconf __user *ifc);
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
 unsigned int dev_get_flags(const struct net_device *);
 int __dev_change_flags(struct net_device *dev, unsigned int flags,
                       struct netlink_ext_ack *extack);
@@ -4073,6 +4090,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, int expected_fd, u32 flags);
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
@@ -4140,11 +4158,13 @@ void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_dec(*dev->pcpu_refcnt);
+               this_cpu_dec(*dev->pcpu_refcnt);
 #else
-       refcount_dec(&dev->dev_refcnt);
+               refcount_dec(&dev->dev_refcnt);
 #endif
+       }
 }
 
 /**
@@ -4155,11 +4175,13 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_inc(*dev->pcpu_refcnt);
+               this_cpu_inc(*dev->pcpu_refcnt);
 #else
-       refcount_inc(&dev->dev_refcnt);
+               refcount_inc(&dev->dev_refcnt);
 #endif
+       }
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
index 28d7027..5897f3d 100644 (file)
@@ -238,9 +238,6 @@ struct xt_table {
        u_int8_t af;            /* address/protocol family */
        int priority;           /* hook order */
 
-       /* called when table is needed in the given netns */
-       int (*table_init)(struct net *net);
-
        /* A unique name... */
        const char name[XT_TABLE_MAXNAMELEN];
 };
@@ -452,6 +449,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
+int xt_register_template(const struct xt_table *t, int(*table_init)(struct net *net));
+void xt_unregister_template(const struct xt_table *t);
+
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
 
index a817825..10a0197 100644 (file)
@@ -127,4 +127,6 @@ static inline bool ebt_invalid_target(int target)
        return (target < -NUM_STANDARD_TARGETS || target >= 0);
 }
 
+int ebt_register_template(const struct ebt_table *t, int(*table_init)(struct net *net));
+void ebt_unregister_template(const struct ebt_table *t);
 #endif
index 540b377..9474306 100644 (file)
@@ -1620,6 +1620,16 @@ static inline bool pci_aer_available(void) { return false; }
 
 bool pci_ats_disabled(void);
 
+#ifdef CONFIG_PCIE_PTM
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
+bool pcie_ptm_enabled(struct pci_dev *dev);
+#else
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
+static inline bool pcie_ptm_enabled(struct pci_dev *dev)
+{ return false; }
+#endif
+
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
index 2d510ad..fe156a8 100644 (file)
@@ -762,6 +762,7 @@ struct perf_event {
 #ifdef CONFIG_BPF_SYSCALL
        perf_overflow_handler_t         orig_overflow_handler;
        struct bpf_prog                 *prog;
+       u64                             bpf_cookie;
 #endif
 
 #ifdef CONFIG_EVENT_TRACING
index 3b80dc3..736e1d1 100644 (file)
@@ -1431,6 +1431,7 @@ static inline int phy_device_register(struct phy_device *phy)
 static inline void phy_device_free(struct phy_device *phydev) { }
 #endif /* CONFIG_PHYLIB */
 void phy_device_remove(struct phy_device *phydev);
+int phy_get_c45_ids(struct phy_device *phydev);
 int phy_init_hw(struct phy_device *phydev);
 int phy_suspend(struct phy_device *phydev);
 int phy_resume(struct phy_device *phydev);
index 71fac92..2e55650 100644 (file)
@@ -215,7 +215,7 @@ static inline long scaled_ppm_to_ppb(long ppm)
        return (long)ppb;
 }
 
-#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 
 /**
  * ptp_clock_register() - register a PTP hardware clock driver
@@ -307,6 +307,33 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay);
  */
 void ptp_cancel_worker_sync(struct ptp_clock *ptp);
 
+#else
+static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+                                                  struct device *parent)
+{ return NULL; }
+static inline int ptp_clock_unregister(struct ptp_clock *ptp)
+{ return 0; }
+static inline void ptp_clock_event(struct ptp_clock *ptp,
+                                  struct ptp_clock_event *event)
+{ }
+static inline int ptp_clock_index(struct ptp_clock *ptp)
+{ return -1; }
+static inline int ptp_find_pin(struct ptp_clock *ptp,
+                              enum ptp_pin_function func, unsigned int chan)
+{ return -1; }
+static inline int ptp_schedule_worker(struct ptp_clock *ptp,
+                                     unsigned long delay)
+{ return -EOPNOTSUPP; }
+static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp)
+{ }
+#endif
+
+#if IS_BUILTIN(CONFIG_PTP_1588_CLOCK)
+/*
+ * These are called by the network core, and don't work if PTP is in
+ * a loadable module.
+ */
+
 /**
  * ptp_get_vclocks_index() - get all vclocks index on pclock, and
  *                           caller is responsible to free memory
@@ -327,26 +354,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index);
  */
 void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
                           int vclock_index);
-
 #else
-static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
-                                                  struct device *parent)
-{ return NULL; }
-static inline int ptp_clock_unregister(struct ptp_clock *ptp)
-{ return 0; }
-static inline void ptp_clock_event(struct ptp_clock *ptp,
-                                  struct ptp_clock_event *event)
-{ }
-static inline int ptp_clock_index(struct ptp_clock *ptp)
-{ return -1; }
-static inline int ptp_find_pin(struct ptp_clock *ptp,
-                              enum ptp_pin_function func, unsigned int chan)
-{ return -1; }
-static inline int ptp_schedule_worker(struct ptp_clock *ptp,
-                                     unsigned long delay)
-{ return -EOPNOTSUPP; }
-static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp)
-{ }
 static inline int ptp_get_vclocks_index(int pclock_index, int **vclock_index)
 { return 0; }
 static inline void ptp_convert_timestamp(struct skb_shared_hwtstamps *hwtstamps,
index ec8d07d..c64119a 100644 (file)
@@ -42,6 +42,7 @@ struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
 struct bpf_local_storage;
+struct bpf_run_ctx;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1379,6 +1380,8 @@ struct task_struct {
 #ifdef CONFIG_BPF_SYSCALL
        /* Used by BPF task local storage */
        struct bpf_local_storage __rcu  *bpf_storage;
+       /* Used for BPF run context */
+       struct bpf_run_ctx              *bpf_ctx;
 #endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
index b2db9cd..6bdb0db 100644 (file)
@@ -689,6 +689,7 @@ typedef unsigned char *sk_buff_data_t;
  *             CHECKSUM_UNNECESSARY (max 3)
  *     @dst_pending_confirm: need to confirm neighbour
  *     @decrypted: Decrypted SKB
+ *     @slow_gro: state present at GRO time, slower prepare step required
  *     @napi_id: id of the NAPI struct this skb came from
  *     @sender_cpu: (aka @napi_id) source CPU in XPS
  *     @secmark: security marking
@@ -863,13 +864,14 @@ struct sk_buff {
        __u8                    tc_skip_classify:1;
        __u8                    tc_at_ingress:1;
 #endif
-#ifdef CONFIG_NET_REDIRECT
        __u8                    redirected:1;
+#ifdef CONFIG_NET_REDIRECT
        __u8                    from_ingress:1;
 #endif
 #ifdef CONFIG_TLS_DEVICE
        __u8                    decrypted:1;
 #endif
+       __u8                    slow_gro:1;
 
 #ifdef CONFIG_NET_SCHED
        __u16                   tc_index;       /* traffic control index */
@@ -990,6 +992,7 @@ static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
  */
 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 {
+       skb->slow_gro |= !!dst;
        skb->_skb_refdst = (unsigned long)dst;
 }
 
@@ -1006,6 +1009,7 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
 {
        WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+       skb->slow_gro |= !!dst;
        skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
 }
 
@@ -1179,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
                                     unsigned int headroom);
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
                                int newtailroom, gfp_t priority);
 int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -4216,6 +4221,7 @@ static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
 static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       skb->slow_gro |= !!nfct;
        skb->_nfct = nfct;
 #endif
 }
@@ -4375,6 +4381,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        nf_conntrack_put(skb_nfct(dst));
 #endif
+       dst->slow_gro = src->slow_gro;
        __nf_copy(dst, src, true);
 }
 
@@ -4664,17 +4671,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)
 
 static inline bool skb_is_redirected(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_REDIRECT
        return skb->redirected;
-#else
-       return false;
-#endif
 }
 
 static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
 {
-#ifdef CONFIG_NET_REDIRECT
        skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
        skb->from_ingress = from_ingress;
        if (skb->from_ingress)
                skb->tstamp = 0;
@@ -4683,9 +4686,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
 
 static inline void skb_reset_redirect(struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_REDIRECT
        skb->redirected = 0;
-#endif
 }
 
 static inline bool skb_csum_is_sctp(struct sk_buff *skb)
@@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_PAGE_POOL
-static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
-                                       struct page_pool *pp)
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
 {
        skb->pp_recycle = 1;
-       page_pool_store_mem_info(page, pp);
 }
 #endif
 
index 0d8e3dc..fd9ce51 100644 (file)
@@ -223,8 +223,11 @@ struct ucred {
                                 * reuses AF_INET address family
                                 */
 #define AF_XDP         44      /* XDP sockets                  */
+#define AF_MCTP                45      /* Management component
+                                * transport protocol
+                                */
 
-#define AF_MAX         45      /* For now.. */
+#define AF_MAX         46      /* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC      AF_UNSPEC
@@ -274,6 +277,7 @@ struct ucred {
 #define PF_QIPCRTR     AF_QIPCRTR
 #define PF_SMC         AF_SMC
 #define PF_XDP         AF_XDP
+#define PF_MCTP                AF_MCTP
 #define PF_MAX         AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
index 0d5a269..f9b53ac 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
index 3f8bc97..19253bf 100644 (file)
@@ -197,7 +197,7 @@ struct ssb_extif {
 
 static inline bool ssb_extif_available(struct ssb_extif *extif)
 {
-       return 0;
+       return false;
 }
 
 static inline
index ad413b3..8e0631a 100644 (file)
@@ -675,7 +675,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 
 #ifdef CONFIG_BPF_EVENTS
 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
-int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
 void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
@@ -692,7 +692,7 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
 }
 
 static inline int
-perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
 {
        return -EOPNOTSUPP;
 }
@@ -803,6 +803,9 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
 void perf_trace_buf_update(void *record, u16 type);
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
 
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
+void perf_event_free_bpf_prog(struct perf_event *event);
+
 void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
 void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
 void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
index 20d3103..46b15e2 100644 (file)
        (void)__tmp; \
 })
 
+/*
+ * Check at compile time that something is a pointer type.
+ */
+#define typecheck_pointer(x) \
+({     typeof(x) __dummy; \
+       (void)sizeof(*__dummy); \
+       1; \
+})
+
 #endif         /* TYPECHECK_H_INCLUDED */
index 9cce0d8..08ca9ce 100644 (file)
@@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit);
 struct net_device *wd_probe(int unit);
 struct net_device *ne_probe(int unit);
 struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
 struct net_device *ni65_probe(int unit);
 struct net_device *sonic_probe(int unit);
 struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
 struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
 struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
 
 /* Fibre Channel adapters */
 int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
index 086b291..f19f7f4 100644 (file)
@@ -58,6 +58,14 @@ struct tc_action {
 #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
                              TCA_ACT_HW_STATS_DELAYED)
 
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE   (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND     (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE  (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL  (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
  */
@@ -99,8 +107,8 @@ struct tc_action_ops {
        void    (*cleanup)(struct tc_action *);
        int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
        int     (*init)(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **act, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **act,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int,
@@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
+                   struct nlattr *est,
                    struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+                   u32 flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack);
+                                   u32 flags, struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
                    int ref, bool terse);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
index f42fddd..7d142e8 100644 (file)
@@ -70,6 +70,9 @@ struct unix_sock {
        struct socket_wq        peer_wq;
        wait_queue_entry_t      peer_wake;
        struct scm_stat         scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       struct sk_buff          *oob_skb;
+#endif
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
@@ -82,6 +85,10 @@ static inline struct unix_sock *unix_sk(const struct sock *sk)
 long unix_inq_len(struct sock *sk);
 long unix_outq_len(struct sock *sk);
 
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+                        int flags);
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+                         int flags);
 #ifdef CONFIG_SYSCTL
 int unix_sysctl_register(struct net *net);
 void unix_sysctl_unregister(struct net *net);
@@ -89,4 +96,16 @@ void unix_sysctl_unregister(struct net *net);
 static inline int unix_sysctl_register(struct net *net) { return 0; }
 static inline void unix_sysctl_unregister(struct net *net) {}
 #endif
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto unix_dgram_proto;
+extern struct proto unix_stream_proto;
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init unix_bpf_build_proto(void);
+#else
+static inline void __init unix_bpf_build_proto(void)
+{}
+#endif
 #endif
index aa52b2e..2ed23a3 100644 (file)
@@ -38,4 +38,7 @@ struct ax_plat_data {
        int (*check_irq)(struct platform_device *pdev);
 };
 
+/* exported from ax88796.c for xsurf100.c  */
+extern void ax_NS8390_reinit(struct net_device *dev);
+
 #endif /* __NET_AX88796_PLAT_H */
index db4312e..a7360c8 100644 (file)
@@ -221,6 +221,7 @@ struct oob_data {
 
 struct adv_info {
        struct list_head list;
+       bool enabled;
        bool pending;
        __u8    instance;
        __u32   flags;
@@ -628,6 +629,7 @@ struct hci_conn {
        __u8            init_addr_type;
        bdaddr_t        resp_addr;
        __u8            resp_addr_type;
+       __u8            adv_instance;
        __u16           handle;
        __u16           state;
        __u8            mode;
@@ -1223,14 +1225,25 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data)
        dev_set_drvdata(&hdev->dev, data);
 }
 
+static inline void *hci_get_priv(struct hci_dev *hdev)
+{
+       return (char *)hdev + sizeof(*hdev);
+}
+
 struct hci_dev *hci_dev_get(int index);
 struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type);
 
-struct hci_dev *hci_alloc_dev(void);
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv);
+
+static inline struct hci_dev *hci_alloc_dev(void)
+{
+       return hci_alloc_dev_priv(0);
+}
+
 void hci_free_dev(struct hci_dev *hdev);
 int hci_register_dev(struct hci_dev *hdev);
 void hci_unregister_dev(struct hci_dev *hdev);
-void hci_cleanup_dev(struct hci_dev *hdev);
+void hci_release_dev(struct hci_dev *hdev);
 int hci_suspend_dev(struct hci_dev *hdev);
 int hci_resume_dev(struct hci_dev *hdev);
 int hci_reset_dev(struct hci_dev *hdev);
@@ -1412,6 +1425,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
                                !hci_dev_test_flag(dev, HCI_AUTO_OFF))
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
                                hci_dev_test_flag(dev, HCI_SC_ENABLED))
+#define rpa_valid(dev)         (bacmp(&dev->rpa, BDADDR_ANY) && \
+                               !hci_dev_test_flag(dev, HCI_RPA_EXPIRED))
+#define adv_rpa_valid(adv)     (bacmp(&adv->random_addr, BDADDR_ANY) && \
+                               !adv->rpa_expired)
 
 #define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
                      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
index c8696a2..38785d4 100644 (file)
@@ -303,6 +303,7 @@ int  __bond_3ad_get_active_agg_info(struct bonding *bond,
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
                         struct slave *slave);
 int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
 void bond_3ad_update_lacp_rate(struct bonding *bond);
 void bond_3ad_update_ad_actor_settings(struct bonding *bond);
 int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
index 9d382f2..e64833a 100644 (file)
@@ -64,6 +64,7 @@ enum {
        BOND_OPT_AD_USER_PORT_KEY,
        BOND_OPT_NUM_PEER_NOTIF_ALIAS,
        BOND_OPT_PEER_NOTIF_DELAY,
+       BOND_OPT_LACP_ACTIVE,
        BOND_OPT_LAST
 };
 
index 625d9c7..15e083e 100644 (file)
@@ -129,6 +129,7 @@ struct bond_params {
        int updelay;
        int downdelay;
        int peer_notif_delay;
+       int lacp_active;
        int lacp_fast;
        unsigned int min_links;
        int ad_select;
@@ -149,11 +150,6 @@ struct bond_params {
        u8 ad_actor_system[ETH_ALEN + 2];
 };
 
-struct bond_parm_tbl {
-       char *modename;
-       int mode;
-};
-
 struct slave {
        struct net_device *dev; /* first - useful for panic debug */
        struct bonding *bond; /* our master */
@@ -258,6 +254,7 @@ struct bonding {
        /* protecting ipsec_list */
        spinlock_t ipsec_lock;
 #endif /* CONFIG_XFRM_OFFLOAD */
+       struct bpf_prog *xdp_prog;
 };
 
 #define bond_slave_get_rcu(dev) \
@@ -753,13 +750,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
 
 /* exported from bond_main.c */
 extern unsigned int bond_net_id;
-extern const struct bond_parm_tbl bond_lacp_tbl[];
-extern const struct bond_parm_tbl xmit_hashtype_tbl[];
-extern const struct bond_parm_tbl arp_validate_tbl[];
-extern const struct bond_parm_tbl arp_all_targets_tbl[];
-extern const struct bond_parm_tbl fail_over_mac_tbl[];
-extern const struct bond_parm_tbl pri_reselect_tbl[];
-extern struct bond_parm_tbl ad_select_tbl[];
 
 /* exported from bond_netlink.c */
 extern struct rtnl_link_ops bond_link_ops;
index 161cdf7..62dd842 100644 (file)
@@ -1252,6 +1252,27 @@ struct cfg80211_csa_settings {
        u8 count;
 };
 
+/**
+ * struct cfg80211_color_change_settings - color change settings
+ *
+ * Used for bss color change
+ *
+ * @beacon_color_change: beacon data while performing the color countdown
+ * @counter_offsets_beacon: offsets of the counters within the beacon (tail)
+ * @counter_offsets_presp: offsets of the counters within the probe response
+ * @beacon_next: beacon data to be used after the color change
+ * @count: number of beacons until the color change
+ * @color: the color used after the change
+ */
+struct cfg80211_color_change_settings {
+       struct cfg80211_beacon_data beacon_color_change;
+       u16 counter_offset_beacon;
+       u16 counter_offset_presp;
+       struct cfg80211_beacon_data beacon_next;
+       u8 count;
+       u8 color;
+};
+
 /**
  * struct iface_combination_params - input parameters for interface combinations
  *
@@ -3995,6 +4016,8 @@ struct mgmt_frame_regs {
  *     given TIDs. This callback may sleep.
  *
  * @set_sar_specs: Update the SAR (TX power) settings.
+ *
+ * @color_change: Initiate a color change.
  */
 struct cfg80211_ops {
        int     (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4322,6 +4345,9 @@ struct cfg80211_ops {
                                    const u8 *peer, u8 tids);
        int     (*set_sar_specs)(struct wiphy *wiphy,
                                 struct cfg80211_sar_specs *sar);
+       int     (*color_change)(struct wiphy *wiphy,
+                               struct net_device *dev,
+                               struct cfg80211_color_change_settings *params);
 };
 
 /*
@@ -8218,4 +8244,70 @@ void cfg80211_update_owe_info_event(struct net_device *netdev,
  */
 void cfg80211_bss_flush(struct wiphy *wiphy);
 
+/**
+ * cfg80211_bss_color_notify - notify about bss color event
+ * @dev: network device
+ * @gfp: allocation flags
+ * @cmd: the actual event we want to notify
+ * @count: the number of TBTTs until the color change happens
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ */
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+                             enum nl80211_commands cmd, u8 count,
+                             u64 color_bitmap);
+
+/**
+ * cfg80211_obss_color_collision_notify - notify about bss color collision
+ * @dev: network device
+ * @color_bitmap: representations of the colors that the local BSS is aware of
+ */
+static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
+                                                      u64 color_bitmap)
+{
+       return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+                                        NL80211_CMD_OBSS_COLOR_COLLISION,
+                                        0, color_bitmap);
+}
+
+/**
+ * cfg80211_color_change_started_notify - notify color change start
+ * @dev: the device on which the color is switched
+ * @count: the number of TBTTs until the color change happens
+ *
+ * Inform the userspace about the color change that has started.
+ */
+static inline int cfg80211_color_change_started_notify(struct net_device *dev,
+                                                      u8 count)
+{
+       return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+                                        NL80211_CMD_COLOR_CHANGE_STARTED,
+                                        count, 0);
+}
+
+/**
+ * cfg80211_color_change_aborted_notify - notify color change abort
+ * @dev: the device on which the color is switched
+ *
+ * Inform the userspace about the color change that has aborted.
+ */
+static inline int cfg80211_color_change_aborted_notify(struct net_device *dev)
+{
+       return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+                                        NL80211_CMD_COLOR_CHANGE_ABORTED,
+                                        0, 0);
+}
+
+/**
+ * cfg80211_color_change_notify - notify color change completion
+ * @dev: the device on which the color was switched
+ *
+ * Inform the userspace about the color change that has completed.
+ */
+static inline int cfg80211_color_change_notify(struct net_device *dev)
+{
+       return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+                                        NL80211_CMD_COLOR_CHANGE_COMPLETED,
+                                        0, 0);
+}
+
 #endif /* __NET_CFG80211_H */
index 84805bd..595fee0 100644 (file)
@@ -71,13 +71,26 @@ struct compat_group_source_req {
 } __packed;
 
 struct compat_group_filter {
-       __u32                            gf_interface;
-       struct __kernel_sockaddr_storage gf_group
-               __aligned(4);
-       __u32                            gf_fmode;
-       __u32                            gf_numsrc;
-       struct __kernel_sockaddr_storage gf_slist[1]
-               __aligned(4);
+       union {
+               struct {
+                       __u32                            gf_interface_aux;
+                       struct __kernel_sockaddr_storage gf_group_aux
+                               __aligned(4);
+                       __u32                            gf_fmode_aux;
+                       __u32                            gf_numsrc_aux;
+                       struct __kernel_sockaddr_storage gf_slist[1]
+                               __aligned(4);
+               } __packed;
+               struct {
+                       __u32                            gf_interface;
+                       struct __kernel_sockaddr_storage gf_group
+                               __aligned(4);
+                       __u32                            gf_fmode;
+                       __u32                            gf_numsrc;
+                       struct __kernel_sockaddr_storage gf_slist_flex[]
+                               __aligned(4);
+               } __packed;
+       };
 } __packed;
 
 #endif /* NET_COMPAT_H */
index 57b738b..154cf0d 100644 (file)
@@ -32,7 +32,7 @@ struct devlink_dev_stats {
 struct devlink_ops;
 
 struct devlink {
-       struct list_head list;
+       u32 index;
        struct list_head port_list;
        struct list_head rate_list;
        struct list_head sb_list;
@@ -55,8 +55,9 @@ struct devlink {
                            * port, sb, dpipe, resource, params, region, traps and more.
                            */
        u8 reload_failed:1,
-          reload_enabled:1,
-          registered:1;
+          reload_enabled:1;
+       refcount_t refcount;
+       struct completion comp;
        char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -158,7 +159,6 @@ struct devlink_port {
        struct list_head region_list;
        struct devlink *devlink;
        unsigned int index;
-       bool registered;
        spinlock_t type_lock; /* Protects type and type_dev
                               * pointer consistency.
                               */
@@ -521,6 +521,9 @@ enum devlink_param_generic_id {
        DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
        DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
        DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET,
+       DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+       DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+       DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
 
        /* add new param generic ids above here*/
        __DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -561,6 +564,15 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME "enable_remote_dev_reset"
 #define DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME "enable_eth"
+#define DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME "enable_rdma"
+#define DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME "enable_vnet"
+#define DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)     \
 {                                                                      \
        .id = DEVLINK_PARAM_GENERIC_ID_##_id,                           \
@@ -1398,8 +1410,8 @@ struct devlink_ops {
         *
         * Note: @extack can be NULL when port notifier queries the port function.
         */
-       int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port,
-                                        u8 *hw_addr, int *hw_addr_len,
+       int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+                                        int *hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
         * @port_function_hw_addr_set: Port function's hardware address set function.
@@ -1408,7 +1420,7 @@ struct devlink_ops {
         * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
         * function handling for a particular port.
         */
-       int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port,
+       int (*port_function_hw_addr_set)(struct devlink_port *port,
                                         const u8 *hw_addr, int hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
@@ -1464,8 +1476,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_get)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_get)(struct devlink_port *port,
                                 enum devlink_port_fn_state *state,
                                 enum devlink_port_fn_opstate *opstate,
                                 struct netlink_ext_ack *extack);
@@ -1480,8 +1491,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_set)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_set)(struct devlink_port *port,
                                 enum devlink_port_fn_state state,
                                 struct netlink_ext_ack *extack);
 
@@ -1542,9 +1552,21 @@ static inline struct devlink *netdev_to_devlink(struct net_device *dev)
 struct ib_device;
 
 struct net *devlink_net(const struct devlink *devlink);
-void devlink_net_set(struct devlink *devlink, struct net *net);
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
-int devlink_register(struct devlink *devlink, struct device *dev);
+/* This call is intended for software devices that can create
+ * devlink instances in other namespaces than init_net.
+ *
+ * Drivers that operate on real HW must use devlink_alloc() instead.
+ */
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+                                size_t priv_size, struct net *net,
+                                struct device *dev);
+static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
+                                           size_t priv_size,
+                                           struct device *dev)
+{
+       return devlink_alloc_ns(ops, priv_size, &init_net, dev);
+}
+int devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_reload_enable(struct devlink *devlink);
 void devlink_reload_disable(struct devlink *devlink);
@@ -1625,8 +1647,16 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
                               const struct devlink_param *params,
                               size_t params_count);
+int devlink_param_register(struct devlink *devlink,
+                          const struct devlink_param *param);
+void devlink_param_unregister(struct devlink *devlink,
+                             const struct devlink_param *param);
 void devlink_params_publish(struct devlink *devlink);
 void devlink_params_unpublish(struct devlink *devlink);
+void devlink_param_publish(struct devlink *devlink,
+                          const struct devlink_param *param);
+void devlink_param_unpublish(struct devlink *devlink,
+                            const struct devlink_param *param);
 int devlink_port_params_register(struct devlink_port *devlink_port,
                                 const struct devlink_param *params,
                                 size_t params_count);
index ccc6e9d..ddd6565 100644 (file)
@@ -29,7 +29,7 @@ struct dn_fib_nh {
 struct dn_fib_info {
        struct dn_fib_info      *fib_next;
        struct dn_fib_info      *fib_prev;
-       int                     fib_treeref;
+       refcount_t              fib_treeref;
        refcount_t              fib_clntref;
        int                     fib_dead;
        unsigned int            fib_flags;
index 33f40c1..f9a1714 100644 (file)
@@ -79,20 +79,13 @@ enum dsa_tag_protocol {
        DSA_TAG_PROTO_SJA1110           = DSA_TAG_PROTO_SJA1110_VALUE,
 };
 
-struct packet_type;
 struct dsa_switch;
 
 struct dsa_device_ops {
        struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
        void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
                             int *offset);
-       /* Used to determine which traffic should match the DSA filter in
-        * eth_type_trans, and which, if any, should bypass it and be processed
-        * as regular on the master net device.
-        */
-       bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
        unsigned int needed_headroom;
        unsigned int needed_tailroom;
        const char *name;
@@ -111,8 +104,8 @@ struct dsa_device_ops {
  * function pointers.
  */
 struct dsa_netdevice_ops {
-       int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr,
-                           int cmd);
+       int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr,
+                            int cmd);
 };
 
 #define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
@@ -159,6 +152,9 @@ struct dsa_switch_tree {
         */
        struct net_device **lags;
        unsigned int lags_len;
+
+       /* Track the largest switch index within a tree */
+       unsigned int last_switch;
 };
 
 #define dsa_lags_foreach_id(_id, _dst)                         \
@@ -238,9 +234,7 @@ struct dsa_port {
 
        /* Copies for faster access in master receive hot path */
        struct dsa_switch_tree *dst;
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
-       bool (*filter)(const struct sk_buff *skb, struct net_device *dev);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
@@ -257,8 +251,11 @@ struct dsa_port {
        struct device_node      *dn;
        unsigned int            ageing_time;
        bool                    vlan_filtering;
+       /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+       bool                    learning;
        u8                      stp_state;
        struct net_device       *bridge_dev;
+       int                     bridge_num;
        struct devlink_port     devlink_port;
        bool                    devlink_port_setup;
        struct phylink          *pl;
@@ -352,6 +349,9 @@ struct dsa_switch {
        unsigned int ageing_time_min;
        unsigned int ageing_time_max;
 
+       /* Storage for drivers using tag_8021q */
+       struct dsa_8021q_context *tag_8021q_ctx;
+
        /* devlink used to represent this switch device */
        struct devlink          *devlink;
 
@@ -363,6 +363,9 @@ struct dsa_switch {
         */
        bool                    vlan_filtering_is_global;
 
+       /* Keep VLAN filtering enabled on ports not offloading any upper. */
+       bool                    needs_standalone_vlan_filtering;
+
        /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
         * that have vlan_filtering=0. All drivers should ideally set this (and
         * then the option would get removed), but it is unknown whether this
@@ -407,6 +410,13 @@ struct dsa_switch {
         */
        unsigned int            num_lag_ids;
 
+       /* Drivers that support bridge forwarding offload should set this to
+        * the maximum number of bridges spanning the same switch tree (or all
+        * trees, in the case of cross-tree bridging support) that can be
+        * offloaded.
+        */
+       unsigned int            num_fwd_offloading_bridges;
+
        size_t num_ports;
 };
 
@@ -690,6 +700,14 @@ struct dsa_switch_ops {
                                    struct net_device *bridge);
        void    (*port_bridge_leave)(struct dsa_switch *ds, int port,
                                     struct net_device *bridge);
+       /* Called right after .port_bridge_join() */
+       int     (*port_bridge_tx_fwd_offload)(struct dsa_switch *ds, int port,
+                                             struct net_device *bridge,
+                                             int bridge_num);
+       /* Called right before .port_bridge_leave() */
+       void    (*port_bridge_tx_fwd_unoffload)(struct dsa_switch *ds, int port,
+                                               struct net_device *bridge,
+                                               int bridge_num);
        void    (*port_stp_state_set)(struct dsa_switch *ds, int port,
                                      u8 state);
        void    (*port_fast_age)(struct dsa_switch *ds, int port);
@@ -699,8 +717,6 @@ struct dsa_switch_ops {
        int     (*port_bridge_flags)(struct dsa_switch *ds, int port,
                                     struct switchdev_brport_flags flags,
                                     struct netlink_ext_ack *extack);
-       int     (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter,
-                                   struct netlink_ext_ack *extack);
 
        /*
         * VLAN support
@@ -869,6 +885,13 @@ struct dsa_switch_ops {
                                          const struct switchdev_obj_ring_role_mrp *mrp);
        int     (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port,
                                          const struct switchdev_obj_ring_role_mrp *mrp);
+
+       /*
+        * tag_8021q operations
+        */
+       int     (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid,
+                                     u16 flags);
+       int     (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
 };
 
 #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes)           \
@@ -954,15 +977,6 @@ static inline bool netdev_uses_dsa(const struct net_device *dev)
        return false;
 }
 
-static inline bool dsa_can_decode(const struct sk_buff *skb,
-                                 struct net_device *dev)
-{
-#if IS_ENABLED(CONFIG_NET_DSA)
-       return !dev->dsa_ptr->filter || dev->dsa_ptr->filter(skb, dev);
-#endif
-       return false;
-}
-
 /* All DSA tags that push the EtherType to the right (basically all except tail
  * tags, which don't break dissection) can be treated the same from the
  * perspective of the flow dissector.
@@ -1003,8 +1017,8 @@ static inline int __dsa_netdevice_ops_check(struct net_device *dev)
        return 0;
 }
 
-static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
-                                  int cmd)
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+                                   int cmd)
 {
        const struct dsa_netdevice_ops *ops;
        int err;
@@ -1015,11 +1029,11 @@ static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
 
        ops = dev->dsa_ptr->netdev_ops;
 
-       return ops->ndo_do_ioctl(dev, ifr, cmd);
+       return ops->ndo_eth_ioctl(dev, ifr, cmd);
 }
 #else
-static inline int dsa_ndo_do_ioctl(struct net_device *dev, struct ifreq *ifr,
-                                  int cmd)
+static inline int dsa_ndo_eth_ioctl(struct net_device *dev, struct ifreq *ifr,
+                                   int cmd)
 {
        return -EOPNOTSUPP;
 }
index 75b1e73..a057319 100644 (file)
@@ -277,6 +277,7 @@ static inline void skb_dst_drop(struct sk_buff *skb)
 
 static inline void __skb_dst_copy(struct sk_buff *nskb, unsigned long refdst)
 {
+       nskb->slow_gro |= !!refdst;
        nskb->_skb_refdst = refdst;
        if (!(nskb->_skb_refdst & SKB_DST_NOREF))
                dst_clone(skb_dst(nskb));
@@ -316,6 +317,7 @@ static inline bool skb_dst_force(struct sk_buff *skb)
                        dst = NULL;
 
                skb->_skb_refdst = (unsigned long)dst;
+               skb->slow_gro |= !!dst;
        }
 
        return skb->_skb_refdst != 0UL;
index 1b9d75a..3961461 100644 (file)
@@ -451,6 +451,7 @@ struct flow_block_offload {
        struct list_head *driver_block_list;
        struct netlink_ext_ack *extack;
        struct Qdisc *sch;
+       struct list_head *cb_list_head;
 };
 
 enum tc_setup_type;
index c085493..1163035 100644 (file)
@@ -43,6 +43,11 @@ struct ieee80211_radiotap_header {
         * @it_present: (first) present word
         */
        __le32 it_present;
+
+       /**
+        * @it_optional: all remaining presence bitmaps
+        */
+       __le32 it_optional[];
 } __packed;
 
 /* version is always 0 */
index 71bb4cc..653e7d0 100644 (file)
@@ -82,9 +82,6 @@ struct ip6_sf_socklist {
        struct in6_addr         sl_addr[];
 };
 
-#define IP6_SFLSIZE(count)     (sizeof(struct ip6_sf_socklist) + \
-       (count) * sizeof(struct in6_addr))
-
 #define IP6_SFBLOCK    10      /* allocate this many at once */
 
 struct ipv6_mc_socklist {
@@ -213,6 +210,8 @@ struct inet6_dev {
 
        unsigned long           tstamp; /* ipv6InterfaceTable update timestamp */
        struct rcu_head         rcu;
+
+       unsigned int            ra_mtu;
 };
 
 static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf)
index ca6a3ea..f72ec11 100644 (file)
@@ -160,6 +160,12 @@ struct inet_hashinfo {
                                        ____cacheline_aligned_in_smp;
 };
 
+#define inet_lhash2_for_each_icsk_continue(__icsk) \
+       hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
+
+#define inet_lhash2_for_each_icsk(__icsk, list) \
+       hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
+
 #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
        hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
 
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
new file mode 100644 (file)
index 0000000..3c2993b
--- /dev/null
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ *  IPv6 IOAM implementation
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _NET_IOAM6_H
+#define _NET_IOAM6_H
+
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/ioam6.h>
+#include <linux/rhashtable-types.h>
+
+struct ioam6_namespace {
+       struct rhash_head head;
+       struct rcu_head rcu;
+
+       struct ioam6_schema __rcu *schema;
+
+       __be16 id;
+       __be32 data;
+       __be64 data_wide;
+};
+
+struct ioam6_schema {
+       struct rhash_head head;
+       struct rcu_head rcu;
+
+       struct ioam6_namespace __rcu *ns;
+
+       u32 id;
+       int len;
+       __be32 hdr;
+
+       u8 data[0];
+};
+
+struct ioam6_pernet_data {
+       struct mutex lock;
+       struct rhashtable namespaces;
+       struct rhashtable schemas;
+};
+
+static inline struct ioam6_pernet_data *ioam6_pernet(struct net *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       return net->ipv6.ioam6_data;
+#else
+       return NULL;
+#endif
+}
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id);
+void ioam6_fill_trace_data(struct sk_buff *skb,
+                          struct ioam6_namespace *ns,
+                          struct ioam6_trace_hdr *trace);
+
+int ioam6_init(void);
+void ioam6_exit(void);
+
+int ioam6_iptunnel_init(void);
+void ioam6_iptunnel_exit(void);
+
+#endif /* _NET_IOAM6_H */
index d9683be..9192444 100644 (file)
@@ -436,18 +436,32 @@ static inline bool ip_sk_ignore_df(const struct sock *sk)
 static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
                                                    bool forwarding)
 {
+       const struct rtable *rt = container_of(dst, struct rtable, dst);
        struct net *net = dev_net(dst->dev);
        unsigned int mtu;
 
        if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
            ip_mtu_locked(dst) ||
-           !forwarding)
-               return dst_mtu(dst);
+           !forwarding) {
+               mtu = rt->rt_pmtu;
+               if (mtu && time_before(jiffies, rt->dst.expires))
+                       goto out;
+       }
 
        /* 'forwarding = true' case should always honour route mtu */
        mtu = dst_metric_raw(dst, RTAX_MTU);
-       if (!mtu)
-               mtu = min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
+       if (mtu)
+               goto out;
+
+       mtu = READ_ONCE(dst->dev->mtu);
+
+       if (unlikely(ip_mtu_locked(dst))) {
+               if (rt->rt_uses_gateway && mtu > 576)
+                       mtu = 576;
+       }
+
+out:
+       mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
 
        return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
index 0bf09a9..5efd0b7 100644 (file)
@@ -316,12 +316,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
               !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
 }
 
-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
+                                                    bool forwarding)
 {
        struct inet6_dev *idev;
        unsigned int mtu;
 
-       if (dst_metric_locked(dst, RTAX_MTU)) {
+       if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
                mtu = dst_metric_raw(dst, RTAX_MTU);
                if (mtu)
                        goto out;
index 3ab2563..21c5386 100644 (file)
@@ -133,7 +133,7 @@ struct fib_info {
        struct hlist_node       fib_lhash;
        struct list_head        nh_list;
        struct net              *fib_net;
-       int                     fib_treeref;
+       refcount_t              fib_treeref;
        refcount_t              fib_clntref;
        unsigned int            fib_flags;
        unsigned char           fib_dead;
index 548b65b..bc3b13e 100644 (file)
@@ -270,7 +270,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                       const u8 proto, int tunnel_hlen);
 int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
-int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                            void __user *data, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
diff --git a/include/net/ipx.h b/include/net/ipx.h
deleted file mode 100644 (file)
index 9d13428..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_INET_IPX_H_
-#define _NET_INET_IPX_H_
-/*
- *     The following information is in its entirety obtained from:
- *
- *     Novell 'IPX Router Specification' Version 1.10 
- *             Part No. 107-000029-001
- *
- *     Which is available from ftp.novell.com
- */
-
-#include <linux/netdevice.h>
-#include <net/datalink.h>
-#include <linux/ipx.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-
-struct ipx_address {
-       __be32  net;
-       __u8    node[IPX_NODE_LEN]; 
-       __be16  sock;
-};
-
-#define ipx_broadcast_node     "\377\377\377\377\377\377"
-#define ipx_this_node           "\0\0\0\0\0\0"
-
-#define IPX_MAX_PPROP_HOPS 8
-
-struct ipxhdr {
-       __be16                  ipx_checksum __packed;
-#define IPX_NO_CHECKSUM        cpu_to_be16(0xFFFF)
-       __be16                  ipx_pktsize __packed;
-       __u8                    ipx_tctrl;
-       __u8                    ipx_type;
-#define IPX_TYPE_UNKNOWN       0x00
-#define IPX_TYPE_RIP           0x01    /* may also be 0 */
-#define IPX_TYPE_SAP           0x04    /* may also be 0 */
-#define IPX_TYPE_SPX           0x05    /* SPX protocol */
-#define IPX_TYPE_NCP           0x11    /* $lots for docs on this (SPIT) */
-#define IPX_TYPE_PPROP         0x14    /* complicated flood fill brdcast */
-       struct ipx_address      ipx_dest __packed;
-       struct ipx_address      ipx_source __packed;
-};
-
-/* From af_ipx.c */
-extern int sysctl_ipx_pprop_broadcasting;
-
-struct ipx_interface {
-       /* IPX address */
-       __be32                  if_netnum;
-       unsigned char           if_node[IPX_NODE_LEN];
-       refcount_t              refcnt;
-
-       /* physical device info */
-       struct net_device       *if_dev;
-       struct datalink_proto   *if_dlink;
-       __be16                  if_dlink_type;
-
-       /* socket support */
-       unsigned short          if_sknum;
-       struct hlist_head       if_sklist;
-       spinlock_t              if_sklist_lock;
-
-       /* administrative overhead */
-       int                     if_ipx_offset;
-       unsigned char           if_internal;
-       unsigned char           if_primary;
-       
-       struct list_head        node; /* node in ipx_interfaces list */
-};
-
-struct ipx_route {
-       __be32                  ir_net;
-       struct ipx_interface    *ir_intrfc;
-       unsigned char           ir_routed;
-       unsigned char           ir_router_node[IPX_NODE_LEN];
-       struct list_head        node; /* node in ipx_routes list */
-       refcount_t              refcnt;
-};
-
-struct ipx_cb {
-       u8      ipx_tctrl;
-       __be32  ipx_dest_net;
-       __be32  ipx_source_net;
-       struct {
-               __be32 netnum;
-               int index;
-       } last_hop;
-};
-
-#include <net/sock.h>
-
-struct ipx_sock {
-       /* struct sock has to be the first member of ipx_sock */
-       struct sock             sk;
-       struct ipx_address      dest_addr;
-       struct ipx_interface    *intrfc;
-       __be16                  port;
-#ifdef CONFIG_IPX_INTERN
-       unsigned char           node[IPX_NODE_LEN];
-#endif
-       unsigned short          type;
-       /*
-        * To handle special ncp connection-handling sockets for mars_nwe,
-        * the connection number must be stored in the socket.
-        */
-       unsigned short          ipx_ncp_conn;
-};
-
-static inline struct ipx_sock *ipx_sk(struct sock *sk)
-{
-       return (struct ipx_sock *)sk;
-}
-
-#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0]))
-
-#define IPX_MIN_EPHEMERAL_SOCKET       0x4000
-#define IPX_MAX_EPHEMERAL_SOCKET       0x7fff
-
-extern struct list_head ipx_routes;
-extern rwlock_t ipx_routes_lock;
-
-extern struct list_head ipx_interfaces;
-struct ipx_interface *ipx_interfaces_head(void);
-extern spinlock_t ipx_interfaces_lock;
-
-extern struct ipx_interface *ipx_primary_net;
-
-int ipx_proc_init(void);
-void ipx_proc_exit(void);
-
-const char *ipx_frame_name(__be16);
-const char *ipx_device_name(struct ipx_interface *intrfc);
-
-static __inline__ void ipxitf_hold(struct ipx_interface *intrfc)
-{
-       refcount_inc(&intrfc->refcnt);
-}
-
-void ipxitf_down(struct ipx_interface *intrfc);
-struct ipx_interface *ipxitf_find_using_net(__be32 net);
-int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node);
-__be16 ipx_cksum(struct ipxhdr *packet, int length);
-int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
-                    unsigned char *node);
-void ipxrtr_del_routes(struct ipx_interface *intrfc);
-int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
-                       struct msghdr *msg, size_t len, int noblock);
-int ipxrtr_route_skb(struct sk_buff *skb);
-struct ipx_route *ipxrtr_lookup(__be32 net);
-int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
-static __inline__ void ipxitf_put(struct ipx_interface *intrfc)
-{
-       if (refcount_dec_and_test(&intrfc->refcnt))
-               ipxitf_down(intrfc);
-}
-
-static __inline__ void ipxrtr_hold(struct ipx_route *rt)
-{
-               refcount_inc(&rt->refcnt);
-}
-
-static __inline__ void ipxrtr_put(struct ipx_route *rt)
-{
-               if (refcount_dec_and_test(&rt->refcnt))
-                                       kfree(rt);
-}
-#endif /* _NET_INET_IPX_H_ */
index 05cfd6f..6f15e6f 100644 (file)
@@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
 };
 
 #ifdef CONFIG_LWTUNNEL
+
+DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+
 void lwtstate_free(struct lwtunnel_state *lws);
 
 static inline struct lwtunnel_state *
index d8a1d09..af0fc13 100644 (file)
@@ -1711,6 +1711,10 @@ enum ieee80211_offload_flags {
  *     protected by fq->lock.
  * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
  *     &enum ieee80211_offload_flags.
+ * @color_change_active: marks whether a color change is ongoing. Internally it is
+ *     write-protected by sdata_lock and local->mtx so holding either is fine
+ *     for read access.
+ * @color_change_color: the bss color that will be used after the change.
  */
 struct ieee80211_vif {
        enum nl80211_iftype type;
@@ -1739,6 +1743,9 @@ struct ieee80211_vif {
 
        bool txqs_stopped[IEEE80211_NUM_ACS];
 
+       bool color_change_active;
+       u8 color_change_color;
+
        /* must be last */
        u8 drv_priv[] __aligned(sizeof(void *));
 };
@@ -3919,6 +3926,13 @@ struct ieee80211_prep_tx_info {
  * @set_sar_specs: Update the SAR (TX power) settings.
  * @sta_set_decap_offload: Called to notify the driver when a station is allowed
  *     to use rx decapsulation offload
+ * @add_twt_setup: Update hw with TWT agreement parameters received from the peer.
+ *     This callback allows the hw to check if requested parameters
+ *     are supported and if there is enough room for a new agreement.
+ *     The hw is expected to set agreement result in the req_type field of
+ *     twt structure.
+ * @twt_teardown_request: Update the hw with TWT teardown request received
+ *     from the peer.
  */
 struct ieee80211_ops {
        void (*tx)(struct ieee80211_hw *hw,
@@ -4242,6 +4256,11 @@ struct ieee80211_ops {
        void (*sta_set_decap_offload)(struct ieee80211_hw *hw,
                                      struct ieee80211_vif *vif,
                                      struct ieee80211_sta *sta, bool enabled);
+       void (*add_twt_setup)(struct ieee80211_hw *hw,
+                             struct ieee80211_sta *sta,
+                             struct ieee80211_twt_setup *twt);
+       void (*twt_teardown_request)(struct ieee80211_hw *hw,
+                                    struct ieee80211_sta *sta, u8 flowid);
 };
 
 /**
@@ -5007,6 +5026,16 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif);
  */
 bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
 
+/**
+ * ieee80211_color_change_finish - notify mac80211 about color change
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * After a color change announcement was scheduled and the counter in this
+ * announcement hits 1, this function must be called by the driver to
+ * notify mac80211 that the color can be changed
+ */
+void ieee80211_color_change_finish(struct ieee80211_vif *vif);
+
 /**
  * ieee80211_proberesp_get - retrieve a Probe Response template
  * @hw: pointer obtained from ieee80211_alloc_hw().
@@ -6771,6 +6800,18 @@ struct sk_buff *
 ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
                                          struct ieee80211_vif *vif);
 
+/**
+ * ieeee80211_obss_color_collision_notify - notify userland about a BSS color
+ * collision.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
+ *     aware of.
+ */
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+                                      u64 color_bitmap);
+
 /**
  * ieee80211_is_tx_data - check if frame is a data frame
  *
diff --git a/include/net/mctp.h b/include/net/mctp.h
new file mode 100644 (file)
index 0000000..a824d47
--- /dev/null
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTP_H
+#define __NET_MCTP_H
+
+#include <linux/bits.h>
+#include <linux/mctp.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+/* MCTP packet definitions */
+struct mctp_hdr {
+       u8      ver;
+       u8      dest;
+       u8      src;
+       u8      flags_seq_tag;
+};
+
+#define MCTP_VER_MIN   1
+#define MCTP_VER_MAX   1
+
+/* Definitions for flags_seq_tag field */
+#define MCTP_HDR_FLAG_SOM      BIT(7)
+#define MCTP_HDR_FLAG_EOM      BIT(6)
+#define MCTP_HDR_FLAG_TO       BIT(3)
+#define MCTP_HDR_FLAGS         GENMASK(5, 3)
+#define MCTP_HDR_SEQ_SHIFT     4
+#define MCTP_HDR_SEQ_MASK      GENMASK(1, 0)
+#define MCTP_HDR_TAG_SHIFT     0
+#define MCTP_HDR_TAG_MASK      GENMASK(2, 0)
+
+#define MCTP_HEADER_MAXLEN     4
+
+#define MCTP_INITIAL_DEFAULT_NET       1
+
+static inline bool mctp_address_ok(mctp_eid_t eid)
+{
+       return eid >= 8 && eid < 255;
+}
+
+static inline struct mctp_hdr *mctp_hdr(struct sk_buff *skb)
+{
+       return (struct mctp_hdr *)skb_network_header(skb);
+}
+
+/* socket implementation */
+struct mctp_sock {
+       struct sock     sk;
+
+       /* bind() params */
+       int             bind_net;
+       mctp_eid_t      bind_addr;
+       __u8            bind_type;
+
+       /* list of mctp_sk_key, for incoming tag lookup. updates protected
+        * by sk->net->keys_lock
+        */
+       struct hlist_head keys;
+};
+
+/* Key for matching incoming packets to sockets or reassembly contexts.
+ * Packets are matched on (src,dest,tag).
+ *
+ * Lifetime requirements:
+ *
+ *  - keys are free()ed via RCU
+ *
+ *  - a mctp_sk_key contains a reference to a struct sock; this is valid
+ *    for the life of the key. On sock destruction (through unhash), the key is
+ *    removed from lists (see below), and will not be observable after a RCU
+ *    grace period.
+ *
+ *    any RX occurring within that grace period may still queue to the socket,
+ *    but will hit the SOCK_DEAD case before the socket is freed.
+ *
+ * - these mctp_sk_keys appear on two lists:
+ *     1) the struct mctp_sock->keys list
+ *     2) the struct netns_mctp->keys list
+ *
+ *        updates to either list are performed under the netns_mctp->keys
+ *        lock.
+ *
+ * - a key may have a sk_buff attached as part of an in-progress message
+ *   reassembly (->reasm_head). The reassembly context is protected by
+ *   reasm_lock, which may be acquired with the keys lock (above) held, if
+ *   necessary. Consequently, keys lock *cannot* be acquired with the
+ *   reasm_lock held.
+ *
+ * - there are two destruction paths for a mctp_sk_key:
+ *
+ *    - through socket unhash (see mctp_sk_unhash). This performs the list
+ *      removal under keys_lock.
+ *
+ *    - where a key is established to receive a reply message: after receiving
+ *      the (complete) reply, or during reassembly errors. Here, we clean up
+ *      the reassembly context (marking reasm_dead, to prevent another from
+ *      starting), and remove the socket from the netns & socket lists.
+ */
+struct mctp_sk_key {
+       mctp_eid_t      peer_addr;
+       mctp_eid_t      local_addr;
+       __u8            tag; /* incoming tag match; invert TO for local */
+
+       /* we hold a ref to sk when set */
+       struct sock     *sk;
+
+       /* routing lookup list */
+       struct hlist_node hlist;
+
+       /* per-socket list */
+       struct hlist_node sklist;
+
+       /* incoming fragment reassembly context */
+       spinlock_t      reasm_lock;
+       struct sk_buff  *reasm_head;
+       struct sk_buff  **reasm_tailp;
+       bool            reasm_dead;
+       u8              last_seq;
+
+       struct rcu_head rcu;
+};
+
+struct mctp_skb_cb {
+       unsigned int    magic;
+       unsigned int    net;
+       mctp_eid_t      src;
+};
+
+/* skb control-block accessors with a little extra debugging for initial
+ * development.
+ *
+ * TODO: remove checks & mctp_skb_cb->magic; replace callers of __mctp_cb
+ * with mctp_cb().
+ *
+ * __mctp_cb() is only for the initial ingress code; we should see ->magic set
+ * at all times after this.
+ */
+static inline struct mctp_skb_cb *__mctp_cb(struct sk_buff *skb)
+{
+       struct mctp_skb_cb *cb = (void *)skb->cb;
+
+       cb->magic = 0x4d435450;
+       return cb;
+}
+
+static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
+{
+       struct mctp_skb_cb *cb = (void *)skb->cb;
+
+       WARN_ON(cb->magic != 0x4d435450);
+       return (void *)(skb->cb);
+}
+
+/* Route definition.
+ *
+ * These are held in the pernet->mctp.routes list, with RCU protection for
+ * removed routes. We hold a reference to the netdev; routes need to be
+ * dropped on NETDEV_UNREGISTER events.
+ *
+ * Updates to the route table are performed under rtnl; all reads under RCU,
+ * so routes cannot be referenced over a RCU grace period. Specifically: A
+ * caller cannot block between mctp_route_lookup and passing the route to
+ * mctp_do_route.
+ */
+struct mctp_route {
+       mctp_eid_t              min, max;
+
+       struct mctp_dev         *dev;
+       unsigned int            mtu;
+       unsigned char           type;
+       int                     (*output)(struct mctp_route *route,
+                                         struct sk_buff *skb);
+
+       struct list_head        list;
+       refcount_t              refs;
+       struct rcu_head         rcu;
+};
+
+/* route interfaces */
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+                                    mctp_eid_t daddr);
+
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+                     struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+
+/* routing <--> device interface */
+unsigned int mctp_default_net(struct net *net);
+int mctp_default_net_set(struct net *net, unsigned int index);
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr);
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr);
+void mctp_route_remove_dev(struct mctp_dev *mdev);
+
+/* neighbour definitions */
+enum mctp_neigh_source {
+       MCTP_NEIGH_STATIC,
+       MCTP_NEIGH_DISCOVER,
+};
+
+struct mctp_neigh {
+       struct mctp_dev         *dev;
+       mctp_eid_t              eid;
+       enum mctp_neigh_source  source;
+
+       unsigned char           ha[MAX_ADDR_LEN];
+
+       struct list_head        list;
+       struct rcu_head         rcu;
+};
+
+int mctp_neigh_init(void);
+void mctp_neigh_exit(void);
+
+// ret_hwaddr may be NULL, otherwise must have space for MAX_ADDR_LEN
+int mctp_neigh_lookup(struct mctp_dev *dev, mctp_eid_t eid,
+                     void *ret_hwaddr);
+void mctp_neigh_remove_dev(struct mctp_dev *mdev);
+
+int mctp_routes_init(void);
+void mctp_routes_exit(void);
+
+void mctp_device_init(void);
+void mctp_device_exit(void);
+
+#endif /* __NET_MCTP_H */
diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h
new file mode 100644 (file)
index 0000000..71a1101
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Management Component Transport Protocol (MCTP) - device
+ * definitions.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __NET_MCTPDEVICE_H
+#define __NET_MCTPDEVICE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/refcount.h>
+
+struct mctp_dev {
+       struct net_device       *dev;
+
+       unsigned int            net;
+
+       /* Only modified under RTNL. Reads have addrs_lock held */
+       u8                      *addrs;
+       size_t                  num_addrs;
+       spinlock_t              addrs_lock;
+
+       struct rcu_head         rcu;
+};
+
+#define MCTP_INITIAL_DEFAULT_NET       1
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev);
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev);
+
+#endif /* __NET_MCTPDEVICE_H */
index 8b5af68..6026bbe 100644 (file)
@@ -58,10 +58,6 @@ struct mptcp_addr_info {
 struct mptcp_out_options {
 #if IS_ENABLED(CONFIG_MPTCP)
        u16 suboptions;
-       u64 sndr_key;
-       u64 rcvr_key;
-       u64 ahmac;
-       struct mptcp_addr_info addr;
        struct mptcp_rm_list rm_list;
        u8 join_id;
        u8 backup;
@@ -69,11 +65,26 @@ struct mptcp_out_options {
           reset_transient:1,
           csum_reqd:1,
           allow_join_id0:1;
-       u32 nonce;
-       u64 thmac;
-       u32 token;
-       u8 hmac[20];
-       struct mptcp_ext ext_copy;
+       union {
+               struct {
+                       u64 sndr_key;
+                       u64 rcvr_key;
+               };
+               struct {
+                       struct mptcp_addr_info addr;
+                       u64 ahmac;
+               };
+               struct {
+                       struct mptcp_ext ext_copy;
+                       u64 fail_seq;
+               };
+               struct {
+                       u32 nonce;
+                       u32 token;
+                       u64 thmac;
+                       u8 hmac[20];
+               };
+       };
 #endif
 };
 
index 12cf6d7..bb5fa59 100644 (file)
@@ -23,7 +23,6 @@
 #include <net/netns/ieee802154_6lowpan.h>
 #include <net/netns/sctp.h>
 #include <net/netns/netfilter.h>
-#include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netns/conntrack.h>
 #endif
@@ -34,6 +33,7 @@
 #include <net/netns/xdp.h>
 #include <net/netns/smc.h>
 #include <net/netns/bpf.h>
+#include <net/netns/mctp.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -132,7 +132,6 @@ struct net {
 #endif
 #ifdef CONFIG_NETFILTER
        struct netns_nf         nf;
-       struct netns_xt         xt;
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
        struct netns_ct         ct;
 #endif
@@ -167,6 +166,9 @@ struct net {
 #ifdef CONFIG_XDP_SOCKETS
        struct netns_xdp        xdp;
 #endif
+#if IS_ENABLED(CONFIG_MCTP)
+       struct netns_mctp       mctp;
+#endif
 #if IS_ENABLED(CONFIG_CRYPTO_USER)
        struct sock             *crypto_nlsk;
 #endif
index d00ba60..d932e22 100644 (file)
@@ -72,14 +72,20 @@ struct nf_ct_event {
        int report;
 };
 
+struct nf_exp_event {
+       struct nf_conntrack_expect *exp;
+       u32 portid;
+       int report;
+};
+
 struct nf_ct_event_notifier {
-       int (*fcn)(unsigned int events, struct nf_ct_event *item);
+       int (*ct_event)(unsigned int events, const struct nf_ct_event *item);
+       int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
 };
 
-int nf_conntrack_register_notifier(struct net *net,
-                                  struct nf_ct_event_notifier *nb);
-void nf_conntrack_unregister_notifier(struct net *net,
-                                     struct nf_ct_event_notifier *nb);
+void nf_conntrack_register_notifier(struct net *net,
+                                  const struct nf_ct_event_notifier *nb);
+void nf_conntrack_unregister_notifier(struct net *net);
 
 void nf_ct_deliver_cached_events(struct nf_conn *ct);
 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@@ -151,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-
-struct nf_exp_event {
-       struct nf_conntrack_expect *exp;
-       u32 portid;
-       int report;
-};
-
-struct nf_exp_event_notifier {
-       int (*fcn)(unsigned int events, struct nf_exp_event *item);
-};
-
-int nf_ct_expect_register_notifier(struct net *net,
-                                  struct nf_exp_event_notifier *nb);
-void nf_ct_expect_unregister_notifier(struct net *net,
-                                     struct nf_exp_event_notifier *nb);
-
 void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                               struct nf_conntrack_expect *exp,
                               u32 portid, int report);
diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h
new file mode 100644 (file)
index 0000000..52e2792
--- /dev/null
@@ -0,0 +1,7 @@
+#include <linux/sysctl.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+                                    void *buffer, size_t *lenp, loff_t *ppos);
+#endif
index e770bba..9eed51e 100644 (file)
@@ -33,8 +33,8 @@ struct nf_queue_handler {
        void            (*nf_hook_drop)(struct net *net);
 };
 
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
-void nf_unregister_queue_handler(struct net *net);
+void nf_register_queue_handler(const struct nf_queue_handler *qh);
+void nf_unregister_queue_handler(void);
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
 
 void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
index 1ceec51..7a2a9d3 100644 (file)
@@ -885,7 +885,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh,
  */
 static inline int nlmsg_report(const struct nlmsghdr *nlh)
 {
-       return !!(nlh->nlmsg_flags & NLM_F_ECHO);
+       return nlh ? !!(nlh->nlmsg_flags & NLM_F_ECHO) : 0;
 }
 
 /**
index fefd38d..0294f3d 100644 (file)
@@ -113,7 +113,6 @@ struct netns_ct {
        struct ct_pcpu __percpu *pcpu_lists;
        struct ip_conntrack_stat __percpu *stat;
        struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
-       struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
        struct nf_ip_net        nf_ct_proto;
 #if defined(CONFIG_NF_CONNTRACK_LABELS)
        unsigned int            labels_used;
index b862051..2f65701 100644 (file)
@@ -174,7 +174,6 @@ struct netns_ipv4 {
        int sysctl_tcp_fastopen;
        const struct tcp_congestion_ops __rcu  *tcp_congestion_control;
        struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-       spinlock_t tcp_fastopen_ctx_lock;
        unsigned int sysctl_tcp_fastopen_blackhole_timeout;
        atomic_t tfo_active_disable_times;
        unsigned long tfo_active_disable_stamp;
index bde0b7a..a4b5503 100644 (file)
@@ -51,6 +51,8 @@ struct netns_sysctl_ipv6 {
        int max_dst_opts_len;
        int max_hbh_opts_len;
        int seg6_flowlabel;
+       u32 ioam6_id;
+       u64 ioam6_id_wide;
        bool skip_notify_on_dev_down;
        u8 fib_notify_on_flag_change;
 };
@@ -110,6 +112,7 @@ struct netns_ipv6 {
                spinlock_t      lock;
                u32             seq;
        } ip6addrlbl_table;
+       struct ioam6_pernet_data *ioam6_data;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/netns/mctp.h b/include/net/netns/mctp.h
new file mode 100644 (file)
index 0000000..acedef1
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MCTP per-net structures
+ */
+
+#ifndef __NETNS_MCTP_H__
+#define __NETNS_MCTP_H__
+
+#include <linux/types.h>
+
+struct netns_mctp {
+       /* Only updated under RTNL, entries freed via RCU */
+       struct list_head routes;
+
+       /* Bound sockets: list of sockets bound by type.
+        * This list is updated from non-atomic contexts (under bind_lock),
+        * and read (under rcu) in packet rx
+        */
+       struct mutex bind_lock;
+       struct hlist_head binds;
+
+       /* tag allocations. This list is read and updated from atomic contexts,
+        * but elements are free()ed after a RCU grace-period
+        */
+       spinlock_t keys_lock;
+       struct hlist_head keys;
+
+       /* MCTP network */
+       unsigned int default_net;
+
+       /* neighbour table */
+       struct mutex neigh_lock;
+       struct list_head neighbours;
+};
+
+#endif /* __NETNS_MCTP_H__ */
index 15e2b13..986a2a9 100644 (file)
@@ -12,7 +12,6 @@ struct netns_nf {
 #if defined CONFIG_PROC_FS
        struct proc_dir_entry *proc_netfilter;
 #endif
-       const struct nf_queue_handler __rcu *queue_handler;
        const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO];
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *nf_log_dir_header;
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
deleted file mode 100644 (file)
index d02316e..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_X_TABLES_H
-#define __NETNS_X_TABLES_H
-
-#include <linux/list.h>
-#include <linux/netfilter_defs.h>
-
-struct netns_xt {
-       bool notrack_deprecated_warning;
-       bool clusterip_deprecated_warning;
-};
-#endif
index 1f4e181..947733a 100644 (file)
@@ -65,6 +65,13 @@ struct netns_xfrm {
        u32                     sysctl_aevent_rseqth;
        int                     sysctl_larval_drop;
        u32                     sysctl_acq_expires;
+
+       u8                      policy_default;
+#define XFRM_POL_DEFAULT_IN    1
+#define XFRM_POL_DEFAULT_OUT   2
+#define XFRM_POL_DEFAULT_FWD   4
+#define XFRM_POL_DEFAULT_MASK  7
+
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *sysctl_hdr;
 #endif
index 963db96..bb3e8fd 100644 (file)
@@ -191,7 +191,7 @@ struct digital_poll_tech {
 
 struct nfc_digital_dev {
        struct nfc_dev *nfc_dev;
-       struct nfc_digital_ops *ops;
+       const struct nfc_digital_ops *ops;
 
        u32 protocols;
 
@@ -236,7 +236,7 @@ struct nfc_digital_dev {
        void (*skb_add_crc)(struct sk_buff *skb);
 };
 
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
                                                    __u32 supported_protocols,
                                                    __u32 driver_capabilities,
                                                    int tx_headroom,
index b35f37a..756c110 100644 (file)
@@ -118,7 +118,7 @@ struct nfc_hci_dev {
 
        struct sk_buff_head msg_rx_queue;
 
-       struct nfc_hci_ops *ops;
+       const struct nfc_hci_ops *ops;
 
        struct nfc_llc *llc;
 
@@ -151,7 +151,7 @@ struct nfc_hci_dev {
 };
 
 /* hci device allocation */
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
                                            struct nfc_hci_init_data *init_data,
                                            unsigned long quirks,
                                            u32 protocols,
@@ -168,7 +168,7 @@ void nfc_hci_set_clientdata(struct nfc_hci_dev *hdev, void *clientdata);
 void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev);
 
 static inline int nfc_hci_set_vendor_cmds(struct nfc_hci_dev *hdev,
-                                         struct nfc_vendor_cmd *cmds,
+                                         const struct nfc_vendor_cmd *cmds,
                                          int n_cmds)
 {
        return nfc_set_vendor_cmds(hdev->ndev, cmds, n_cmds);
index 1df0f80..a964dae 100644 (file)
@@ -82,10 +82,10 @@ struct nci_ops {
        void  (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd,
                                  struct sk_buff *skb);
 
-       struct nci_driver_ops *prop_ops;
+       const struct nci_driver_ops *prop_ops;
        size_t n_prop_ops;
 
-       struct nci_driver_ops *core_ops;
+       const struct nci_driver_ops *core_ops;
        size_t n_core_ops;
 };
 
@@ -194,7 +194,7 @@ struct nci_hci_dev {
 /* NCI Core structures */
 struct nci_dev {
        struct nfc_dev          *nfc_dev;
-       struct nci_ops          *ops;
+       const struct nci_ops    *ops;
        struct nci_hci_dev      *hci_dev;
 
        int                     tx_headroom;
@@ -267,7 +267,7 @@ struct nci_dev {
 };
 
 /* ----- NCI Devices ----- */
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
                                    __u32 supported_protocols,
                                    int tx_headroom,
                                    int tx_tailroom);
@@ -276,25 +276,27 @@ int nci_register_device(struct nci_dev *ndev);
 void nci_unregister_device(struct nci_dev *ndev);
 int nci_request(struct nci_dev *ndev,
                void (*req)(struct nci_dev *ndev,
-                           unsigned long opt),
-               unsigned long opt, __u32 timeout);
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload);
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload);
+                           const void *opt),
+               const void *opt, __u32 timeout);
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
+                const __u8 *payload);
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+                const __u8 *payload);
 int nci_core_reset(struct nci_dev *ndev);
 int nci_core_init(struct nci_dev *ndev);
 
 int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb);
 int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val);
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val);
 
 int nci_nfcee_discover(struct nci_dev *ndev, u8 action);
 int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode);
 int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
                         u8 number_destination_params,
                         size_t params_len,
-                        struct core_conn_create_dest_spec_params *params);
+                        const struct core_conn_create_dest_spec_params *params);
 int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id);
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
                      struct sk_buff **resp);
 
 struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
@@ -343,7 +345,7 @@ static inline void *nci_get_drvdata(struct nci_dev *ndev)
 }
 
 static inline int nci_set_vendor_cmds(struct nci_dev *ndev,
-                                     struct nfc_vendor_cmd *cmds,
+                                     const struct nfc_vendor_cmd *cmds,
                                      int n_cmds)
 {
        return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds);
@@ -360,7 +362,7 @@ int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode,
 int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
                        struct sk_buff *skb);
 void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb);
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload);
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload);
 int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb);
 int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id);
 void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
@@ -378,7 +380,7 @@ void nci_req_complete(struct nci_dev *ndev, int result);
 struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
                                                   int conn_id);
 int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
-                                         struct dest_spec_params *params);
+                                         const struct dest_spec_params *params);
 
 /* ----- NCI status code ----- */
 int nci_to_errno(__u8 code);
index 2cd3a26..5dee575 100644 (file)
@@ -188,17 +188,17 @@ struct nfc_dev {
 
        struct rfkill *rfkill;
 
-       struct nfc_vendor_cmd *vendor_cmds;
+       const struct nfc_vendor_cmd *vendor_cmds;
        int n_vendor_cmds;
 
-       struct nfc_ops *ops;
+       const struct nfc_ops *ops;
        struct genl_info *cur_cmd_info;
 };
 #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
 
 extern struct class nfc_class;
 
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
                                    u32 supported_protocols,
                                    int tx_headroom,
                                    int tx_tailroom);
@@ -245,7 +245,7 @@ static inline void nfc_set_drvdata(struct nfc_dev *dev, void *data)
  *
  * @dev: The nfc device
  */
-static inline void *nfc_get_drvdata(struct nfc_dev *dev)
+static inline void *nfc_get_drvdata(const struct nfc_dev *dev)
 {
        return dev_get_drvdata(&dev->dev);
 }
@@ -255,7 +255,7 @@ static inline void *nfc_get_drvdata(struct nfc_dev *dev)
  *
  * @dev: The nfc device whose name to return
  */
-static inline const char *nfc_device_name(struct nfc_dev *dev)
+static inline const char *nfc_device_name(const struct nfc_dev *dev)
 {
        return dev_name(&dev->dev);
 }
@@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk,
 struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp);
 
 int nfc_set_remote_general_bytes(struct nfc_dev *dev,
-                                u8 *gt, u8 gt_len);
+                                const u8 *gt, u8 gt_len);
 u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len);
 
 int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
@@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
                       u8 comm_mode, u8 rf_mode);
 
 int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
-                    u8 *gb, size_t gb_len);
+                    const u8 *gb, size_t gb_len);
 int nfc_tm_deactivated(struct nfc_dev *dev);
 int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb);
 
@@ -297,7 +297,7 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
                          u8 payload_type, u8 direction);
 
 static inline int nfc_set_vendor_cmds(struct nfc_dev *dev,
-                                     struct nfc_vendor_cmd *cmds,
+                                     const struct nfc_vendor_cmd *cmds,
                                      int n_cmds)
 {
        if (dev->vendor_cmds || dev->n_vendor_cmds)
index 3dd62dd..a408240 100644 (file)
                                        * Please note DMA-sync-for-CPU is still
                                        * device driver responsibility
                                        */
-#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG      BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP |\
+                                PP_FLAG_DMA_SYNC_DEV |\
+                                PP_FLAG_PAGE_FRAG)
 
 /*
  * Fast allocation side cache array/stack
@@ -88,6 +91,9 @@ struct page_pool {
        unsigned long defer_warn;
 
        u32 pages_state_hold_cnt;
+       unsigned int frag_offset;
+       struct page *frag_page;
+       long frag_users;
 
        /*
         * Data structure for allocation side
@@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
        return page_pool_alloc_pages(pool, gfp);
 }
 
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+                                 unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+                                                   unsigned int *offset,
+                                                   unsigned int size)
+{
+       gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+       return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
 /* get the stored dma direction. A driver might decide to treat this locally and
  * avoid the extra cache line from page_pool to determine the direction
  */
@@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
        page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT        \
+               (sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-       dma_addr_t ret = page->dma_addr[0];
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+       dma_addr_t ret = page->dma_addr;
+
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
        return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-       page->dma_addr[0] = addr;
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               page->dma_addr[1] = upper_32_bits(addr);
+       page->dma_addr = addr;
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+       atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+                                                         long nr)
+{
+       long ret;
+
+       /* As suggested by Alexander, atomic_long_read() may cover up the
+        * reference count errors, so avoid calling atomic_long_read() in
+        * the cases of freeing or draining the page_frags, where we would
+        * not expect it to match or that are slowpath anyway.
+        */
+       if (__builtin_constant_p(nr) &&
+           atomic_long_read(&page->pp_frag_count) == nr)
+               return 0;
+
+       ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+       WARN_ON(ret < 0);
+       return ret;
 }
 
 static inline bool is_page_pool_compiled_in(void)
@@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
                spin_unlock_bh(&pool->ring.producer_lock);
 }
 
-/* Store mem_info on struct page and use it while recycling skb frags */
-static inline
-void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
-{
-       page->pp = pp;
-}
-
 #endif /* _NET_PAGE_POOL_H */
index 298a8d1..83a6d07 100644 (file)
@@ -76,12 +76,10 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
        return block->q;
 }
 
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-                struct tcf_result *res, bool compat_mode);
-int tcf_classify_ingress(struct sk_buff *skb,
-                        const struct tcf_block *ingress_block,
-                        const struct tcf_proto *tp, struct tcf_result *res,
-                        bool compat_mode);
+int tcf_classify(struct sk_buff *skb,
+                const struct tcf_block *block,
+                const struct tcf_proto *tp, struct tcf_result *res,
+                bool compat_mode);
 
 #else
 static inline bool tcf_block_shared(struct tcf_block *block)
@@ -138,20 +136,14 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 {
 }
 
-static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+static inline int tcf_classify(struct sk_buff *skb,
+                              const struct tcf_block *block,
+                              const struct tcf_proto *tp,
                               struct tcf_result *res, bool compat_mode)
 {
        return TC_ACT_UNSPEC;
 }
 
-static inline int tcf_classify_ingress(struct sk_buff *skb,
-                                      const struct tcf_block *ingress_block,
-                                      const struct tcf_proto *tp,
-                                      struct tcf_result *res, bool compat_mode)
-{
-       return TC_ACT_UNSPEC;
-}
-
 #endif
 
 static inline unsigned long
@@ -327,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                      struct nlattr **tb, struct nlattr *rate_tlv,
-                     struct tcf_exts *exts, bool ovr, bool rtnl_held,
+                     struct tcf_exts *exts, u32 flags,
                      struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
@@ -824,10 +816,9 @@ enum tc_htb_command {
 struct tc_htb_qopt_offload {
        struct netlink_ext_ack *extack;
        enum tc_htb_command command;
-       u16 classid;
        u32 parent_classid;
+       u16 classid;
        u16 qid;
-       u16 moved_qid;
        u64 rate;
        u64 ceil;
 };
index 384e800..9f48733 100644 (file)
@@ -153,7 +153,8 @@ struct rtnl_af_ops {
                                                    u32 ext_filter_mask);
 
        int                     (*validate_link_af)(const struct net_device *dev,
-                                                   const struct nlattr *attr);
+                                                   const struct nlattr *attr,
+                                                   struct netlink_ext_ack *extack);
        int                     (*set_link_af)(struct net_device *dev,
                                               const struct nlattr *attr,
                                               struct netlink_ext_ack *extack);
index 9ed33e6..c0069ac 100644 (file)
@@ -357,7 +357,7 @@ struct tcf_proto_ops {
        int                     (*change)(struct net *net, struct sk_buff *,
                                        struct tcf_proto*, unsigned long,
                                        u32 handle, struct nlattr **,
-                                       void **, bool, bool,
+                                       void **, u32,
                                        struct netlink_ext_ack *);
        int                     (*delete)(struct tcf_proto *tp, void *arg,
                                          bool *last, bool rtnl_held,
index f23cb25..66a9a90 100644 (file)
@@ -68,6 +68,7 @@
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 #include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
 #define RCV_SHUTDOWN   1
 #define SEND_SHUTDOWN  2
 
-#define SOCK_SNDBUF_LOCK       1
-#define SOCK_RCVBUF_LOCK       2
 #define SOCK_BINDADDR_LOCK     4
 #define SOCK_BINDPORT_LOCK     8
 
@@ -2249,6 +2248,15 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc
        return false;
 }
 
+static inline void skb_prepare_for_gro(struct sk_buff *skb)
+{
+       if (skb->destructor != sock_wfree) {
+               skb_orphan(skb);
+               return;
+       }
+       skb->slow_gro = 1;
+}
+
 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
                    unsigned long expires);
 
@@ -2392,6 +2400,11 @@ static inline gfp_t gfp_any(void)
        return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
 }
 
+static inline gfp_t gfp_memcg_charge(void)
+{
+       return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
+}
+
 static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
 {
        return noblock ? 0 : sk->sk_rcvtimeo;
@@ -2704,6 +2717,7 @@ extern int sysctl_optmem_max;
 extern __u32 sysctl_wmem_default;
 extern __u32 sysctl_rmem_default;
 
+#define SKB_FRAG_PAGE_ORDER    get_order(32768)
 DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
index e4cac92..60d806b 100644 (file)
@@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp {
 
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
+struct switchdev_brport {
+       struct net_device *dev;
+       const void *ctx;
+       struct notifier_block *atomic_nb;
+       struct notifier_block *blocking_nb;
+       bool tx_fwd_offload;
+};
+
 enum switchdev_notifier_type {
        SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
        SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -197,6 +205,9 @@ enum switchdev_notifier_type {
        SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+       SWITCHDEV_BRPORT_OFFLOADED,
+       SWITCHDEV_BRPORT_UNOFFLOADED,
 };
 
 struct switchdev_notifier_info {
@@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info {
        bool handled;
 };
 
+struct switchdev_notifier_brport_info {
+       struct switchdev_notifier_info info; /* must be first */
+       const struct switchdev_brport brport;
+};
+
 static inline struct net_device *
 switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
 {
@@ -238,8 +254,25 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
        return info->extack;
 }
 
+static inline bool
+switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *fdb_info)
+{
+       return !fdb_info->added_by_user && !fdb_info->is_local;
+}
+
 #ifdef CONFIG_NET_SWITCHDEV
 
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb);
+
 void switchdev_deferred_process(void);
 int switchdev_port_attr_set(struct net_device *dev,
                            const struct switchdev_attr *attr,
@@ -266,6 +299,30 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
                                 struct net_device *group_dev,
                                 bool joining);
 
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*add_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_add_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info));
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*del_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_del_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info));
+
 int switchdev_handle_port_obj_add(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
                        bool (*check_cb)(const struct net_device *dev),
@@ -286,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
                                      struct netlink_ext_ack *extack));
 #else
 
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                               const void *ctx,
+                               struct notifier_block *atomic_nb,
+                               struct notifier_block *blocking_nb)
+{
+}
+
 static inline void switchdev_deferred_process(void)
 {
 }
@@ -349,6 +425,38 @@ call_switchdev_blocking_notifiers(unsigned long val,
        return NOTIFY_DONE;
 }
 
+static inline int
+switchdev_handle_fdb_add_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*add_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_add_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       return 0;
+}
+
+static inline int
+switchdev_handle_fdb_del_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*del_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_del_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       return 0;
+}
+
 static inline int
 switchdev_handle_port_obj_add(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
index 784d5c3..3166dc1 100644 (file)
@@ -1958,7 +1958,6 @@ struct tcp_iter_state {
        struct seq_net_private  p;
        enum tcp_seq_states     state;
        struct sock             *syn_wait_sk;
-       struct tcp_seq_afinfo   *bpf_seq_afinfo;
        int                     bucket, offset, sbucket, num;
        loff_t                  last_pos;
 };
index 5533f0a..ad5b02d 100644 (file)
@@ -276,6 +276,11 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
        return unlikely(xdp->data_meta > xdp->data);
 }
 
+static inline bool xdp_metalen_invalid(unsigned long metalen)
+{
+       return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+}
+
 struct xdp_attachment_info {
        struct bpf_prog *prog;
        u32 flags;
index cbff7c2..2308210 100644 (file)
@@ -1075,6 +1075,22 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un
 }
 
 #ifdef CONFIG_XFRM
+static inline bool
+xfrm_default_allow(struct net *net, int dir)
+{
+       u8 def = net->xfrm.policy_default;
+
+       switch (dir) {
+       case XFRM_POLICY_IN:
+               return def & XFRM_POL_DEFAULT_IN ? false : true;
+       case XFRM_POLICY_OUT:
+               return def & XFRM_POL_DEFAULT_OUT ? false : true;
+       case XFRM_POLICY_FWD:
+               return def & XFRM_POL_DEFAULT_FWD ? false : true;
+       }
+       return false;
+}
+
 int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
                        unsigned short family);
 
@@ -1088,9 +1104,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
        if (sk && sk->sk_policy[XFRM_POLICY_IN])
                return __xfrm_policy_check(sk, ndir, skb, family);
 
-       return  (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
-               (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
-               __xfrm_policy_check(sk, ndir, skb, family);
+       if (xfrm_default_allow(net, dir))
+               return (!net->xfrm.policy_count[dir] && !secpath_exists(skb)) ||
+                      (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+                      __xfrm_policy_check(sk, ndir, skb, family);
+       else
+               return (skb_dst(skb) && (skb_dst(skb)->flags & DST_NOPOLICY)) ||
+                      __xfrm_policy_check(sk, ndir, skb, family);
 }
 
 static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
@@ -1142,9 +1162,13 @@ static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
        struct net *net = dev_net(skb->dev);
 
-       return  !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
-               (skb_dst(skb)->flags & DST_NOXFRM) ||
-               __xfrm_route_forward(skb, family);
+       if (xfrm_default_allow(net, XFRM_POLICY_FWD))
+               return !net->xfrm.policy_count[XFRM_POLICY_OUT] ||
+                       (skb_dst(skb)->flags & DST_NOXFRM) ||
+                       __xfrm_route_forward(skb, family);
+       else
+               return (skb_dst(skb)->flags & DST_NOXFRM) ||
+                       __xfrm_route_forward(skb, family);
 }
 
 static inline int xfrm4_route_forward(struct sk_buff *skb)
index 2f5ce4d..06706a9 100644 (file)
@@ -589,6 +589,9 @@ enum ocelot_sb_pool {
        OCELOT_SB_POOL_NUM,
 };
 
+#define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION      BIT(0)
+#define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP           BIT(1)
+
 struct ocelot_port {
        struct ocelot                   *ocelot;
 
@@ -798,19 +801,14 @@ void ocelot_init_port(struct ocelot *ocelot, int port);
 void ocelot_deinit_port(struct ocelot *ocelot, int port);
 
 /* DSA callbacks */
-void ocelot_port_enable(struct ocelot *ocelot, int port,
-                       struct phy_device *phy);
-void ocelot_port_disable(struct ocelot *ocelot, int port);
 void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data);
 void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data);
 int ocelot_get_sset_count(struct ocelot *ocelot, int port, int sset);
 int ocelot_get_ts_info(struct ocelot *ocelot, int port,
                       struct ethtool_ts_info *info);
 void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs);
-int ocelot_port_flush(struct ocelot *ocelot, int port);
-void ocelot_adjust_link(struct ocelot *ocelot, int port,
-                       struct phy_device *phydev);
-int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled);
+int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled,
+                              struct netlink_ext_ack *extack);
 void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state);
 void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot);
 int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
@@ -828,7 +826,7 @@ int ocelot_fdb_add(struct ocelot *ocelot, int port,
 int ocelot_fdb_del(struct ocelot *ocelot, int port,
                   const unsigned char *addr, u16 vid);
 int ocelot_vlan_prepare(struct ocelot *ocelot, int port, u16 vid, bool pvid,
-                       bool untagged);
+                       bool untagged, struct netlink_ext_ack *extack);
 int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
                    bool untagged);
 int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
@@ -894,6 +892,18 @@ int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port,
                                   enum devlink_sb_pool_type pool_type,
                                   u32 *p_cur, u32 *p_max);
 
+void ocelot_phylink_mac_link_down(struct ocelot *ocelot, int port,
+                                 unsigned int link_an_mode,
+                                 phy_interface_t interface,
+                                 unsigned long quirks);
+void ocelot_phylink_mac_link_up(struct ocelot *ocelot, int port,
+                               struct phy_device *phydev,
+                               unsigned int link_an_mode,
+                               phy_interface_t interface,
+                               int speed, int duplex,
+                               bool tx_pause, bool rx_pause,
+                               unsigned long quirks);
+
 #if IS_ENABLED(CONFIG_BRIDGE_MRP)
 int ocelot_mrp_add(struct ocelot *ocelot, int port,
                   const struct switchdev_obj_mrp *mrp);
index c3006c6..59c945b 100644 (file)
@@ -54,6 +54,7 @@ TRACE_EVENT(qdisc_enqueue,
 
        TP_STRUCT__entry(
                __field(struct Qdisc *, qdisc)
+               __field(const struct netdev_queue *, txq)
                __field(void *, skbaddr)
                __field(int, ifindex)
                __field(u32, handle)
@@ -62,6 +63,7 @@ TRACE_EVENT(qdisc_enqueue,
 
        TP_fast_assign(
                __entry->qdisc = qdisc;
+               __entry->txq     = txq;
                __entry->skbaddr = skb;
                __entry->ifindex = txq->dev ? txq->dev->ifindex : 0;
                __entry->handle  = qdisc->handle;
index d588c24..1f0a2b4 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
index bf9252c..791f31d 100644 (file)
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
 
 struct bpf_cgroup_storage_key {
        __u64   cgroup_inode_id;        /* cgroup inode id */
-       __u32   attach_type;            /* program attach type */
+       __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
 };
 
 union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
  *             **BPF_PROG_TYPE_SK_LOOKUP**
  *                     *data_in* and *data_out* must be NULL.
  *
- *             **BPF_PROG_TYPE_XDP**
- *                     *ctx_in* and *ctx_out* must be NULL.
- *
  *             **BPF_PROG_TYPE_RAW_TRACEPOINT**,
  *             **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
  *
@@ -996,6 +993,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_VERDICT,
        BPF_SK_REUSEPORT_SELECT,
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+       BPF_PERF_EVENT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
        BPF_LINK_TYPE_ITER = 4,
        BPF_LINK_TYPE_NETNS = 5,
        BPF_LINK_TYPE_XDP = 6,
+       BPF_LINK_TYPE_PERF_EVENT = 7,
 
        MAX_BPF_LINK_TYPE,
 };
@@ -1449,6 +1448,13 @@ union bpf_attr {
                                __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
                                __u32           iter_info_len;  /* iter_info length */
                        };
+                       struct {
+                               /* black box user-provided value passed through
+                                * to BPF program at the execution time and
+                                * accessible through bpf_get_attach_cookie() BPF helper
+                                */
+                               __u64           bpf_cookie;
+                       } perf_event;
                };
        } link_create;
 
@@ -3249,7 +3255,7 @@ union bpf_attr {
  * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
  *     Description
  *             Select a **SO_REUSEPORT** socket from a
- *             **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ *             **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
  *             It checks the selected socket is matching the incoming
  *             request in the socket buffer.
  *     Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
  *             Execute close syscall for given FD.
  *     Return
  *             A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ *     Description
+ *             Initialize the timer.
+ *             First 4 bits of *flags* specify clockid.
+ *             Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ *             All other bits of *flags* are reserved.
+ *             The verifier will reject the program if *timer* is not from
+ *             the same *map*.
+ *     Return
+ *             0 on success.
+ *             **-EBUSY** if *timer* is already initialized.
+ *             **-EINVAL** if invalid *flags* are passed.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ *     Description
+ *             Configure the timer to call *callback_fn* static function.
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ *     Description
+ *             Set timer expiration N nanoseconds from the current time. The
+ *             configured callback will be invoked in soft irq context on some cpu
+ *             and will not repeat unless another bpf_timer_start() is made.
+ *             In such case the next invocation can migrate to a different cpu.
+ *             Since struct bpf_timer is a field inside map element the map
+ *             owns the timer. The bpf_timer_set_callback() will increment refcnt
+ *             of BPF program to make sure that callback_fn code stays valid.
+ *             When user space reference to a map reaches zero all timers
+ *             in a map are cancelled and corresponding program's refcnts are
+ *             decremented. This is done to make sure that Ctrl-C of a user
+ *             process doesn't leave any timers running. If map is pinned in
+ *             bpffs the callback_fn can re-arm itself indefinitely.
+ *             bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ *             cancel and free the timer in the given map element.
+ *             The map can contain timers that invoke callback_fn-s from different
+ *             programs. The same callback_fn can serve different timers from
+ *             different maps if key/value layout matches across maps.
+ *             Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ *             or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ *     Description
+ *             Cancel the timer and wait for callback_fn to finish if it was running.
+ *     Return
+ *             0 if the timer was not active.
+ *             1 if the timer was active.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ *             own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ *     Description
+ *             Get address of the traced function (for tracing and kprobe programs).
+ *     Return
+ *             Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ *     Description
+ *             Get bpf_cookie value provided (optionally) during the program
+ *             attachment. It might be different for each individual
+ *             attachment, even if BPF program itself is the same.
+ *             Expects BPF program context *ctx* as a first argument.
+ *
+ *             Supported for the following program types:
+ *                     - kprobe/uprobe;
+ *                     - tracepoint;
+ *                     - perf_event.
+ *     Return
+ *             Value specified by user at BPF link creation/attachment time
+ *             or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ *     Description
+ *             Get the struct pt_regs associated with **task**.
+ *     Return
+ *             A pointer to struct pt_regs.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -4951,6 +5048,13 @@ union bpf_attr {
        FN(sys_bpf),                    \
        FN(btf_find_by_name_kind),      \
        FN(sys_close),                  \
+       FN(timer_init),                 \
+       FN(timer_set_callback),         \
+       FN(timer_start),                \
+       FN(timer_cancel),               \
+       FN(get_func_ip),                \
+       FN(get_attach_cookie),          \
+       FN(task_pt_regs),               \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
        __u32   val;
 };
 
+struct bpf_timer {
+       __u64 :64;
+       __u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
        __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
                                 * Allows 1,2,4-byte read, but no write.
index df6e821..3893646 100644 (file)
@@ -78,11 +78,20 @@ enum {
 enum {
        J1939_NLA_PAD,
        J1939_NLA_BYTES_ACKED,
+       J1939_NLA_TOTAL_SIZE,
+       J1939_NLA_PGN,
+       J1939_NLA_SRC_NAME,
+       J1939_NLA_DEST_NAME,
+       J1939_NLA_SRC_ADDR,
+       J1939_NLA_DEST_ADDR,
 };
 
 enum {
        J1939_EE_INFO_NONE,
        J1939_EE_INFO_TX_ABORT,
+       J1939_EE_INFO_RX_RTS,
+       J1939_EE_INFO_RX_DPO,
+       J1939_EE_INFO_RX_ABORT,
 };
 
 struct j1939_filter {
index 67aa713..b6db659 100644 (file)
@@ -639,6 +639,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch {
 enum ethtool_link_ext_substate_bad_signal_integrity {
        ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
        ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST,
+       ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS,
 };
 
 /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
index b3b9371..5545f1c 100644 (file)
@@ -377,6 +377,8 @@ enum {
        ETHTOOL_A_COALESCE_TX_USECS_HIGH,               /* u32 */
        ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,          /* u32 */
        ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,        /* u32 */
+       ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,             /* u8 */
+       ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,             /* u8 */
 
        /* add new constants above here */
        __ETHTOOL_A_COALESCE_CNT,
index c3cc5a9..4783af9 100644 (file)
@@ -54,6 +54,7 @@
 #define ARPHRD_X25     271             /* CCITT X.25                   */
 #define ARPHRD_HWX25   272             /* Boards with X.25 in firmware */
 #define ARPHRD_CAN     280             /* Controller Area Network      */
+#define ARPHRD_MCTP    290
 #define ARPHRD_PPP     512
 #define ARPHRD_CISCO   513             /* Cisco HDLC                   */
 #define ARPHRD_HDLC    ARPHRD_CISCO
index 6b56a75..2711c35 100644 (file)
@@ -479,16 +479,22 @@ enum {
 
 /* flags used in BRIDGE_VLANDB_DUMP_FLAGS attribute to affect dumps */
 #define BRIDGE_VLANDB_DUMPF_STATS      (1 << 0) /* Include stats in the dump */
+#define BRIDGE_VLANDB_DUMPF_GLOBAL     (1 << 1) /* Dump global vlan options only */
 
 /* Bridge vlan RTM attributes
  * [BRIDGE_VLANDB_ENTRY] = {
  *     [BRIDGE_VLANDB_ENTRY_INFO]
  *     ...
  * }
+ * [BRIDGE_VLANDB_GLOBAL_OPTIONS] = {
+ *     [BRIDGE_VLANDB_GOPTS_ID]
+ *     ...
+ * }
  */
 enum {
        BRIDGE_VLANDB_UNSPEC,
        BRIDGE_VLANDB_ENTRY,
+       BRIDGE_VLANDB_GLOBAL_OPTIONS,
        __BRIDGE_VLANDB_MAX,
 };
 #define BRIDGE_VLANDB_MAX (__BRIDGE_VLANDB_MAX - 1)
@@ -500,6 +506,7 @@ enum {
        BRIDGE_VLANDB_ENTRY_STATE,
        BRIDGE_VLANDB_ENTRY_TUNNEL_INFO,
        BRIDGE_VLANDB_ENTRY_STATS,
+       BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
        __BRIDGE_VLANDB_ENTRY_MAX,
 };
 #define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)
@@ -538,6 +545,29 @@ enum {
 };
 #define BRIDGE_VLANDB_STATS_MAX (__BRIDGE_VLANDB_STATS_MAX - 1)
 
+enum {
+       BRIDGE_VLANDB_GOPTS_UNSPEC,
+       BRIDGE_VLANDB_GOPTS_ID,
+       BRIDGE_VLANDB_GOPTS_RANGE,
+       BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+       BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+       BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+       BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+       BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+       BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+       BRIDGE_VLANDB_GOPTS_PAD,
+       BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+       BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+       BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+       BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+       BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+       BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+       BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS,
+       BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE,
+       __BRIDGE_VLANDB_GOPTS_MAX
+};
+#define BRIDGE_VLANDB_GOPTS_MAX (__BRIDGE_VLANDB_GOPTS_MAX - 1)
+
 /* Bridge multicast database attributes
  * [MDBA_MDB] = {
  *     [MDBA_MDB_ENTRY] = {
@@ -629,6 +659,7 @@ enum {
        MDBA_ROUTER_PATTR_TYPE,
        MDBA_ROUTER_PATTR_INET_TIMER,
        MDBA_ROUTER_PATTR_INET6_TIMER,
+       MDBA_ROUTER_PATTR_VID,
        __MDBA_ROUTER_PATTR_MAX
 };
 #define MDBA_ROUTER_PATTR_MAX (__MDBA_ROUTER_PATTR_MAX - 1)
@@ -720,12 +751,14 @@ struct br_mcast_stats {
 
 /* bridge boolean options
  * BR_BOOLOPT_NO_LL_LEARN - disable learning from link-local packets
+ * BR_BOOLOPT_MCAST_VLAN_SNOOPING - control vlan multicast snooping
  *
  * IMPORTANT: if adding a new option do not forget to handle
  *            it in br_boolopt_toggle/get and bridge sysfs
  */
 enum br_boolopt_id {
        BR_BOOLOPT_NO_LL_LEARN,
+       BR_BOOLOPT_MCAST_VLAN_SNOOPING,
        BR_BOOLOPT_MAX
 };
 
@@ -738,4 +771,17 @@ struct br_boolopt_multi {
        __u32 optval;
        __u32 optmask;
 };
+
+enum {
+       BRIDGE_QUERIER_UNSPEC,
+       BRIDGE_QUERIER_IP_ADDRESS,
+       BRIDGE_QUERIER_IP_PORT,
+       BRIDGE_QUERIER_IP_OTHER_TIMER,
+       BRIDGE_QUERIER_PAD,
+       BRIDGE_QUERIER_IPV6_ADDRESS,
+       BRIDGE_QUERIER_IPV6_PORT,
+       BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+       __BRIDGE_QUERIER_MAX
+};
+#define BRIDGE_QUERIER_MAX (__BRIDGE_QUERIER_MAX - 1)
 #endif /* _UAPI_LINUX_IF_BRIDGE_H */
index a0b6379..5f589c7 100644 (file)
 #define ETH_P_MAP      0x00F9          /* Qualcomm multiplexing and
                                         * aggregation protocol
                                         */
+#define ETH_P_MCTP     0x00FA          /* Management component transport
+                                        * protocol packets
+                                        */
 
 /*
  *     This is an Ethernet frame header.
index 4882e81..eebd389 100644 (file)
@@ -417,6 +417,7 @@ enum {
        IFLA_INET6_ICMP6STATS,  /* statistics (icmpv6)          */
        IFLA_INET6_TOKEN,       /* device token                 */
        IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+       IFLA_INET6_RA_MTU,      /* mtu carried in the RA message */
        __IFLA_INET6_MAX
 };
 
@@ -479,6 +480,7 @@ enum {
        IFLA_BR_MCAST_MLD_VERSION,
        IFLA_BR_VLAN_STATS_PER_PORT,
        IFLA_BR_MULTI_BOOLOPT,
+       IFLA_BR_MCAST_QUERIER_STATE,
        __IFLA_BR_MAX,
 };
 
@@ -855,6 +857,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
@@ -1260,4 +1263,14 @@ struct ifla_rmnet_flags {
        __u32   mask;
 };
 
+/* MCTP section */
+
+enum {
+       IFLA_MCTP_UNSPEC,
+       IFLA_MCTP_NET,
+       __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index d1b3270..1416822 100644 (file)
@@ -188,11 +188,22 @@ struct ip_mreq_source {
 };
 
 struct ip_msfilter {
-       __be32          imsf_multiaddr;
-       __be32          imsf_interface;
-       __u32           imsf_fmode;
-       __u32           imsf_numsrc;
-       __be32          imsf_slist[1];
+       union {
+               struct {
+                       __be32          imsf_multiaddr_aux;
+                       __be32          imsf_interface_aux;
+                       __u32           imsf_fmode_aux;
+                       __u32           imsf_numsrc_aux;
+                       __be32          imsf_slist[1];
+               };
+               struct {
+                       __be32          imsf_multiaddr;
+                       __be32          imsf_interface;
+                       __u32           imsf_fmode;
+                       __u32           imsf_numsrc;
+                       __be32          imsf_slist_flex[];
+               };
+       };
 };
 
 #define IP_MSFILTER_SIZE(numsrc) \
@@ -211,11 +222,22 @@ struct group_source_req {
 };
 
 struct group_filter {
-       __u32                            gf_interface;  /* interface index */
-       struct __kernel_sockaddr_storage gf_group;      /* multicast address */
-       __u32                            gf_fmode;      /* filter mode */
-       __u32                            gf_numsrc;     /* number of sources */
-       struct __kernel_sockaddr_storage gf_slist[1];   /* interface index */
+       union {
+               struct {
+                       __u32                            gf_interface_aux; /* interface index */
+                       struct __kernel_sockaddr_storage gf_group_aux;     /* multicast address */
+                       __u32                            gf_fmode_aux;     /* filter mode */
+                       __u32                            gf_numsrc_aux;    /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist[1];      /* interface index */
+               };
+               struct {
+                       __u32                            gf_interface;    /* interface index */
+                       struct __kernel_sockaddr_storage gf_group;        /* multicast address */
+                       __u32                            gf_fmode;        /* filter mode */
+                       __u32                            gf_numsrc;       /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
+               };
+       };
 };
 
 #define GROUP_FILTER_SIZE(numsrc) \
index 5ad396a..c4c53a9 100644 (file)
@@ -145,6 +145,7 @@ struct in6_flowlabel_req {
 #define IPV6_TLV_PADN          1
 #define IPV6_TLV_ROUTERALERT   5
 #define IPV6_TLV_CALIPSO       7       /* RFC 5570 */
+#define IPV6_TLV_IOAM          49      /* TEMPORARY IANA allocation for IOAM */
 #define IPV6_TLV_JUMBO         194
 #define IPV6_TLV_HAO           201     /* home address option */
 
diff --git a/include/uapi/linux/ioam6.h b/include/uapi/linux/ioam6.h
new file mode 100644 (file)
index 0000000..ac4de37
--- /dev/null
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  IPv6 IOAM implementation
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_H
+#define _UAPI_LINUX_IOAM6_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
+#define IOAM6_U16_UNAVAILABLE U16_MAX
+#define IOAM6_U32_UNAVAILABLE U32_MAX
+#define IOAM6_U64_UNAVAILABLE U64_MAX
+
+#define IOAM6_DEFAULT_ID (IOAM6_U32_UNAVAILABLE >> 8)
+#define IOAM6_DEFAULT_ID_WIDE (IOAM6_U64_UNAVAILABLE >> 8)
+#define IOAM6_DEFAULT_IF_ID IOAM6_U16_UNAVAILABLE
+#define IOAM6_DEFAULT_IF_ID_WIDE IOAM6_U32_UNAVAILABLE
+
+/*
+ * IPv6 IOAM Option Header
+ */
+struct ioam6_hdr {
+       __u8 opt_type;
+       __u8 opt_len;
+       __u8 :8;                                /* reserved */
+#define IOAM6_TYPE_PREALLOC 0
+       __u8 type;
+} __attribute__((packed));
+
+/*
+ * IOAM Trace Header
+ */
+struct ioam6_trace_hdr {
+       __be16  namespace_id;
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+
+       __u8    :1,                             /* unused */
+               :1,                             /* unused */
+               overflow:1,
+               nodelen:5;
+
+       __u8    remlen:7,
+               :1;                             /* unused */
+
+       union {
+               __be32 type_be32;
+
+               struct {
+                       __u32   bit7:1,
+                               bit6:1,
+                               bit5:1,
+                               bit4:1,
+                               bit3:1,
+                               bit2:1,
+                               bit1:1,
+                               bit0:1,
+                               bit15:1,        /* unused */
+                               bit14:1,        /* unused */
+                               bit13:1,        /* unused */
+                               bit12:1,        /* unused */
+                               bit11:1,
+                               bit10:1,
+                               bit9:1,
+                               bit8:1,
+                               bit23:1,        /* reserved */
+                               bit22:1,
+                               bit21:1,        /* unused */
+                               bit20:1,        /* unused */
+                               bit19:1,        /* unused */
+                               bit18:1,        /* unused */
+                               bit17:1,        /* unused */
+                               bit16:1,        /* unused */
+                               :8;             /* reserved */
+               } type;
+       };
+
+#elif defined(__BIG_ENDIAN_BITFIELD)
+
+       __u8    nodelen:5,
+               overflow:1,
+               :1,                             /* unused */
+               :1;                             /* unused */
+
+       __u8    :1,                             /* unused */
+               remlen:7;
+
+       union {
+               __be32 type_be32;
+
+               struct {
+                       __u32   bit0:1,
+                               bit1:1,
+                               bit2:1,
+                               bit3:1,
+                               bit4:1,
+                               bit5:1,
+                               bit6:1,
+                               bit7:1,
+                               bit8:1,
+                               bit9:1,
+                               bit10:1,
+                               bit11:1,
+                               bit12:1,        /* unused */
+                               bit13:1,        /* unused */
+                               bit14:1,        /* unused */
+                               bit15:1,        /* unused */
+                               bit16:1,        /* unused */
+                               bit17:1,        /* unused */
+                               bit18:1,        /* unused */
+                               bit19:1,        /* unused */
+                               bit20:1,        /* unused */
+                               bit21:1,        /* unused */
+                               bit22:1,
+                               bit23:1,        /* reserved */
+                               :8;             /* reserved */
+               } type;
+       };
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define IOAM6_TRACE_DATA_SIZE_MAX 244
+       __u8    data[0];
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_IOAM6_H */
diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h
new file mode 100644 (file)
index 0000000..ca4b228
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  IPv6 IOAM Generic Netlink API
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_GENL_H
+#define _UAPI_LINUX_IOAM6_GENL_H
+
+#define IOAM6_GENL_NAME "IOAM6"
+#define IOAM6_GENL_VERSION 0x1
+
+enum {
+       IOAM6_ATTR_UNSPEC,
+
+       IOAM6_ATTR_NS_ID,       /* u16 */
+       IOAM6_ATTR_NS_DATA,     /* u32 */
+       IOAM6_ATTR_NS_DATA_WIDE,/* u64 */
+
+#define IOAM6_MAX_SCHEMA_DATA_LEN (255 * 4)
+       IOAM6_ATTR_SC_ID,       /* u32 */
+       IOAM6_ATTR_SC_DATA,     /* Binary */
+       IOAM6_ATTR_SC_NONE,     /* Flag */
+
+       IOAM6_ATTR_PAD,
+
+       __IOAM6_ATTR_MAX,
+};
+
+#define IOAM6_ATTR_MAX (__IOAM6_ATTR_MAX - 1)
+
+enum {
+       IOAM6_CMD_UNSPEC,
+
+       IOAM6_CMD_ADD_NAMESPACE,
+       IOAM6_CMD_DEL_NAMESPACE,
+       IOAM6_CMD_DUMP_NAMESPACES,
+
+       IOAM6_CMD_ADD_SCHEMA,
+       IOAM6_CMD_DEL_SCHEMA,
+       IOAM6_CMD_DUMP_SCHEMAS,
+
+       IOAM6_CMD_NS_SET_SCHEMA,
+
+       __IOAM6_CMD_MAX,
+};
+
+#define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1)
+
+#endif /* _UAPI_LINUX_IOAM6_GENL_H */
diff --git a/include/uapi/linux/ioam6_iptunnel.h b/include/uapi/linux/ioam6_iptunnel.h
new file mode 100644 (file)
index 0000000..bae1463
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  IPv6 IOAM Lightweight Tunnel API
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H
+#define _UAPI_LINUX_IOAM6_IPTUNNEL_H
+
+enum {
+       IOAM6_IPTUNNEL_UNSPEC,
+       IOAM6_IPTUNNEL_TRACE,           /* struct ioam6_trace_hdr */
+       __IOAM6_IPTUNNEL_MAX,
+};
+
+#define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1)
+
+#endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */
index 7060377..b243a53 100644 (file)
@@ -190,6 +190,9 @@ enum {
        DEVCONF_NDISC_TCLASS,
        DEVCONF_RPL_SEG_ENABLED,
        DEVCONF_RA_DEFRTR_METRIC,
+       DEVCONF_IOAM6_ENABLED,
+       DEVCONF_IOAM6_ID,
+       DEVCONF_IOAM6_ID_WIDE,
        DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h
deleted file mode 100644 (file)
index 3168137..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _IPX_H_
-#define _IPX_H_
-#include <linux/libc-compat.h> /* for compatibility with glibc netipx/ipx.h */
-#include <linux/types.h>
-#include <linux/sockios.h>
-#include <linux/socket.h>
-#define IPX_NODE_LEN   6
-#define IPX_MTU                576
-
-#if __UAPI_DEF_SOCKADDR_IPX
-struct sockaddr_ipx {
-       __kernel_sa_family_t sipx_family;
-       __be16          sipx_port;
-       __be32          sipx_network;
-       unsigned char   sipx_node[IPX_NODE_LEN];
-       __u8            sipx_type;
-       unsigned char   sipx_zero;      /* 16 byte fill */
-};
-#endif /* __UAPI_DEF_SOCKADDR_IPX */
-
-/*
- * So we can fit the extra info for SIOCSIFADDR into the address nicely
- */
-#define sipx_special   sipx_port
-#define sipx_action    sipx_zero
-#define IPX_DLTITF     0
-#define IPX_CRTITF     1
-
-#if __UAPI_DEF_IPX_ROUTE_DEFINITION
-struct ipx_route_definition {
-       __be32        ipx_network;
-       __be32        ipx_router_network;
-       unsigned char ipx_router_node[IPX_NODE_LEN];
-};
-#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */
-
-#if __UAPI_DEF_IPX_INTERFACE_DEFINITION
-struct ipx_interface_definition {
-       __be32        ipx_network;
-       unsigned char ipx_device[16];
-       unsigned char ipx_dlink_type;
-#define IPX_FRAME_NONE         0
-#define IPX_FRAME_SNAP         1
-#define IPX_FRAME_8022         2
-#define IPX_FRAME_ETHERII      3
-#define IPX_FRAME_8023         4
-#define IPX_FRAME_TR_8022       5 /* obsolete */
-       unsigned char ipx_special;
-#define IPX_SPECIAL_NONE       0
-#define IPX_PRIMARY            1
-#define IPX_INTERNAL           2
-       unsigned char ipx_node[IPX_NODE_LEN];
-};
-#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */
-
-#if __UAPI_DEF_IPX_CONFIG_DATA
-struct ipx_config_data {
-       unsigned char   ipxcfg_auto_select_primary;
-       unsigned char   ipxcfg_auto_create_interfaces;
-};
-#endif /* __UAPI_DEF_IPX_CONFIG_DATA */
-
-/*
- * OLD Route Definition for backward compatibility.
- */
-
-#if __UAPI_DEF_IPX_ROUTE_DEF
-struct ipx_route_def {
-       __be32          ipx_network;
-       __be32          ipx_router_network;
-#define IPX_ROUTE_NO_ROUTER    0
-       unsigned char   ipx_router_node[IPX_NODE_LEN];
-       unsigned char   ipx_device[16];
-       unsigned short  ipx_flags;
-#define IPX_RT_SNAP            8
-#define IPX_RT_8022            4
-#define IPX_RT_BLUEBOOK                2
-#define IPX_RT_ROUTED          1
-};
-#endif /* __UAPI_DEF_IPX_ROUTE_DEF */
-
-#define SIOCAIPXITFCRT         (SIOCPROTOPRIVATE)
-#define SIOCAIPXPRISLT         (SIOCPROTOPRIVATE + 1)
-#define SIOCIPXCFGDATA         (SIOCPROTOPRIVATE + 2)
-#define SIOCIPXNCPCONN         (SIOCPROTOPRIVATE + 3)
-#endif /* _IPX_H_ */
index 568a430..2e20691 100644 (file)
@@ -14,6 +14,7 @@ enum lwtunnel_encap_types {
        LWTUNNEL_ENCAP_BPF,
        LWTUNNEL_ENCAP_SEG6_LOCAL,
        LWTUNNEL_ENCAP_RPL,
+       LWTUNNEL_ENCAP_IOAM6,
        __LWTUNNEL_ENCAP_MAX,
 };
 
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
new file mode 100644 (file)
index 0000000..52b54d1
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#ifndef __UAPI_MCTP_H
+#define __UAPI_MCTP_H
+
+#include <linux/types.h>
+
+typedef __u8                   mctp_eid_t;
+
+struct mctp_addr {
+       mctp_eid_t              s_addr;
+};
+
+struct sockaddr_mctp {
+       unsigned short int      smctp_family;
+       int                     smctp_network;
+       struct mctp_addr        smctp_addr;
+       __u8                    smctp_type;
+       __u8                    smctp_tag;
+};
+
+#define MCTP_NET_ANY           0x0
+
+#define MCTP_ADDR_NULL         0x00
+#define MCTP_ADDR_ANY          0xff
+
+#define MCTP_TAG_MASK          0x07
+#define MCTP_TAG_OWNER         0x08
+
+#endif /* __UAPI_MCTP_H */
index 7b05f71..f66038b 100644 (file)
@@ -73,6 +73,7 @@ enum {
 #define MPTCP_PM_ADDR_FLAG_SIGNAL                      (1 << 0)
 #define MPTCP_PM_ADDR_FLAG_SUBFLOW                     (1 << 1)
 #define MPTCP_PM_ADDR_FLAG_BACKUP                      (1 << 2)
+#define MPTCP_PM_ADDR_FLAG_FULLMESH                    (1 << 3)
 
 enum {
        MPTCP_PM_CMD_UNSPEC,
index d8484be..c6e6d7d 100644 (file)
@@ -56,6 +56,7 @@ enum ctattr_type {
        CTA_LABELS_MASK,
        CTA_SYNPROXY,
        CTA_FILTER,
+       CTA_STATUS_MASK,
        __CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
diff --git a/include/uapi/linux/nl80211-vnd-intel.h b/include/uapi/linux/nl80211-vnd-intel.h
new file mode 100644 (file)
index 0000000..0bf177b
--- /dev/null
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
+ * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ */
+#ifndef __VENDOR_CMD_INTEL_H__
+#define __VENDOR_CMD_INTEL_H__
+
+#define INTEL_OUI      0x001735
+
+/**
+ * enum iwl_mvm_vendor_cmd - supported vendor commands
+ * @IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO: reports CSME connection info.
+ * @IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP: asks for ownership on the device.
+ * @IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT: notifies if roaming is allowed.
+ *     It contains a &IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN and a
+ *     &IWL_MVM_VENDOR_ATTR_VIF_ADDR attributes.
+ */
+
+enum iwl_mvm_vendor_cmd {
+       IWL_MVM_VENDOR_CMD_GET_CSME_CONN_INFO                   = 0x2d,
+       IWL_MVM_VENDOR_CMD_HOST_GET_OWNERSHIP                   = 0x30,
+       IWL_MVM_VENDOR_CMD_ROAMING_FORBIDDEN_EVENT              = 0x32,
+};
+
+enum iwl_vendor_auth_akm_mode {
+       IWL_VENDOR_AUTH_OPEN,
+       IWL_VENDOR_AUTH_RSNA = 0x6,
+       IWL_VENDOR_AUTH_RSNA_PSK,
+       IWL_VENDOR_AUTH_SAE = 0x9,
+       IWL_VENDOR_AUTH_MAX,
+};
+
+/**
+ * enum iwl_mvm_vendor_attr - attributes used in vendor commands
+ * @__IWL_MVM_VENDOR_ATTR_INVALID: attribute 0 is invalid
+ * @IWL_MVM_VENDOR_ATTR_VIF_ADDR: interface MAC address
+ * @IWL_MVM_VENDOR_ATTR_ADDR: MAC address
+ * @IWL_MVM_VENDOR_ATTR_SSID: SSID (binary attribute, 0..32 octets)
+ * @IWL_MVM_VENDOR_ATTR_STA_CIPHER: the cipher to use for the station with the
+ *     mac address specified in &IWL_MVM_VENDOR_ATTR_ADDR.
+ * @IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN: u8 attribute. Indicates whether
+ *     roaming is forbidden or not. Value 1 means roaming is forbidden,
+ *     0 mean roaming is allowed.
+ * @IWL_MVM_VENDOR_ATTR_AUTH_MODE: u32 attribute. Authentication mode type
+ *     as specified in &enum iwl_vendor_auth_akm_mode.
+ * @IWL_MVM_VENDOR_ATTR_CHANNEL_NUM: u8 attribute. Contains channel number.
+ * @IWL_MVM_VENDOR_ATTR_BAND: u8 attribute.
+ *     0 for 2.4 GHz band, 1 for 5.2GHz band and 2 for 6GHz band.
+ * @IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL: u32 attribute. Channel number of
+ *     collocated AP. Relevant for 6GHz AP info.
+ * @IWL_MVM_VENDOR_ATTR_COLLOC_ADDR: MAC address of a collocated AP.
+ *     Relevant for 6GHz AP info.
+ *
+ * @NUM_IWL_MVM_VENDOR_ATTR: number of vendor attributes
+ * @MAX_IWL_MVM_VENDOR_ATTR: highest vendor attribute number
+
+ */
+enum iwl_mvm_vendor_attr {
+       __IWL_MVM_VENDOR_ATTR_INVALID                           = 0x00,
+       IWL_MVM_VENDOR_ATTR_VIF_ADDR                            = 0x02,
+       IWL_MVM_VENDOR_ATTR_ADDR                                = 0x0a,
+       IWL_MVM_VENDOR_ATTR_SSID                                = 0x3d,
+       IWL_MVM_VENDOR_ATTR_STA_CIPHER                          = 0x51,
+       IWL_MVM_VENDOR_ATTR_ROAMING_FORBIDDEN                   = 0x64,
+       IWL_MVM_VENDOR_ATTR_AUTH_MODE                           = 0x65,
+       IWL_MVM_VENDOR_ATTR_CHANNEL_NUM                         = 0x66,
+       IWL_MVM_VENDOR_ATTR_BAND                                = 0x69,
+       IWL_MVM_VENDOR_ATTR_COLLOC_CHANNEL                      = 0x70,
+       IWL_MVM_VENDOR_ATTR_COLLOC_ADDR                         = 0x71,
+
+       NUM_IWL_MVM_VENDOR_ATTR,
+       MAX_IWL_MVM_VENDOR_ATTR = NUM_IWL_MVM_VENDOR_ATTR - 1,
+};
+
+#endif /* __VENDOR_CMD_INTEL_H__ */
index db47499..c2efea9 100644 (file)
  *     passed using %NL80211_ATTR_SAR_SPEC. %NL80211_ATTR_WIPHY is used to
  *     specify the wiphy index to be applied to.
  *
+ * @NL80211_CMD_OBSS_COLOR_COLLISION: This notification is sent out whenever
+ *     mac80211/drv detects a bss color collision.
+ *
+ * @NL80211_CMD_COLOR_CHANGE_REQUEST: This command is used to indicate that
+ *     userspace wants to change the BSS color.
+ *
+ * @NL80211_CMD_COLOR_CHANGE_STARTED: Notify userland, that a color change has
+ *     started
+ *
+ * @NL80211_CMD_COLOR_CHANGE_ABORTED: Notify userland, that the color change has
+ *     been aborted
+ *
+ * @NL80211_CMD_COLOR_CHANGE_COMPLETED: Notify userland that the color change
+ *     has completed
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1417,6 +1432,14 @@ enum nl80211_commands {
 
        NL80211_CMD_SET_SAR_SPECS,
 
+       NL80211_CMD_OBSS_COLOR_COLLISION,
+
+       NL80211_CMD_COLOR_CHANGE_REQUEST,
+
+       NL80211_CMD_COLOR_CHANGE_STARTED,
+       NL80211_CMD_COLOR_CHANGE_ABORTED,
+       NL80211_CMD_COLOR_CHANGE_COMPLETED,
+
        /* add new commands above here */
 
        /* used to define NL80211_CMD_MAX below */
@@ -2560,6 +2583,16 @@ enum nl80211_commands {
  *     disassoc events to indicate that an immediate reconnect to the AP
  *     is desired.
  *
+ * @NL80211_ATTR_OBSS_COLOR_BITMAP: bitmap of the u64 BSS colors for the
+ *     %NL80211_CMD_OBSS_COLOR_COLLISION event.
+ *
+ * @NL80211_ATTR_COLOR_CHANGE_COUNT: u8 attribute specifying the number of TBTT's
+ *     until the color switch event.
+ * @NL80211_ATTR_COLOR_CHANGE_COLOR: u8 attribute specifying the color that we are
+ *     switching to
+ * @NL80211_ATTR_COLOR_CHANGE_ELEMS: Nested set of attributes containing the IE
+ *     information for the time while performing a color switch.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -3057,6 +3090,12 @@ enum nl80211_attrs {
 
        NL80211_ATTR_DISABLE_HE,
 
+       NL80211_ATTR_OBSS_COLOR_BITMAP,
+
+       NL80211_ATTR_COLOR_CHANGE_COUNT,
+       NL80211_ATTR_COLOR_CHANGE_COLOR,
+       NL80211_ATTR_COLOR_CHANGE_ELEMS,
+
        /* add attributes here, update the policy in nl80211.c */
 
        __NL80211_ATTR_AFTER_LAST,
@@ -5953,6 +5992,9 @@ enum nl80211_feature_flags {
  *      frame protection for all management frames exchanged during the
  *      negotiation and range measurement procedure.
  *
+ * @NL80211_EXT_FEATURE_BSS_COLOR: The driver supports BSS color collision
+ *     detection and change announcemnts.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6017,6 +6059,7 @@ enum nl80211_ext_feature_index {
        NL80211_EXT_FEATURE_SECURE_LTF,
        NL80211_EXT_FEATURE_SECURE_RTT,
        NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
+       NL80211_EXT_FEATURE_BSS_COLOR,
 
        /* add new features before the definition below */
        NUM_NL80211_EXT_FEATURES,
index 8d16744..150bcff 100644 (file)
@@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
  * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
  * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
  * not be sent.
+ * @OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
+ * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
  * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
  * datapath.  Always present in notifications.
  * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
@@ -87,6 +89,9 @@ enum ovs_datapath_attr {
        OVS_DP_ATTR_USER_FEATURES,      /* OVS_DP_F_*  */
        OVS_DP_ATTR_PAD,
        OVS_DP_ATTR_MASKS_CACHE_SIZE,
+       OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
+                                    * per-cpu dispatch mode
+                                    */
        __OVS_DP_ATTR_MAX
 };
 
@@ -127,6 +132,9 @@ struct ovs_vport_stats {
 /* Allow tc offload recirc sharing */
 #define OVS_DP_F_TC_RECIRC_SHARING     (1 << 2)
 
+/* Allow per-cpu dispatch of upcalls */
+#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU       (1 << 3)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
index 025c40f..6836ccb 100644 (file)
@@ -22,6 +22,7 @@ enum {
        __TCA_ACT_MAX
 };
 
+/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */
 #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
                                         * actions stats.
                                         */
index c3409c8..eb0a9a5 100644 (file)
@@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage {
        };
 };
 
+#define SOCK_SNDBUF_LOCK       1
+#define SOCK_RCVBUF_LOCK       2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
 #endif /* _UAPI_LINUX_SOCKET_H */
index c525b35..af6ef2c 100644 (file)
@@ -17,6 +17,7 @@
 #define SKBMOD_F_SMAC  0x2
 #define SKBMOD_F_ETYPE 0x4
 #define SKBMOD_F_SWAPMAC 0x8
+#define SKBMOD_F_ECN   0x10
 
 struct tc_skbmod {
        tc_gen;
index ffc6a53..b96c1ea 100644 (file)
@@ -213,6 +213,11 @@ enum {
        XFRM_MSG_GETSPDINFO,
 #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO
 
+       XFRM_MSG_SETDEFAULT,
+#define XFRM_MSG_SETDEFAULT XFRM_MSG_SETDEFAULT
+       XFRM_MSG_GETDEFAULT,
+#define XFRM_MSG_GETDEFAULT XFRM_MSG_GETDEFAULT
+
        XFRM_MSG_MAPPING,
 #define XFRM_MSG_MAPPING XFRM_MSG_MAPPING
        __XFRM_MSG_MAX
@@ -508,6 +513,12 @@ struct xfrm_user_offload {
 #define XFRM_OFFLOAD_IPV6      1
 #define XFRM_OFFLOAD_INBOUND   2
 
+struct xfrm_userpolicy_default {
+#define XFRM_USERPOLICY_DIRMASK_MAX    (sizeof(__u8) * 8)
+       __u8                            dirmask;
+       __u8                            action;
+};
+
 #ifndef __KERNEL__
 /* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE                1
index 8d97aba..daad697 100644 (file)
@@ -1226,7 +1226,7 @@ trace_initcall_start_cb(void *data, initcall_t fn)
 {
        ktime_t *calltime = (ktime_t *)data;
 
-       printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
+       printk(KERN_DEBUG "calling  %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
        *calltime = ktime_get();
 }
 
@@ -1240,8 +1240,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
        rettime = ktime_get();
        delta = ktime_sub(rettime, *calltime);
        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-                fn, ret, duration);
+       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
+                fn, ret, duration, irqs_disabled());
 }
 
 static ktime_t initcall_calltime;
index bd04f4a..a82d6de 100644 (file)
@@ -29,7 +29,7 @@ config BPF_SYSCALL
        select IRQ_WORK
        select TASKS_TRACE_RCU
        select BINARY_PRINTF
-       select NET_SOCK_MSG if INET
+       select NET_SOCK_MSG if NET
        default n
        help
          Enable the bpf() system call that allows to manipulate BPF programs
index 3c41056..cebd4fb 100644 (file)
@@ -287,6 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
        return 0;
 }
 
+static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
+{
+       if (unlikely(map_value_has_timer(&arr->map)))
+               bpf_timer_cancel_and_free(val + arr->map.timer_off);
+}
+
 /* Called from syscall or from eBPF program */
 static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                                 u64 map_flags)
@@ -321,6 +327,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                        copy_map_value_locked(map, val, value, false);
                else
                        copy_map_value(map, val, value);
+               check_and_free_timer_in_array(array, val);
        }
        return 0;
 }
@@ -374,6 +381,19 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
        return (void *)round_down((unsigned long)array, PAGE_SIZE);
 }
 
+static void array_map_free_timers(struct bpf_map *map)
+{
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
+       int i;
+
+       if (likely(!map_value_has_timer(map)))
+               return;
+
+       for (i = 0; i < array->map.max_entries; i++)
+               bpf_timer_cancel_and_free(array->value + array->elem_size * i +
+                                         map->timer_off);
+}
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void array_map_free(struct bpf_map *map)
 {
@@ -668,6 +688,7 @@ const struct bpf_map_ops array_map_ops = {
        .map_alloc = array_map_alloc,
        .map_free = array_map_free,
        .map_get_next_key = array_map_get_next_key,
+       .map_release_uref = array_map_free_timers,
        .map_lookup_elem = array_map_lookup_elem,
        .map_update_elem = array_map_update_elem,
        .map_delete_elem = array_map_delete_elem,
index 2d4fbdb..b2ee450 100644 (file)
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
        return supported;
 }
 
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+       const struct bpf_iter_target_info *tinfo;
+       const struct bpf_func_proto *fn = NULL;
+
+       mutex_lock(&targets_mutex);
+       list_for_each_entry(tinfo, &targets, list) {
+               if (tinfo->btf_id == prog->aux->attach_btf_id) {
+                       const struct bpf_iter_reg *reg_info;
+
+                       reg_info = tinfo->reg_info;
+                       if (reg_info->get_func_proto)
+                               fn = reg_info->get_func_proto(func_id, prog);
+                       break;
+               }
+       }
+       mutex_unlock(&targets_mutex);
+
+       return fn;
+}
+
 static void bpf_iter_link_release(struct bpf_link *link)
 {
        struct bpf_iter_link *iter_link =
@@ -664,7 +686,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 
        rcu_read_lock();
        migrate_disable();
-       ret = BPF_PROG_RUN(prog, ctx);
+       ret = bpf_prog_run(prog, ctx);
        migrate_enable();
        rcu_read_unlock();
 
index 70f6fd4..d6731c3 100644 (file)
@@ -28,6 +28,7 @@ struct bpf_struct_ops_value {
 
 struct bpf_struct_ops_map {
        struct bpf_map map;
+       struct rcu_head rcu;
        const struct bpf_struct_ops *st_ops;
        /* protect map_update */
        struct mutex lock;
@@ -622,6 +623,14 @@ bool bpf_struct_ops_get(const void *kdata)
        return refcount_inc_not_zero(&kvalue->refcnt);
 }
 
+static void bpf_struct_ops_put_rcu(struct rcu_head *head)
+{
+       struct bpf_struct_ops_map *st_map;
+
+       st_map = container_of(head, struct bpf_struct_ops_map, rcu);
+       bpf_map_put(&st_map->map);
+}
+
 void bpf_struct_ops_put(const void *kdata)
 {
        struct bpf_struct_ops_value *kvalue;
@@ -632,6 +641,17 @@ void bpf_struct_ops_put(const void *kdata)
 
                st_map = container_of(kvalue, struct bpf_struct_ops_map,
                                      kvalue);
-               bpf_map_put(&st_map->map);
+               /* The struct_ops's function may switch to another struct_ops.
+                *
+                * For example, bpf_tcp_cc_x->init() may switch to
+                * another tcp_cc_y by calling
+                * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+                * During the switch,  bpf_struct_ops_put(tcp_cc_x) is called
+                * and its map->refcnt may reach 0 which then free its
+                * trampoline image while tcp_cc_x is still running.
+                *
+                * Thus, a rcu grace period is needed here.
+                */
+               call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
        }
 }
index 3ce7575..ebfa8bc 100644 (file)
@@ -317,15 +317,13 @@ const struct bpf_map_ops task_storage_map_ops = {
        .map_owner_storage_ptr = task_storage_ptr,
 };
 
-BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
-
 const struct bpf_func_proto bpf_task_storage_get_proto = {
        .func = bpf_task_storage_get,
        .gpl_only = false,
        .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
        .arg1_type = ARG_CONST_MAP_PTR,
        .arg2_type = ARG_PTR_TO_BTF_ID,
-       .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+       .arg2_btf_id = &btf_task_struct_ids[0],
        .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
        .arg4_type = ARG_ANYTHING,
 };
@@ -336,5 +334,5 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = {
        .ret_type = RET_INTEGER,
        .arg1_type = ARG_CONST_MAP_PTR,
        .arg2_type = ARG_PTR_TO_BTF_ID,
-       .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+       .arg2_btf_id = &btf_task_struct_ids[0],
 };
index cb4b729..dfe61df 100644 (file)
@@ -3046,43 +3046,92 @@ static void btf_struct_log(struct btf_verifier_env *env,
        btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
-/* find 'struct bpf_spin_lock' in map value.
- * return >= 0 offset if found
- * and < 0 in case of error
- */
-int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
+                                const char *name, int sz, int align)
 {
        const struct btf_member *member;
        u32 i, off = -ENOENT;
 
-       if (!__btf_type_is_struct(t))
-               return -EINVAL;
-
        for_each_member(i, t, member) {
                const struct btf_type *member_type = btf_type_by_id(btf,
                                                                    member->type);
                if (!__btf_type_is_struct(member_type))
                        continue;
-               if (member_type->size != sizeof(struct bpf_spin_lock))
+               if (member_type->size != sz)
                        continue;
-               if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
-                          "bpf_spin_lock"))
+               if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
                        continue;
                if (off != -ENOENT)
-                       /* only one 'struct bpf_spin_lock' is allowed */
+                       /* only one such field is allowed */
                        return -E2BIG;
                off = btf_member_bit_offset(t, member);
                if (off % 8)
                        /* valid C code cannot generate such BTF */
                        return -EINVAL;
                off /= 8;
-               if (off % __alignof__(struct bpf_spin_lock))
-                       /* valid struct bpf_spin_lock will be 4 byte aligned */
+               if (off % align)
+                       return -EINVAL;
+       }
+       return off;
+}
+
+static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
+                               const char *name, int sz, int align)
+{
+       const struct btf_var_secinfo *vsi;
+       u32 i, off = -ENOENT;
+
+       for_each_vsi(i, t, vsi) {
+               const struct btf_type *var = btf_type_by_id(btf, vsi->type);
+               const struct btf_type *var_type = btf_type_by_id(btf, var->type);
+
+               if (!__btf_type_is_struct(var_type))
+                       continue;
+               if (var_type->size != sz)
+                       continue;
+               if (vsi->size != sz)
+                       continue;
+               if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
+                       continue;
+               if (off != -ENOENT)
+                       /* only one such field is allowed */
+                       return -E2BIG;
+               off = vsi->offset;
+               if (off % align)
                        return -EINVAL;
        }
        return off;
 }
 
+static int btf_find_field(const struct btf *btf, const struct btf_type *t,
+                         const char *name, int sz, int align)
+{
+
+       if (__btf_type_is_struct(t))
+               return btf_find_struct_field(btf, t, name, sz, align);
+       else if (btf_type_is_datasec(t))
+               return btf_find_datasec_var(btf, t, name, sz, align);
+       return -EINVAL;
+}
+
+/* find 'struct bpf_spin_lock' in map value.
+ * return >= 0 offset if found
+ * and < 0 in case of error
+ */
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+{
+       return btf_find_field(btf, t, "bpf_spin_lock",
+                             sizeof(struct bpf_spin_lock),
+                             __alignof__(struct bpf_spin_lock));
+}
+
+int btf_find_timer(const struct btf *btf, const struct btf_type *t)
+{
+       return btf_find_field(btf, t, "bpf_timer",
+                             sizeof(struct bpf_timer),
+                             __alignof__(struct bpf_timer));
+}
+
 static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
                              u32 type_id, void *data, u8 bits_offset,
                              struct btf_show *show)
@@ -4776,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
 
                if (ctx_arg_info->offset == off) {
+                       if (!ctx_arg_info->btf_id) {
+                               bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
+                               return false;
+                       }
+
                        info->reg_type = ctx_arg_info->reg_type;
                        info->btf = btf_vmlinux;
                        info->btf_id = ctx_arg_info->btf_id;
@@ -6159,3 +6213,5 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
        .arg3_type      = ARG_ANYTHING,
        .arg4_type      = ARG_ANYTHING,
 };
+
+BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
index b567ca4..03145d4 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "../cgroup/cgroup-internal.h"
 
-DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_BPF_ATTACH_TYPE);
+DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
 void cgroup_bpf_offline(struct cgroup *cgrp)
@@ -113,12 +113,12 @@ static void cgroup_bpf_release(struct work_struct *work)
        struct list_head *storages = &cgrp->bpf.storages;
        struct bpf_cgroup_storage *storage, *stmp;
 
-       unsigned int type;
+       unsigned int atype;
 
        mutex_lock(&cgroup_mutex);
 
-       for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
-               struct list_head *progs = &cgrp->bpf.progs[type];
+       for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
+               struct list_head *progs = &cgrp->bpf.progs[atype];
                struct bpf_prog_list *pl, *pltmp;
 
                list_for_each_entry_safe(pl, pltmp, progs, node) {
@@ -128,10 +128,10 @@ static void cgroup_bpf_release(struct work_struct *work)
                        if (pl->link)
                                bpf_cgroup_link_auto_detach(pl->link);
                        kfree(pl);
-                       static_branch_dec(&cgroup_bpf_enabled_key[type]);
+                       static_branch_dec(&cgroup_bpf_enabled_key[atype]);
                }
                old_array = rcu_dereference_protected(
-                               cgrp->bpf.effective[type],
+                               cgrp->bpf.effective[atype],
                                lockdep_is_held(&cgroup_mutex));
                bpf_prog_array_free(old_array);
        }
@@ -196,7 +196,7 @@ static u32 prog_list_length(struct list_head *head)
  * if parent has overridable or multi-prog, allow attaching
  */
 static bool hierarchy_allows_attach(struct cgroup *cgrp,
-                                   enum bpf_attach_type type)
+                                   enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *p;
 
@@ -204,12 +204,12 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
        if (!p)
                return true;
        do {
-               u32 flags = p->bpf.flags[type];
+               u32 flags = p->bpf.flags[atype];
                u32 cnt;
 
                if (flags & BPF_F_ALLOW_MULTI)
                        return true;
-               cnt = prog_list_length(&p->bpf.progs[type]);
+               cnt = prog_list_length(&p->bpf.progs[atype]);
                WARN_ON_ONCE(cnt > 1);
                if (cnt == 1)
                        return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -225,7 +225,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  * to programs in this cgroup
  */
 static int compute_effective_progs(struct cgroup *cgrp,
-                                  enum bpf_attach_type type,
+                                  enum cgroup_bpf_attach_type atype,
                                   struct bpf_prog_array **array)
 {
        struct bpf_prog_array_item *item;
@@ -236,8 +236,8 @@ static int compute_effective_progs(struct cgroup *cgrp,
 
        /* count number of effective programs by walking parents */
        do {
-               if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-                       cnt += prog_list_length(&p->bpf.progs[type]);
+               if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+                       cnt += prog_list_length(&p->bpf.progs[atype]);
                p = cgroup_parent(p);
        } while (p);
 
@@ -249,10 +249,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
        cnt = 0;
        p = cgrp;
        do {
-               if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+               if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
                        continue;
 
-               list_for_each_entry(pl, &p->bpf.progs[type], node) {
+               list_for_each_entry(pl, &p->bpf.progs[atype], node) {
                        if (!prog_list_prog(pl))
                                continue;
 
@@ -269,10 +269,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
-                                    enum bpf_attach_type type,
+                                    enum cgroup_bpf_attach_type atype,
                                     struct bpf_prog_array *old_array)
 {
-       old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+       old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
                                        lockdep_is_held(&cgroup_mutex));
        /* free prog array after grace period, since __cgroup_bpf_run_*()
         * might be still walking the array
@@ -328,7 +328,7 @@ cleanup:
 }
 
 static int update_effective_progs(struct cgroup *cgrp,
-                                 enum bpf_attach_type type)
+                                 enum cgroup_bpf_attach_type atype)
 {
        struct cgroup_subsys_state *css;
        int err;
@@ -340,7 +340,7 @@ static int update_effective_progs(struct cgroup *cgrp,
                if (percpu_ref_is_zero(&desc->bpf.refcnt))
                        continue;
 
-               err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+               err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
                if (err)
                        goto cleanup;
        }
@@ -357,7 +357,7 @@ static int update_effective_progs(struct cgroup *cgrp,
                        continue;
                }
 
-               activate_effective_progs(desc, type, desc->bpf.inactive);
+               activate_effective_progs(desc, atype, desc->bpf.inactive);
                desc->bpf.inactive = NULL;
        }
 
@@ -436,11 +436,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
                        enum bpf_attach_type type, u32 flags)
 {
        u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
-       struct list_head *progs = &cgrp->bpf.progs[type];
        struct bpf_prog *old_prog = NULL;
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
        struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+       enum cgroup_bpf_attach_type atype;
        struct bpf_prog_list *pl;
+       struct list_head *progs;
        int err;
 
        if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -454,10 +455,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
                /* replace_prog implies BPF_F_REPLACE, and vice versa */
                return -EINVAL;
 
-       if (!hierarchy_allows_attach(cgrp, type))
+       atype = to_cgroup_bpf_attach_type(type);
+       if (atype < 0)
+               return -EINVAL;
+
+       progs = &cgrp->bpf.progs[atype];
+
+       if (!hierarchy_allows_attach(cgrp, atype))
                return -EPERM;
 
-       if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
+       if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
                /* Disallow attaching non-overridable on top
                 * of existing overridable in this cgroup.
                 * Disallow attaching multi-prog if overridable or none
@@ -490,16 +497,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
        pl->prog = prog;
        pl->link = link;
        bpf_cgroup_storages_assign(pl->storage, storage);
-       cgrp->bpf.flags[type] = saved_flags;
+       cgrp->bpf.flags[atype] = saved_flags;
 
-       err = update_effective_progs(cgrp, type);
+       err = update_effective_progs(cgrp, atype);
        if (err)
                goto cleanup;
 
        if (old_prog)
                bpf_prog_put(old_prog);
        else
-               static_branch_inc(&cgroup_bpf_enabled_key[type]);
+               static_branch_inc(&cgroup_bpf_enabled_key[atype]);
        bpf_cgroup_storages_link(new_storage, cgrp, type);
        return 0;
 
@@ -520,7 +527,7 @@ cleanup:
  * all descendant cgroups. This function is guaranteed to succeed.
  */
 static void replace_effective_prog(struct cgroup *cgrp,
-                                  enum bpf_attach_type type,
+                                  enum cgroup_bpf_attach_type atype,
                                   struct bpf_cgroup_link *link)
 {
        struct bpf_prog_array_item *item;
@@ -539,10 +546,10 @@ static void replace_effective_prog(struct cgroup *cgrp,
 
                /* find position of link in effective progs array */
                for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
-                       if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+                       if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
                                continue;
 
-                       head = &cg->bpf.progs[type];
+                       head = &cg->bpf.progs[atype];
                        list_for_each_entry(pl, head, node) {
                                if (!prog_list_prog(pl))
                                        continue;
@@ -554,7 +561,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
 found:
                BUG_ON(!cg);
                progs = rcu_dereference_protected(
-                               desc->bpf.effective[type],
+                               desc->bpf.effective[atype],
                                lockdep_is_held(&cgroup_mutex));
                item = &progs->items[pos];
                WRITE_ONCE(item->prog, link->link.prog);
@@ -574,11 +581,18 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
                                struct bpf_cgroup_link *link,
                                struct bpf_prog *new_prog)
 {
-       struct list_head *progs = &cgrp->bpf.progs[link->type];
+       enum cgroup_bpf_attach_type atype;
        struct bpf_prog *old_prog;
        struct bpf_prog_list *pl;
+       struct list_head *progs;
        bool found = false;
 
+       atype = to_cgroup_bpf_attach_type(link->type);
+       if (atype < 0)
+               return -EINVAL;
+
+       progs = &cgrp->bpf.progs[atype];
+
        if (link->link.prog->type != new_prog->type)
                return -EINVAL;
 
@@ -592,7 +606,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
                return -ENOENT;
 
        old_prog = xchg(&link->link.prog, new_prog);
-       replace_effective_prog(cgrp, link->type, link);
+       replace_effective_prog(cgrp, atype, link);
        bpf_prog_put(old_prog);
        return 0;
 }
@@ -667,12 +681,20 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                        struct bpf_cgroup_link *link, enum bpf_attach_type type)
 {
-       struct list_head *progs = &cgrp->bpf.progs[type];
-       u32 flags = cgrp->bpf.flags[type];
-       struct bpf_prog_list *pl;
+       enum cgroup_bpf_attach_type atype;
        struct bpf_prog *old_prog;
+       struct bpf_prog_list *pl;
+       struct list_head *progs;
+       u32 flags;
        int err;
 
+       atype = to_cgroup_bpf_attach_type(type);
+       if (atype < 0)
+               return -EINVAL;
+
+       progs = &cgrp->bpf.progs[atype];
+       flags = cgrp->bpf.flags[atype];
+
        if (prog && link)
                /* only one of prog or link can be specified */
                return -EINVAL;
@@ -686,7 +708,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
        pl->prog = NULL;
        pl->link = NULL;
 
-       err = update_effective_progs(cgrp, type);
+       err = update_effective_progs(cgrp, atype);
        if (err)
                goto cleanup;
 
@@ -695,10 +717,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
        kfree(pl);
        if (list_empty(progs))
                /* last program was detached, reset flags to zero */
-               cgrp->bpf.flags[type] = 0;
+               cgrp->bpf.flags[atype] = 0;
        if (old_prog)
                bpf_prog_put(old_prog);
-       static_branch_dec(&cgroup_bpf_enabled_key[type]);
+       static_branch_dec(&cgroup_bpf_enabled_key[atype]);
        return 0;
 
 cleanup:
@@ -714,13 +736,21 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 {
        __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
        enum bpf_attach_type type = attr->query.attach_type;
-       struct list_head *progs = &cgrp->bpf.progs[type];
-       u32 flags = cgrp->bpf.flags[type];
+       enum cgroup_bpf_attach_type atype;
        struct bpf_prog_array *effective;
+       struct list_head *progs;
        struct bpf_prog *prog;
        int cnt, ret = 0, i;
+       u32 flags;
 
-       effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+       atype = to_cgroup_bpf_attach_type(type);
+       if (atype < 0)
+               return -EINVAL;
+
+       progs = &cgrp->bpf.progs[atype];
+       flags = cgrp->bpf.flags[atype];
+
+       effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
                                              lockdep_is_held(&cgroup_mutex));
 
        if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
@@ -925,14 +955,14 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
        link->cgroup = cgrp;
        link->type = attr->link_create.attach_type;
 
-       err  = bpf_link_prime(&link->link, &link_primer);
+       err = bpf_link_prime(&link->link, &link_primer);
        if (err) {
                kfree(link);
                goto out_put_cgroup;
        }
 
-       err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
-                               BPF_F_ALLOW_MULTI);
+       err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
+                               link->type, BPF_F_ALLOW_MULTI);
        if (err) {
                bpf_link_cleanup(&link_primer);
                goto out_put_cgroup;
@@ -986,7 +1016,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  */
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
                                struct sk_buff *skb,
-                               enum bpf_attach_type type)
+                               enum cgroup_bpf_attach_type atype)
 {
        unsigned int offset = skb->data - skb_network_header(skb);
        struct sock *save_sk;
@@ -1008,12 +1038,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
        /* compute pointers for the bpf prog */
        bpf_compute_and_save_data_end(skb, &saved_data_end);
 
-       if (type == BPF_CGROUP_INET_EGRESS) {
+       if (atype == CGROUP_INET_EGRESS) {
                ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
-                       cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+                       cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
-               ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-                                         __bpf_prog_run_save_cb);
+               ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
+                                           __bpf_prog_run_save_cb);
                ret = (ret == 1 ? 0 : -EPERM);
        }
        bpf_restore_data_end(skb, saved_data_end);
@@ -1038,12 +1068,12 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
  * and if it returned != 1 during execution. In all other cases, 0 is returned.
  */
 int __cgroup_bpf_run_filter_sk(struct sock *sk,
-                              enum bpf_attach_type type)
+                              enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
        int ret;
 
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
        return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1065,7 +1095,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  */
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
                                      struct sockaddr *uaddr,
-                                     enum bpf_attach_type type,
+                                     enum cgroup_bpf_attach_type atype,
                                      void *t_ctx,
                                      u32 *flags)
 {
@@ -1090,8 +1120,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
        }
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
-                                      BPF_PROG_RUN, flags);
+       ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+                                         bpf_prog_run, flags);
 
        return ret == 1 ? 0 : -EPERM;
 }
@@ -1115,19 +1145,19 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
  */
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
                                     struct bpf_sock_ops_kern *sock_ops,
-                                    enum bpf_attach_type type)
+                                    enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
        int ret;
 
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
-                                BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+                                   bpf_prog_run);
        return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
-                                     short access, enum bpf_attach_type type)
+                                     short access, enum cgroup_bpf_attach_type atype)
 {
        struct cgroup *cgrp;
        struct bpf_cgroup_dev_ctx ctx = {
@@ -1135,12 +1165,12 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
                .major = major,
                .minor = minor,
        };
-       int allow = 1;
+       int allow;
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
-                                  BPF_PROG_RUN);
+       allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                     bpf_prog_run);
        rcu_read_unlock();
 
        return !allow;
@@ -1231,7 +1261,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
                                   struct ctl_table *table, int write,
                                   char **buf, size_t *pcount, loff_t *ppos,
-                                  enum bpf_attach_type type)
+                                  enum cgroup_bpf_attach_type atype)
 {
        struct bpf_sysctl_kern ctx = {
                .head = head,
@@ -1271,7 +1301,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1289,7 +1319,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 #ifdef CONFIG_NET
 static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
-                                            enum bpf_attach_type attach_type)
+                                            enum cgroup_bpf_attach_type attach_type)
 {
        struct bpf_prog_array *prog_array;
        bool empty;
@@ -1364,7 +1394,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
         * attached to the hook so we don't waste time allocating
         * memory and locking the socket.
         */
-       if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
+       if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
                return 0;
 
        /* Allocate a bit more than the initial user buffer for
@@ -1385,8 +1415,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
        }
 
        lock_sock(sk);
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
-                                &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+                                   &ctx, bpf_prog_run);
        release_sock(sk);
 
        if (!ret) {
@@ -1460,7 +1490,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
         * attached to the hook so we don't waste time allocating
         * memory and locking the socket.
         */
-       if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
+       if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
                return retval;
 
        ctx.optlen = max_optlen;
@@ -1495,8 +1525,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
        }
 
        lock_sock(sk);
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-                                &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+                                   &ctx, bpf_prog_run);
        release_sock(sk);
 
        if (!ret) {
@@ -1556,8 +1586,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         * be called if that data shouldn't be "exported".
         */
 
-       ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
-                                &ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+                                   &ctx, bpf_prog_run);
        if (!ret)
                return -EPERM;
 
@@ -1846,15 +1876,41 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
 const struct bpf_prog_ops cg_sysctl_prog_ops = {
 };
 
+#ifdef CONFIG_NET
+BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
+{
+       const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
+
+       return net->net_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
+       .func           = bpf_get_netns_cookie_sockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
+};
+#endif
+
 static const struct bpf_func_proto *
 cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
        switch (func_id) {
 #ifdef CONFIG_NET
+       case BPF_FUNC_get_netns_cookie:
+               return &bpf_get_netns_cookie_sockopt_proto;
        case BPF_FUNC_sk_storage_get:
                return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
                return &bpf_sk_storage_delete_proto;
+       case BPF_FUNC_setsockopt:
+               if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+                       return &bpf_sk_setsockopt_proto;
+               return NULL;
+       case BPF_FUNC_getsockopt:
+               if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+                       return &bpf_sk_getsockopt_proto;
+               return NULL;
 #endif
 #ifdef CONFIG_INET
        case BPF_FUNC_tcp_sock:
index 0a28a80..9f4636d 100644 (file)
@@ -1879,7 +1879,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
  *     @err: pointer to error variable
  *
  * Try to JIT eBPF program, if JIT is not available, use interpreter.
- * The BPF program will be executed via BPF_PROG_RUN() macro.
+ * The BPF program will be executed via bpf_prog_run() function.
  *
  * Return: the &fp argument along with &err set to 0 for success or
  * a negative errno code on failure
@@ -2119,13 +2119,13 @@ int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
 int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *exclude_prog,
                        struct bpf_prog *include_prog,
+                       u64 bpf_cookie,
                        struct bpf_prog_array **new_array)
 {
        int new_prog_cnt, carry_prog_cnt = 0;
-       struct bpf_prog_array_item *existing;
+       struct bpf_prog_array_item *existing, *new;
        struct bpf_prog_array *array;
        bool found_exclude = false;
-       int new_prog_idx = 0;
 
        /* Figure out how many existing progs we need to carry over to
         * the new array.
@@ -2162,20 +2162,27 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
        array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
        if (!array)
                return -ENOMEM;
+       new = array->items;
 
        /* Fill in the new prog array */
        if (carry_prog_cnt) {
                existing = old_array->items;
-               for (; existing->prog; existing++)
-                       if (existing->prog != exclude_prog &&
-                           existing->prog != &dummy_bpf_prog.prog) {
-                               array->items[new_prog_idx++].prog =
-                                       existing->prog;
-                       }
+               for (; existing->prog; existing++) {
+                       if (existing->prog == exclude_prog ||
+                           existing->prog == &dummy_bpf_prog.prog)
+                               continue;
+
+                       new->prog = existing->prog;
+                       new->bpf_cookie = existing->bpf_cookie;
+                       new++;
+               }
        }
-       if (include_prog)
-               array->items[new_prog_idx++].prog = include_prog;
-       array->items[new_prog_idx].prog = NULL;
+       if (include_prog) {
+               new->prog = include_prog;
+               new->bpf_cookie = bpf_cookie;
+               new++;
+       }
+       new->prog = NULL;
        *new_array = array;
        return 0;
 }
index 480e936..585b2b7 100644 (file)
@@ -16,6 +16,7 @@
  * netstack, and assigning dedicated CPUs for this stage.  This
  * basically allows for 10G wirespeed pre-filtering via bpf.
  */
+#include <linux/bitops.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
@@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
        }
 }
 
+static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+                                    struct list_head *listp,
+                                    struct xdp_cpumap_stats *stats)
+{
+       struct sk_buff *skb, *tmp;
+       struct xdp_buff xdp;
+       u32 act;
+       int err;
+
+       list_for_each_entry_safe(skb, tmp, listp, list) {
+               act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
+               switch (act) {
+               case XDP_PASS:
+                       break;
+               case XDP_REDIRECT:
+                       skb_list_del_init(skb);
+                       err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
+                                                     rcpu->prog);
+                       if (unlikely(err)) {
+                               kfree_skb(skb);
+                               stats->drop++;
+                       } else {
+                               stats->redirect++;
+                       }
+                       return;
+               default:
+                       bpf_warn_invalid_xdp_action(act);
+                       fallthrough;
+               case XDP_ABORTED:
+                       trace_xdp_exception(skb->dev, rcpu->prog, act);
+                       fallthrough;
+               case XDP_DROP:
+                       skb_list_del_init(skb);
+                       kfree_skb(skb);
+                       stats->drop++;
+                       return;
+               }
+       }
+}
+
 static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
                                    void **frames, int n,
                                    struct xdp_cpumap_stats *stats)
@@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
        struct xdp_buff xdp;
        int i, nframes = 0;
 
-       if (!rcpu->prog)
-               return n;
-
-       rcu_read_lock_bh();
-
        xdp_set_return_frame_no_direct();
        xdp.rxq = &rxq;
 
@@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
                }
        }
 
+       xdp_clear_return_frame_no_direct();
+
+       return nframes;
+}
+
+#define CPUMAP_BATCH 8
+
+static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+                               int xdp_n, struct xdp_cpumap_stats *stats,
+                               struct list_head *list)
+{
+       int nframes;
+
+       if (!rcpu->prog)
+               return xdp_n;
+
+       rcu_read_lock_bh();
+
+       nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+
        if (stats->redirect)
-               xdp_do_flush_map();
+               xdp_do_flush();
 
-       xdp_clear_return_frame_no_direct();
+       if (unlikely(!list_empty(list)))
+               cpu_map_bpf_prog_run_skb(rcpu, list, stats);
 
        rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
 
        return nframes;
 }
 
-#define CPUMAP_BATCH 8
 
 static int cpu_map_kthread_run(void *data)
 {
@@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data)
                struct xdp_cpumap_stats stats = {}; /* zero stats */
                unsigned int kmem_alloc_drops = 0, sched = 0;
                gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+               int i, n, m, nframes, xdp_n;
                void *frames[CPUMAP_BATCH];
                void *skbs[CPUMAP_BATCH];
-               int i, n, m, nframes;
                LIST_HEAD(list);
 
                /* Release CPU reschedule checks */
@@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data)
                 */
                n = __ptr_ring_consume_batched(rcpu->queue, frames,
                                               CPUMAP_BATCH);
-               for (i = 0; i < n; i++) {
+               for (i = 0, xdp_n = 0; i < n; i++) {
                        void *f = frames[i];
-                       struct page *page = virt_to_page(f);
+                       struct page *page;
+
+                       if (unlikely(__ptr_test_bit(0, &f))) {
+                               struct sk_buff *skb = f;
+
+                               __ptr_clear_bit(0, &skb);
+                               list_add_tail(&skb->list, &list);
+                               continue;
+                       }
+
+                       frames[xdp_n++] = f;
+                       page = virt_to_page(f);
 
                        /* Bring struct page memory area to curr CPU. Read by
                         * build_skb_around via page_is_pfmemalloc(), and when
@@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data)
                }
 
                /* Support running another XDP prog on this CPU */
-               nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+               nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
                if (nframes) {
                        m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
                        if (unlikely(m == 0)) {
@@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data)
        return 0;
 }
 
-bool cpu_map_prog_allowed(struct bpf_map *map)
-{
-       return map->map_type == BPF_MAP_TYPE_CPUMAP &&
-              map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
-}
-
 static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
 {
        struct bpf_prog *prog;
@@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
        return 0;
 }
 
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+                            struct sk_buff *skb)
+{
+       int ret;
+
+       __skb_pull(skb, skb->mac_len);
+       skb_set_redirected(skb, false);
+       __ptr_set_bit(0, &skb);
+
+       ret = ptr_ring_produce(rcpu->queue, skb);
+       if (ret < 0)
+               goto trace;
+
+       wake_up_process(rcpu->kthread);
+trace:
+       trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
+       return ret;
+}
+
 void __cpu_map_flush(void)
 {
        struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
index fdc2089..f02d045 100644 (file)
@@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
        return -ENOENT;
 }
 
-bool dev_map_can_have_prog(struct bpf_map *map)
-{
-       if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
-            map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
-           map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
-               return true;
-
-       return false;
-}
-
 static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
                                struct xdp_frame **frames, int n,
                                struct net_device *dev)
@@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
        return 0;
 }
 
+static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
+{
+       struct xdp_txq_info txq = { .dev = dst->dev };
+       struct xdp_buff xdp;
+       u32 act;
+
+       if (!dst->xdp_prog)
+               return XDP_PASS;
+
+       __skb_pull(skb, skb->mac_len);
+       xdp.txq = &txq;
+
+       act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
+       switch (act) {
+       case XDP_PASS:
+               __skb_push(skb, skb->mac_len);
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+               fallthrough;
+       case XDP_ABORTED:
+               trace_xdp_exception(dst->dev, dst->xdp_prog, act);
+               fallthrough;
+       case XDP_DROP:
+               kfree_skb(skb);
+               break;
+       }
+
+       return act;
+}
+
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
                    struct net_device *dev_rx)
 {
@@ -513,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
 }
 
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
-                        int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
 {
-       if (!obj || obj->dev->ifindex == exclude_ifindex ||
+       if (!obj ||
            !obj->dev->netdev_ops->ndo_xdp_xmit)
                return false;
 
@@ -541,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
        return 0;
 }
 
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+       while (num_excluded--) {
+               if (ifindex == excluded[num_excluded])
+                       return true;
+       }
+       return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+       struct net_device *upper;
+       struct list_head *iter;
+       int n = 0;
+
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               indexes[n++] = upper->ifindex;
+       }
+       return n;
+}
+
 int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                          struct bpf_map *map, bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1+MAX_NEST_DEV];
        struct hlist_head *head;
        struct xdp_frame *xdpf;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+               excluded_devices[num_excluded++] = dev_rx->ifindex;
+       }
+
        xdpf = xdp_convert_buff_to_frame(xdp);
        if (unlikely(!xdpf))
                return -EOVERFLOW;
@@ -560,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                       if (!is_valid_dst(dst, xdp))
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -580,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_rcu(dst, head, index_hlist,
                                                 lockdep_is_held(&dtab->index_lock)) {
-                               if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                               if (!is_valid_dst(dst, xdp))
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
@@ -615,6 +673,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
        err = xdp_ok_fwd_dev(dst->dev, skb->len);
        if (unlikely(err))
                return err;
+
+       /* Redirect has already succeeded semantically at this point, so we just
+        * return 0 even if packet is dropped. Helper below takes care of
+        * freeing skb.
+        */
+       if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
+               return 0;
+
        skb->dev = dst->dev;
        generic_xdp_tx(skb, xdp_prog);
 
@@ -646,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                           bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1+MAX_NEST_DEV];
        struct hlist_head *head;
        struct hlist_node *next;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev, excluded_devices);
+               excluded_devices[num_excluded++] = dev->ifindex;
+       }
+
        if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!dst || dst->dev->ifindex == exclude_ifindex)
+                       if (!dst)
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -671,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                                return err;
 
                        last_dst = dst;
+
                }
        } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
                for (i = 0; i < dtab->n_buckets; i++) {
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-                               if (!dst || dst->dev->ifindex == exclude_ifindex)
+                               if (!dst)
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
index 9c011f3..32471ba 100644 (file)
@@ -228,6 +228,32 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
        return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
 }
 
+static bool htab_has_extra_elems(struct bpf_htab *htab)
+{
+       return !htab_is_percpu(htab) && !htab_is_lru(htab);
+}
+
+static void htab_free_prealloced_timers(struct bpf_htab *htab)
+{
+       u32 num_entries = htab->map.max_entries;
+       int i;
+
+       if (likely(!map_value_has_timer(&htab->map)))
+               return;
+       if (htab_has_extra_elems(htab))
+               num_entries += num_possible_cpus();
+
+       for (i = 0; i < num_entries; i++) {
+               struct htab_elem *elem;
+
+               elem = get_htab_elem(htab, i);
+               bpf_timer_cancel_and_free(elem->key +
+                                         round_up(htab->map.key_size, 8) +
+                                         htab->map.timer_off);
+               cond_resched();
+       }
+}
+
 static void htab_free_elems(struct bpf_htab *htab)
 {
        int i;
@@ -265,8 +291,12 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
        struct htab_elem *l;
 
        if (node) {
+               u32 key_size = htab->map.key_size;
+
                l = container_of(node, struct htab_elem, lru_node);
-               memcpy(l->key, key, htab->map.key_size);
+               memcpy(l->key, key, key_size);
+               check_and_init_map_value(&htab->map,
+                                        l->key + round_up(key_size, 8));
                return l;
        }
 
@@ -278,7 +308,7 @@ static int prealloc_init(struct bpf_htab *htab)
        u32 num_entries = htab->map.max_entries;
        int err = -ENOMEM, i;
 
-       if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+       if (htab_has_extra_elems(htab))
                num_entries += num_possible_cpus();
 
        htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
@@ -695,6 +725,14 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
        return insn - insn_buf;
 }
 
+static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
+{
+       if (unlikely(map_value_has_timer(&htab->map)))
+               bpf_timer_cancel_and_free(elem->key +
+                                         round_up(htab->map.key_size, 8) +
+                                         htab->map.timer_off);
+}
+
 /* It is called from the bpf_lru_list when the LRU needs to delete
  * older elements from the htab.
  */
@@ -719,6 +757,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
        hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
                if (l == tgt_l) {
                        hlist_nulls_del_rcu(&l->hash_node);
+                       check_and_free_timer(htab, l);
                        break;
                }
 
@@ -790,6 +829,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 {
        if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
                free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
+       check_and_free_timer(htab, l);
        kfree(l);
 }
 
@@ -817,6 +857,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
        htab_put_fd_value(htab, l);
 
        if (htab_is_prealloc(htab)) {
+               check_and_free_timer(htab, l);
                __pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
                atomic_dec(&htab->count);
@@ -920,8 +961,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                        l_new = ERR_PTR(-ENOMEM);
                        goto dec_count;
                }
-               check_and_init_map_lock(&htab->map,
-                                       l_new->key + round_up(key_size, 8));
+               check_and_init_map_value(&htab->map,
+                                        l_new->key + round_up(key_size, 8));
        }
 
        memcpy(l_new->key, key, key_size);
@@ -1062,6 +1103,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                hlist_nulls_del_rcu(&l_old->hash_node);
                if (!htab_is_prealloc(htab))
                        free_htab_elem(htab, l_old);
+               else
+                       check_and_free_timer(htab, l_old);
        }
        ret = 0;
 err:
@@ -1069,6 +1112,12 @@ err:
        return ret;
 }
 
+static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
+{
+       check_and_free_timer(htab, elem);
+       bpf_lru_push_free(&htab->lru, &elem->lru_node);
+}
+
 static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
                                    u64 map_flags)
 {
@@ -1102,7 +1151,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
        l_new = prealloc_lru_pop(htab, key, hash);
        if (!l_new)
                return -ENOMEM;
-       memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
+       copy_map_value(&htab->map,
+                      l_new->key + round_up(map->key_size, 8), value);
 
        ret = htab_lock_bucket(htab, b, hash, &flags);
        if (ret)
@@ -1128,9 +1178,9 @@ err:
        htab_unlock_bucket(htab, b, hash, flags);
 
        if (ret)
-               bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+               htab_lru_push_free(htab, l_new);
        else if (l_old)
-               bpf_lru_push_free(&htab->lru, &l_old->lru_node);
+               htab_lru_push_free(htab, l_old);
 
        return ret;
 }
@@ -1339,7 +1389,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
 
        htab_unlock_bucket(htab, b, hash, flags);
        if (l)
-               bpf_lru_push_free(&htab->lru, &l->lru_node);
+               htab_lru_push_free(htab, l);
        return ret;
 }
 
@@ -1359,6 +1409,35 @@ static void delete_all_elements(struct bpf_htab *htab)
        }
 }
 
+static void htab_free_malloced_timers(struct bpf_htab *htab)
+{
+       int i;
+
+       rcu_read_lock();
+       for (i = 0; i < htab->n_buckets; i++) {
+               struct hlist_nulls_head *head = select_bucket(htab, i);
+               struct hlist_nulls_node *n;
+               struct htab_elem *l;
+
+               hlist_nulls_for_each_entry(l, n, head, hash_node)
+                       check_and_free_timer(htab, l);
+               cond_resched_rcu();
+       }
+       rcu_read_unlock();
+}
+
+static void htab_map_free_timers(struct bpf_map *map)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+
+       if (likely(!map_value_has_timer(&htab->map)))
+               return;
+       if (!htab_is_prealloc(htab))
+               htab_free_malloced_timers(htab);
+       else
+               htab_free_prealloced_timers(htab);
+}
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -1456,7 +1535,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
                        else
                                copy_map_value(map, value, l->key +
                                               roundup_key_size);
-                       check_and_init_map_lock(map, value);
+                       check_and_init_map_value(map, value);
                }
 
                hlist_nulls_del_rcu(&l->hash_node);
@@ -1467,7 +1546,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
        htab_unlock_bucket(htab, b, hash, bflags);
 
        if (is_lru_map && l)
-               bpf_lru_push_free(&htab->lru, &l->lru_node);
+               htab_lru_push_free(htab, l);
 
        return ret;
 }
@@ -1645,7 +1724,7 @@ again_nocopy:
                                                      true);
                        else
                                copy_map_value(map, dst_val, value);
-                       check_and_init_map_lock(map, dst_val);
+                       check_and_init_map_value(map, dst_val);
                }
                if (do_delete) {
                        hlist_nulls_del_rcu(&l->hash_node);
@@ -1672,7 +1751,7 @@ again_nocopy:
        while (node_to_free) {
                l = node_to_free;
                node_to_free = node_to_free->batch_flink;
-               bpf_lru_push_free(&htab->lru, &l->lru_node);
+               htab_lru_push_free(htab, l);
        }
 
 next_batch:
@@ -2034,6 +2113,7 @@ const struct bpf_map_ops htab_map_ops = {
        .map_alloc = htab_map_alloc,
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
+       .map_release_uref = htab_map_free_timers,
        .map_lookup_elem = htab_map_lookup_elem,
        .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
        .map_update_elem = htab_map_update_elem,
@@ -2055,6 +2135,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
        .map_alloc = htab_map_alloc,
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
+       .map_release_uref = htab_map_free_timers,
        .map_lookup_elem = htab_lru_map_lookup_elem,
        .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
        .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
index 55f83ea..9aabf84 100644 (file)
@@ -289,13 +289,18 @@ static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
 
 static DEFINE_PER_CPU(unsigned long, irqsave_flags);
 
-notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+static inline void __bpf_spin_lock_irqsave(struct bpf_spin_lock *lock)
 {
        unsigned long flags;
 
        local_irq_save(flags);
        __bpf_spin_lock(lock);
        __this_cpu_write(irqsave_flags, flags);
+}
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+       __bpf_spin_lock_irqsave(lock);
        return 0;
 }
 
@@ -306,13 +311,18 @@ const struct bpf_func_proto bpf_spin_lock_proto = {
        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 };
 
-notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+static inline void __bpf_spin_unlock_irqrestore(struct bpf_spin_lock *lock)
 {
        unsigned long flags;
 
        flags = __this_cpu_read(irqsave_flags);
        __bpf_spin_unlock(lock);
        local_irq_restore(flags);
+}
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+       __bpf_spin_unlock_irqrestore(lock);
        return 0;
 }
 
@@ -333,9 +343,9 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
        else
                lock = dst + map->spin_lock_off;
        preempt_disable();
-       ____bpf_spin_lock(lock);
+       __bpf_spin_lock_irqsave(lock);
        copy_map_value(map, dst, src);
-       ____bpf_spin_unlock(lock);
+       __bpf_spin_unlock_irqrestore(lock);
        preempt_enable();
 }
 
@@ -393,8 +403,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -403,17 +411,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
         * verifier checks that its value is correct.
         */
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-       struct bpf_cgroup_storage *storage = NULL;
+       struct bpf_cgroup_storage *storage;
+       struct bpf_cg_run_ctx *ctx;
        void *ptr;
-       int i;
 
-       for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) {
-               if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
-               break;
-       }
+       /* get current cgroup storage from BPF run context */
+       ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+       storage = ctx->prog_item->cgroup_storage[stype];
 
        if (stype == BPF_CGROUP_STORAGE_SHARED)
                ptr = &READ_ONCE(storage->buf)->data[0];
@@ -913,6 +917,20 @@ fmt_str:
                        tmp_buf += err;
                        num_spec++;
 
+                       continue;
+               } else if (fmt[i] == 'c') {
+                       if (!tmp_buf)
+                               goto nocopy_fmt;
+
+                       if (tmp_buf_end == tmp_buf) {
+                               err = -ENOSPC;
+                               goto out;
+                       }
+
+                       *tmp_buf = raw_args[num_spec];
+                       tmp_buf++;
+                       num_spec++;
+
                        continue;
                }
 
@@ -999,11 +1017,327 @@ const struct bpf_func_proto bpf_snprintf_proto = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+/* BPF map elements can contain 'struct bpf_timer'.
+ * Such map owns all of its BPF timers.
+ * 'struct bpf_timer' is allocated as part of map element allocation
+ * and it's zero initialized.
+ * That space is used to keep 'struct bpf_timer_kern'.
+ * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
+ * remembers 'struct bpf_map *' pointer it's part of.
+ * bpf_timer_set_callback() increments prog refcnt and assign bpf callback_fn.
+ * bpf_timer_start() arms the timer.
+ * If user space reference to a map goes to zero at this point
+ * ops->map_release_uref callback is responsible for cancelling the timers,
+ * freeing their memory, and decrementing prog's refcnts.
+ * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
+ * Inner maps can contain bpf timers as well. ops->map_release_uref is
+ * freeing the timers when inner map is replaced or deleted by user space.
+ */
+struct bpf_hrtimer {
+       struct hrtimer timer;
+       struct bpf_map *map;
+       struct bpf_prog *prog;
+       void __rcu *callback_fn;
+       void *value;
+};
+
+/* the actual struct hidden inside uapi struct bpf_timer */
+struct bpf_timer_kern {
+       struct bpf_hrtimer *timer;
+       /* bpf_spin_lock is used here instead of spinlock_t to make
+        * sure that it always fits into space resereved by struct bpf_timer
+        * regardless of LOCKDEP and spinlock debug flags.
+        */
+       struct bpf_spin_lock lock;
+} __attribute__((aligned(8)));
+
+static DEFINE_PER_CPU(struct bpf_hrtimer *, hrtimer_running);
+
+static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
+{
+       struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
+       struct bpf_map *map = t->map;
+       void *value = t->value;
+       void *callback_fn;
+       void *key;
+       u32 idx;
+
+       callback_fn = rcu_dereference_check(t->callback_fn, rcu_read_lock_bh_held());
+       if (!callback_fn)
+               goto out;
+
+       /* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and
+        * cannot be preempted by another bpf_timer_cb() on the same cpu.
+        * Remember the timer this callback is servicing to prevent
+        * deadlock if callback_fn() calls bpf_timer_cancel() or
+        * bpf_map_delete_elem() on the same timer.
+        */
+       this_cpu_write(hrtimer_running, t);
+       if (map->map_type == BPF_MAP_TYPE_ARRAY) {
+               struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+               /* compute the key */
+               idx = ((char *)value - array->value) / array->elem_size;
+               key = &idx;
+       } else { /* hash or lru */
+               key = value - round_up(map->key_size, 8);
+       }
+
+       BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
+                                  (u64)(long)value, 0, 0);
+       /* The verifier checked that return value is zero. */
+
+       this_cpu_write(hrtimer_running, NULL);
+out:
+       return HRTIMER_NORESTART;
+}
+
+BPF_CALL_3(bpf_timer_init, struct bpf_timer_kern *, timer, struct bpf_map *, map,
+          u64, flags)
+{
+       clockid_t clockid = flags & (MAX_CLOCKS - 1);
+       struct bpf_hrtimer *t;
+       int ret = 0;
+
+       BUILD_BUG_ON(MAX_CLOCKS != 16);
+       BUILD_BUG_ON(sizeof(struct bpf_timer_kern) > sizeof(struct bpf_timer));
+       BUILD_BUG_ON(__alignof__(struct bpf_timer_kern) != __alignof__(struct bpf_timer));
+
+       if (in_nmi())
+               return -EOPNOTSUPP;
+
+       if (flags >= MAX_CLOCKS ||
+           /* similar to timerfd except _ALARM variants are not supported */
+           (clockid != CLOCK_MONOTONIC &&
+            clockid != CLOCK_REALTIME &&
+            clockid != CLOCK_BOOTTIME))
+               return -EINVAL;
+       __bpf_spin_lock_irqsave(&timer->lock);
+       t = timer->timer;
+       if (t) {
+               ret = -EBUSY;
+               goto out;
+       }
+       if (!atomic64_read(&map->usercnt)) {
+               /* maps with timers must be either held by user space
+                * or pinned in bpffs.
+                */
+               ret = -EPERM;
+               goto out;
+       }
+       /* allocate hrtimer via map_kmalloc to use memcg accounting */
+       t = bpf_map_kmalloc_node(map, sizeof(*t), GFP_ATOMIC, map->numa_node);
+       if (!t) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       t->value = (void *)timer - map->timer_off;
+       t->map = map;
+       t->prog = NULL;
+       rcu_assign_pointer(t->callback_fn, NULL);
+       hrtimer_init(&t->timer, clockid, HRTIMER_MODE_REL_SOFT);
+       t->timer.function = bpf_timer_cb;
+       timer->timer = t;
+out:
+       __bpf_spin_unlock_irqrestore(&timer->lock);
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_timer_init_proto = {
+       .func           = bpf_timer_init,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_TIMER,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_timer_set_callback, struct bpf_timer_kern *, timer, void *, callback_fn,
+          struct bpf_prog_aux *, aux)
+{
+       struct bpf_prog *prev, *prog = aux->prog;
+       struct bpf_hrtimer *t;
+       int ret = 0;
+
+       if (in_nmi())
+               return -EOPNOTSUPP;
+       __bpf_spin_lock_irqsave(&timer->lock);
+       t = timer->timer;
+       if (!t) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (!atomic64_read(&t->map->usercnt)) {
+               /* maps with timers must be either held by user space
+                * or pinned in bpffs. Otherwise timer might still be
+                * running even when bpf prog is detached and user space
+                * is gone, since map_release_uref won't ever be called.
+                */
+               ret = -EPERM;
+               goto out;
+       }
+       prev = t->prog;
+       if (prev != prog) {
+               /* Bump prog refcnt once. Every bpf_timer_set_callback()
+                * can pick different callback_fn-s within the same prog.
+                */
+               prog = bpf_prog_inc_not_zero(prog);
+               if (IS_ERR(prog)) {
+                       ret = PTR_ERR(prog);
+                       goto out;
+               }
+               if (prev)
+                       /* Drop prev prog refcnt when swapping with new prog */
+                       bpf_prog_put(prev);
+               t->prog = prog;
+       }
+       rcu_assign_pointer(t->callback_fn, callback_fn);
+out:
+       __bpf_spin_unlock_irqrestore(&timer->lock);
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_timer_set_callback_proto = {
+       .func           = bpf_timer_set_callback,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_TIMER,
+       .arg2_type      = ARG_PTR_TO_FUNC,
+};
+
+BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, flags)
+{
+       struct bpf_hrtimer *t;
+       int ret = 0;
+
+       if (in_nmi())
+               return -EOPNOTSUPP;
+       if (flags)
+               return -EINVAL;
+       __bpf_spin_lock_irqsave(&timer->lock);
+       t = timer->timer;
+       if (!t || !t->prog) {
+               ret = -EINVAL;
+               goto out;
+       }
+       hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
+out:
+       __bpf_spin_unlock_irqrestore(&timer->lock);
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_timer_start_proto = {
+       .func           = bpf_timer_start,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_TIMER,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
+
+static void drop_prog_refcnt(struct bpf_hrtimer *t)
+{
+       struct bpf_prog *prog = t->prog;
+
+       if (prog) {
+               bpf_prog_put(prog);
+               t->prog = NULL;
+               rcu_assign_pointer(t->callback_fn, NULL);
+       }
+}
+
+BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
+{
+       struct bpf_hrtimer *t;
+       int ret = 0;
+
+       if (in_nmi())
+               return -EOPNOTSUPP;
+       __bpf_spin_lock_irqsave(&timer->lock);
+       t = timer->timer;
+       if (!t) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (this_cpu_read(hrtimer_running) == t) {
+               /* If bpf callback_fn is trying to bpf_timer_cancel()
+                * its own timer the hrtimer_cancel() will deadlock
+                * since it waits for callback_fn to finish
+                */
+               ret = -EDEADLK;
+               goto out;
+       }
+       drop_prog_refcnt(t);
+out:
+       __bpf_spin_unlock_irqrestore(&timer->lock);
+       /* Cancel the timer and wait for associated callback to finish
+        * if it was running.
+        */
+       ret = ret ?: hrtimer_cancel(&t->timer);
+       return ret;
+}
+
+static const struct bpf_func_proto bpf_timer_cancel_proto = {
+       .func           = bpf_timer_cancel,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_TIMER,
+};
+
+/* This function is called by map_delete/update_elem for individual element and
+ * by ops->map_release_uref when the user space reference to a map reaches zero.
+ */
+void bpf_timer_cancel_and_free(void *val)
+{
+       struct bpf_timer_kern *timer = val;
+       struct bpf_hrtimer *t;
+
+       /* Performance optimization: read timer->timer without lock first. */
+       if (!READ_ONCE(timer->timer))
+               return;
+
+       __bpf_spin_lock_irqsave(&timer->lock);
+       /* re-read it under lock */
+       t = timer->timer;
+       if (!t)
+               goto out;
+       drop_prog_refcnt(t);
+       /* The subsequent bpf_timer_start/cancel() helpers won't be able to use
+        * this timer, since it won't be initialized.
+        */
+       timer->timer = NULL;
+out:
+       __bpf_spin_unlock_irqrestore(&timer->lock);
+       if (!t)
+               return;
+       /* Cancel the timer and wait for callback to complete if it was running.
+        * If hrtimer_cancel() can be safely called it's safe to call kfree(t)
+        * right after for both preallocated and non-preallocated maps.
+        * The timer->timer = NULL was already done and no code path can
+        * see address 't' anymore.
+        *
+        * Check that bpf_map_delete/update_elem() wasn't called from timer
+        * callback_fn. In such case don't call hrtimer_cancel() (since it will
+        * deadlock) and don't call hrtimer_try_to_cancel() (since it will just
+        * return -1). Though callback_fn is still running on this cpu it's
+        * safe to do kfree(t) because bpf_timer_cb() read everything it needed
+        * from 't'. The bpf subprog callback_fn won't be able to access 't',
+        * since timer->timer = NULL was already done. The timer will be
+        * effectively cancelled because bpf_timer_cb() will return
+        * HRTIMER_NORESTART.
+        */
+       if (this_cpu_read(hrtimer_running) != t)
+               hrtimer_cancel(&t->timer);
+       kfree(t);
+}
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
+const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
 const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
 const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
 
 const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
@@ -1065,6 +1399,14 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_per_cpu_ptr_proto;
        case BPF_FUNC_this_cpu_ptr:
                return &bpf_this_cpu_ptr_proto;
+       case BPF_FUNC_timer_init:
+               return &bpf_timer_init_proto;
+       case BPF_FUNC_timer_set_callback:
+               return &bpf_timer_set_callback_proto;
+       case BPF_FUNC_timer_start:
+               return &bpf_timer_start_proto;
+       case BPF_FUNC_timer_cancel:
+               return &bpf_timer_cancel_proto;
        default:
                break;
        }
@@ -1077,6 +1419,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return bpf_get_trace_printk_proto();
        case BPF_FUNC_get_current_task:
                return &bpf_get_current_task_proto;
+       case BPF_FUNC_get_current_task_btf:
+               return &bpf_get_current_task_btf_proto;
        case BPF_FUNC_probe_read_user:
                return &bpf_probe_read_user_proto;
        case BPF_FUNC_probe_read_kernel:
@@ -1091,6 +1435,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_snprintf_btf_proto;
        case BPF_FUNC_snprintf:
                return &bpf_snprintf_proto;
+       case BPF_FUNC_task_pt_regs:
+               return &bpf_task_pt_regs_proto;
        default:
                return NULL;
        }
index bd11db9..035e9e3 100644 (file)
@@ -1,6 +1,7 @@
 //SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
 #include <linux/btf.h>
 #include <linux/bug.h>
 #include <linux/filter.h>
@@ -11,9 +12,6 @@
 
 #ifdef CONFIG_CGROUP_BPF
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
-              bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK                                 \
@@ -173,7 +171,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
                return -ENOMEM;
 
        memcpy(&new->data[0], value, map->value_size);
-       check_and_init_map_lock(map, new->data);
+       check_and_init_map_value(map, new->data);
 
        new = xchg(&storage->buf, new);
        kfree_rcu(new, rcu);
@@ -286,9 +284,17 @@ enoent:
 
 static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
+       __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_cgroup_storage_map *map;
 
+       /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percu
+        * is the same as other local storages.
+        */
+       if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+               max_value_size = min_t(__u32, max_value_size,
+                                      PCPU_MIN_UNIT_SIZE);
+
        if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
            attr->key_size != sizeof(__u64))
                return ERR_PTR(-EINVAL);
@@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
        if (attr->value_size == 0)
                return ERR_PTR(-EINVAL);
 
-       if (attr->value_size > PAGE_SIZE)
+       if (attr->value_size > max_value_size)
                return ERR_PTR(-E2BIG);
 
        if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
 static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
 {
-       enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+       enum bpf_cgroup_storage_type stype;
        struct bpf_cgroup_storage *storage;
        int cpu;
 
@@ -509,7 +515,7 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
                                                    map->numa_node);
                if (!storage->buf)
                        goto enomem;
-               check_and_init_map_lock(map, storage->buf->data);
+               check_and_init_map_value(map, storage->buf->data);
        } else {
                storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp);
                if (!storage->percpu_buf)
index 39ab0b6..5cd8f52 100644 (file)
@@ -3,6 +3,7 @@
  */
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 
 #include "map_in_map.h"
 
@@ -50,6 +51,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
        inner_map_meta->map_flags = inner_map->map_flags;
        inner_map_meta->max_entries = inner_map->max_entries;
        inner_map_meta->spin_lock_off = inner_map->spin_lock_off;
+       inner_map_meta->timer_off = inner_map->timer_off;
+       if (inner_map->btf) {
+               btf_get(inner_map->btf);
+               inner_map_meta->btf = inner_map->btf;
+       }
 
        /* Misc members not needed in bpf_map_meta_equal() check. */
        inner_map_meta->ops = inner_map->ops;
@@ -65,6 +71,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 
 void bpf_map_meta_free(struct bpf_map *map_meta)
 {
+       btf_put(map_meta->btf);
        kfree(map_meta);
 }
 
@@ -75,6 +82,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
        return meta0->map_type == meta1->map_type &&
                meta0->key_size == meta1->key_size &&
                meta0->value_size == meta1->value_size &&
+               meta0->timer_off == meta1->timer_off &&
                meta0->map_flags == meta1->map_flags;
 }
 
index 6fbc2ab..e8eefdf 100644 (file)
@@ -530,14 +530,12 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
        return res;
 }
 
-BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)
-
 const struct bpf_func_proto bpf_get_task_stack_proto = {
        .func           = bpf_get_task_stack,
        .gpl_only       = false,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_BTF_ID,
-       .arg1_btf_id    = &bpf_get_task_stack_btf_ids[0],
+       .arg1_btf_id    = &btf_task_struct_ids[0],
        .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
        .arg4_type      = ARG_ANYTHING,
index e343f15..4e50c0b 100644 (file)
@@ -260,8 +260,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
                                copy_map_value_locked(map, value, ptr, true);
                        else
                                copy_map_value(map, value, ptr);
-                       /* mask lock, since value wasn't zero inited */
-                       check_and_init_map_lock(map, value);
+                       /* mask lock and timer, since value wasn't zero inited */
+                       check_and_init_map_value(map, value);
                }
                rcu_read_unlock();
        }
@@ -623,7 +623,8 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
        struct bpf_map *map = filp->private_data;
        int err;
 
-       if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+       if (!map->ops->map_mmap || map_value_has_spin_lock(map) ||
+           map_value_has_timer(map))
                return -ENOTSUPP;
 
        if (!(vma->vm_flags & VM_SHARED))
@@ -793,6 +794,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
                }
        }
 
+       map->timer_off = btf_find_timer(btf, value_type);
+       if (map_value_has_timer(map)) {
+               if (map->map_flags & BPF_F_RDONLY_PROG)
+                       return -EACCES;
+               if (map->map_type != BPF_MAP_TYPE_HASH &&
+                   map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+                   map->map_type != BPF_MAP_TYPE_ARRAY)
+                       return -EOPNOTSUPP;
+       }
+
        if (map->ops->map_check_btf)
                ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 
@@ -844,6 +855,7 @@ static int map_create(union bpf_attr *attr)
        mutex_init(&map->freeze_mutex);
 
        map->spin_lock_off = -EINVAL;
+       map->timer_off = -EINVAL;
        if (attr->btf_key_type_id || attr->btf_value_type_id ||
            /* Even the map's value is a kernel's struct,
             * the bpf_prog.o must have BTF to begin with
@@ -1001,7 +1013,7 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 {
        if (key_size)
-               return memdup_user(ukey, key_size);
+               return vmemdup_user(ukey, key_size);
 
        if (ukey)
                return ERR_PTR(-EINVAL);
@@ -1012,7 +1024,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 {
        if (key_size)
-               return memdup_bpfptr(ukey, key_size);
+               return kvmemdup_bpfptr(ukey, key_size);
 
        if (!bpfptr_is_null(ukey))
                return ERR_PTR(-EINVAL);
@@ -1064,7 +1076,7 @@ static int map_lookup_elem(union bpf_attr *attr)
        value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
-       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
@@ -1079,9 +1091,9 @@ static int map_lookup_elem(union bpf_attr *attr)
        err = 0;
 
 free_value:
-       kfree(value);
+       kvfree(value);
 free_key:
-       kfree(key);
+       kvfree(key);
 err_put:
        fdput(f);
        return err;
@@ -1125,16 +1137,10 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
                goto err_put;
        }
 
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-           map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-               value_size = round_up(map->value_size, 8) * num_possible_cpus();
-       else
-               value_size = map->value_size;
+       value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
-       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
@@ -1145,9 +1151,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
        err = bpf_map_update_value(map, f, key, value, attr->flags);
 
 free_value:
-       kfree(value);
+       kvfree(value);
 free_key:
-       kfree(key);
+       kvfree(key);
 err_put:
        fdput(f);
        return err;
@@ -1199,7 +1205,7 @@ static int map_delete_elem(union bpf_attr *attr)
        bpf_enable_instrumentation();
        maybe_wait_bpf_programs(map);
 out:
-       kfree(key);
+       kvfree(key);
 err_put:
        fdput(f);
        return err;
@@ -1241,7 +1247,7 @@ static int map_get_next_key(union bpf_attr *attr)
        }
 
        err = -ENOMEM;
-       next_key = kmalloc(map->key_size, GFP_USER);
+       next_key = kvmalloc(map->key_size, GFP_USER);
        if (!next_key)
                goto free_key;
 
@@ -1264,9 +1270,9 @@ out:
        err = 0;
 
 free_next_key:
-       kfree(next_key);
+       kvfree(next_key);
 free_key:
-       kfree(key);
+       kvfree(key);
 err_put:
        fdput(f);
        return err;
@@ -1293,7 +1299,7 @@ int generic_map_delete_batch(struct bpf_map *map,
        if (!max_count)
                return 0;
 
-       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
        if (!key)
                return -ENOMEM;
 
@@ -1320,7 +1326,7 @@ int generic_map_delete_batch(struct bpf_map *map,
        if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
                err = -EFAULT;
 
-       kfree(key);
+       kvfree(key);
        return err;
 }
 
@@ -1351,13 +1357,13 @@ int generic_map_update_batch(struct bpf_map *map,
        if (!max_count)
                return 0;
 
-       key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
        if (!key)
                return -ENOMEM;
 
-       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value) {
-               kfree(key);
+               kvfree(key);
                return -ENOMEM;
        }
 
@@ -1378,8 +1384,8 @@ int generic_map_update_batch(struct bpf_map *map,
        if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
                err = -EFAULT;
 
-       kfree(value);
-       kfree(key);
+       kvfree(value);
+       kvfree(key);
        return err;
 }
 
@@ -1413,13 +1419,13 @@ int generic_map_lookup_batch(struct bpf_map *map,
        if (put_user(0, &uattr->batch.count))
                return -EFAULT;
 
-       buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+       buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
        if (!buf_prevkey)
                return -ENOMEM;
 
-       buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+       buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
        if (!buf) {
-               kfree(buf_prevkey);
+               kvfree(buf_prevkey);
                return -ENOMEM;
        }
 
@@ -1479,8 +1485,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
                err = -EFAULT;
 
 free_buf:
-       kfree(buf_prevkey);
-       kfree(buf);
+       kvfree(buf_prevkey);
+       kvfree(buf);
        return err;
 }
 
@@ -1535,7 +1541,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
        value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
-       value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+       value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
@@ -1567,9 +1573,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
        err = 0;
 
 free_value:
-       kfree(value);
+       kvfree(value);
 free_key:
-       kfree(key);
+       kvfree(key);
 err_put:
        fdput(f);
        return err;
@@ -1591,7 +1597,8 @@ static int map_freeze(const union bpf_attr *attr)
        if (IS_ERR(map))
                return PTR_ERR(map);
 
-       if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
+       if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
+           map_value_has_timer(map)) {
                fdput(f);
                return -ENOTSUPP;
        }
@@ -1699,6 +1706,8 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 
 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 {
+       unsigned long flags;
+
        /* cBPF to eBPF migrations are currently not in the idr store.
         * Offloaded programs are removed from the store when their device
         * disappears - even if someone grabs an fd to them they are unusable,
@@ -1708,7 +1717,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
                return;
 
        if (do_idr_lock)
-               spin_lock_bh(&prog_idr_lock);
+               spin_lock_irqsave(&prog_idr_lock, flags);
        else
                __acquire(&prog_idr_lock);
 
@@ -1716,7 +1725,7 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
        prog->aux->id = 0;
 
        if (do_idr_lock)
-               spin_unlock_bh(&prog_idr_lock);
+               spin_unlock_irqrestore(&prog_idr_lock, flags);
        else
                __release(&prog_idr_lock);
 }
@@ -1752,14 +1761,32 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
        }
 }
 
+static void bpf_prog_put_deferred(struct work_struct *work)
+{
+       struct bpf_prog_aux *aux;
+       struct bpf_prog *prog;
+
+       aux = container_of(work, struct bpf_prog_aux, work);
+       prog = aux->prog;
+       perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+       bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+       __bpf_prog_put_noref(prog, true);
+}
+
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
-       if (atomic64_dec_and_test(&prog->aux->refcnt)) {
-               perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
-               bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
+       struct bpf_prog_aux *aux = prog->aux;
+
+       if (atomic64_dec_and_test(&aux->refcnt)) {
                /* bpf_prog_free_id() must be called first */
                bpf_prog_free_id(prog, do_idr_lock);
-               __bpf_prog_put_noref(prog, true);
+
+               if (in_irq() || irqs_disabled()) {
+                       INIT_WORK(&aux->work, bpf_prog_put_deferred);
+                       schedule_work(&aux->work);
+               } else {
+                       bpf_prog_put_deferred(&aux->work);
+               }
        }
 }
 
@@ -2873,6 +2900,79 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = {
        .fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
 
+#ifdef CONFIG_PERF_EVENTS
+struct bpf_perf_link {
+       struct bpf_link link;
+       struct file *perf_file;
+};
+
+static void bpf_perf_link_release(struct bpf_link *link)
+{
+       struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+       struct perf_event *event = perf_link->perf_file->private_data;
+
+       perf_event_free_bpf_prog(event);
+       fput(perf_link->perf_file);
+}
+
+static void bpf_perf_link_dealloc(struct bpf_link *link)
+{
+       struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+
+       kfree(perf_link);
+}
+
+static const struct bpf_link_ops bpf_perf_link_lops = {
+       .release = bpf_perf_link_release,
+       .dealloc = bpf_perf_link_dealloc,
+};
+
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+       struct bpf_link_primer link_primer;
+       struct bpf_perf_link *link;
+       struct perf_event *event;
+       struct file *perf_file;
+       int err;
+
+       if (attr->link_create.flags)
+               return -EINVAL;
+
+       perf_file = perf_event_get(attr->link_create.target_fd);
+       if (IS_ERR(perf_file))
+               return PTR_ERR(perf_file);
+
+       link = kzalloc(sizeof(*link), GFP_USER);
+       if (!link) {
+               err = -ENOMEM;
+               goto out_put_file;
+       }
+       bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
+       link->perf_file = perf_file;
+
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
+               kfree(link);
+               goto out_put_file;
+       }
+
+       event = perf_file->private_data;
+       err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
+       if (err) {
+               bpf_link_cleanup(&link_primer);
+               goto out_put_file;
+       }
+       /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
+       bpf_prog_inc(prog);
+
+       return bpf_link_settle(&link_primer);
+
+out_put_file:
+       fput(perf_file);
+       return err;
+}
+#endif /* CONFIG_PERF_EVENTS */
+
 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
 
 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
@@ -4114,15 +4214,26 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
        if (ret)
                goto out;
 
-       if (prog->type == BPF_PROG_TYPE_EXT) {
+       switch (prog->type) {
+       case BPF_PROG_TYPE_EXT:
                ret = tracing_bpf_link_attach(attr, uattr, prog);
                goto out;
-       }
-
-       ptype = attach_type_to_prog_type(attr->link_create.attach_type);
-       if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
-               ret = -EINVAL;
-               goto out;
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_KPROBE:
+       case BPF_PROG_TYPE_TRACEPOINT:
+               if (attr->link_create.attach_type != BPF_PERF_EVENT) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               ptype = prog->type;
+               break;
+       default:
+               ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+               if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               break;
        }
 
        switch (ptype) {
@@ -4146,6 +4257,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
        case BPF_PROG_TYPE_XDP:
                ret = bpf_xdp_link_attach(attr, prog);
                break;
+#endif
+#ifdef CONFIG_PERF_EVENTS
+       case BPF_PROG_TYPE_PERF_EVENT:
+       case BPF_PROG_TYPE_TRACEPOINT:
+       case BPF_PROG_TYPE_KPROBE:
+               ret = bpf_perf_link_attach(attr, prog);
+               break;
 #endif
        default:
                ret = -EINVAL;
index b68cb5d..b48750b 100644 (file)
@@ -525,7 +525,6 @@ static const struct seq_operations task_vma_seq_ops = {
 };
 
 BTF_ID_LIST(btf_task_file_ids)
-BTF_ID(struct, task_struct)
 BTF_ID(struct, file)
 BTF_ID(struct, vm_area_struct)
 
@@ -591,19 +590,19 @@ static int __init task_iter_init(void)
 {
        int ret;
 
-       task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+       task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
        ret = bpf_iter_reg_target(&task_reg_info);
        if (ret)
                return ret;
 
-       task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
-       task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
+       task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+       task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[0];
        ret =  bpf_iter_reg_target(&task_file_reg_info);
        if (ret)
                return ret;
 
-       task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
-       task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[2];
+       task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+       task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
        return bpf_iter_reg_target(&task_vma_reg_info);
 }
 late_initcall(task_iter_init);
index 28a3630..fe1e857 100644 (file)
@@ -172,7 +172,7 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 static struct bpf_tramp_progs *
-bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
 {
        const struct bpf_prog_aux *aux;
        struct bpf_tramp_progs *tprogs;
@@ -189,8 +189,10 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
                *total += tr->progs_cnt[kind];
                progs = tprogs[kind].progs;
 
-               hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
+               hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
+                       *ip_arg |= aux->prog->call_get_func_ip;
                        *progs++ = aux->prog;
+               }
        }
        return tprogs;
 }
@@ -333,9 +335,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
        struct bpf_tramp_image *im;
        struct bpf_tramp_progs *tprogs;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+       bool ip_arg = false;
        int err, total;
 
-       tprogs = bpf_trampoline_get_progs(tr, &total);
+       tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg);
        if (IS_ERR(tprogs))
                return PTR_ERR(tprogs);
 
@@ -357,6 +360,9 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
            tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
+       if (ip_arg)
+               flags |= BPF_TRAMP_F_IP_ARG;
+
        err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
                                          &tr->func.model, flags, tprogs,
                                          tr->func.addr);
@@ -542,7 +548,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
        u64_stats_update_end(&stats->syncp);
 }
 
-/* The logic is similar to BPF_PROG_RUN, but with an explicit
+/* The logic is similar to bpf_prog_run(), but with an explicit
  * rcu_read_lock() and migrate_disable() which are required
  * for the trampoline. The macro is split into
  * call __bpf_prog_enter
index 49f07e2..047ac4b 100644 (file)
@@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
        int mem_size;
        u64 msize_max_value;
        int ref_obj_id;
+       int map_uid;
        int func_id;
        struct btf *btf;
        u32 btf_id;
@@ -734,6 +735,10 @@ static void print_verifier_state(struct bpf_verifier_env *env,
                        if (state->refs[i].id)
                                verbose(env, ",%d", state->refs[i].id);
        }
+       if (state->in_callback_fn)
+               verbose(env, " cb");
+       if (state->in_async_callback_fn)
+               verbose(env, " async_cb");
        verbose(env, "\n");
 }
 
@@ -1135,6 +1140,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
                if (map->inner_map_meta) {
                        reg->type = CONST_PTR_TO_MAP;
                        reg->map_ptr = map->inner_map_meta;
+                       /* transfer reg's id which is unique for every map_lookup_elem
+                        * as UID of the inner map.
+                        */
+                       reg->map_uid = reg->id;
                } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
                        reg->type = PTR_TO_XDP_SOCK;
                } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -1522,6 +1531,54 @@ static void init_func_state(struct bpf_verifier_env *env,
        init_reg_state(env, state);
 }
 
+/* Similar to push_stack(), but for async callbacks */
+static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
+                                               int insn_idx, int prev_insn_idx,
+                                               int subprog)
+{
+       struct bpf_verifier_stack_elem *elem;
+       struct bpf_func_state *frame;
+
+       elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+       if (!elem)
+               goto err;
+
+       elem->insn_idx = insn_idx;
+       elem->prev_insn_idx = prev_insn_idx;
+       elem->next = env->head;
+       elem->log_pos = env->log.len_used;
+       env->head = elem;
+       env->stack_size++;
+       if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
+               verbose(env,
+                       "The sequence of %d jumps is too complex for async cb.\n",
+                       env->stack_size);
+               goto err;
+       }
+       /* Unlike push_stack() do not copy_verifier_state().
+        * The caller state doesn't matter.
+        * This is async callback. It starts in a fresh stack.
+        * Initialize it similar to do_check_common().
+        */
+       elem->st.branches = 1;
+       frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+       if (!frame)
+               goto err;
+       init_func_state(env, frame,
+                       BPF_MAIN_FUNC /* callsite */,
+                       0 /* frameno within this callchain */,
+                       subprog /* subprog number within this prog */);
+       elem->st.frame[0] = frame;
+       return &elem->st;
+err:
+       free_verifier_state(env->cur_state, true);
+       env->cur_state = NULL;
+       /* pop all elements and return */
+       while (!pop_stack(env, NULL, NULL, false));
+       return NULL;
+}
+
+
 enum reg_arg_type {
        SRC_OP,         /* register is used as source operand */
        DST_OP,         /* register is used as destination operand */
@@ -3217,6 +3274,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
                        return -EACCES;
                }
        }
+       if (map_value_has_timer(map)) {
+               u32 t = map->timer_off;
+
+               if (reg->smin_value + off < t + sizeof(struct bpf_timer) &&
+                    t < reg->umax_value + off + size) {
+                       verbose(env, "bpf_timer cannot be accessed directly by load/store\n");
+                       return -EACCES;
+               }
+       }
        return err;
 }
 
@@ -3619,6 +3685,8 @@ process_func:
 continue_func:
        subprog_end = subprog[idx + 1].start;
        for (; i < subprog_end; i++) {
+               int next_insn;
+
                if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
                        continue;
                /* remember insn and function to return to */
@@ -3626,13 +3694,22 @@ continue_func:
                ret_prog[frame] = idx;
 
                /* find the callee */
-               i = i + insn[i].imm + 1;
-               idx = find_subprog(env, i);
+               next_insn = i + insn[i].imm + 1;
+               idx = find_subprog(env, next_insn);
                if (idx < 0) {
                        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
-                                 i);
+                                 next_insn);
                        return -EFAULT;
                }
+               if (subprog[idx].is_async_cb) {
+                       if (subprog[idx].has_tail_call) {
+                               verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+                               return -EFAULT;
+                       }
+                        /* async callbacks don't increase bpf prog stack size */
+                       continue;
+               }
+               i = next_insn;
 
                if (subprog[idx].has_tail_call)
                        tail_call_reachable = true;
@@ -4634,6 +4711,54 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
        return 0;
 }
 
+static int process_timer_func(struct bpf_verifier_env *env, int regno,
+                             struct bpf_call_arg_meta *meta)
+{
+       struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+       bool is_const = tnum_is_const(reg->var_off);
+       struct bpf_map *map = reg->map_ptr;
+       u64 val = reg->var_off.value;
+
+       if (!is_const) {
+               verbose(env,
+                       "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
+                       regno);
+               return -EINVAL;
+       }
+       if (!map->btf) {
+               verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
+                       map->name);
+               return -EINVAL;
+       }
+       if (!map_value_has_timer(map)) {
+               if (map->timer_off == -E2BIG)
+                       verbose(env,
+                               "map '%s' has more than one 'struct bpf_timer'\n",
+                               map->name);
+               else if (map->timer_off == -ENOENT)
+                       verbose(env,
+                               "map '%s' doesn't have 'struct bpf_timer'\n",
+                               map->name);
+               else
+                       verbose(env,
+                               "map '%s' is not a struct type or bpf_timer is mangled\n",
+                               map->name);
+               return -EINVAL;
+       }
+       if (map->timer_off != val + reg->off) {
+               verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
+                       val + reg->off, map->timer_off);
+               return -EINVAL;
+       }
+       if (meta->map_ptr) {
+               verbose(env, "verifier bug. Two map pointers in a timer helper\n");
+               return -EFAULT;
+       }
+       meta->map_uid = reg->map_uid;
+       meta->map_ptr = map;
+       return 0;
+}
+
 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 {
        return type == ARG_PTR_TO_MEM ||
@@ -4766,6 +4891,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
        [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
@@ -4797,6 +4923,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
        [ARG_PTR_TO_FUNC]               = &func_ptr_types,
        [ARG_PTR_TO_STACK_OR_NULL]      = &stack_ptr_types,
        [ARG_PTR_TO_CONST_STR]          = &const_str_ptr_types,
+       [ARG_PTR_TO_TIMER]              = &timer_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4926,7 +5053,29 @@ skip_type_check:
 
        if (arg_type == ARG_CONST_MAP_PTR) {
                /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
+               if (meta->map_ptr) {
+                       /* Use map_uid (which is unique id of inner map) to reject:
+                        * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
+                        * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
+                        * if (inner_map1 && inner_map2) {
+                        *     timer = bpf_map_lookup_elem(inner_map1);
+                        *     if (timer)
+                        *         // mismatch would have been allowed
+                        *         bpf_timer_init(timer, inner_map2);
+                        * }
+                        *
+                        * Comparing map_ptr is enough to distinguish normal and outer maps.
+                        */
+                       if (meta->map_ptr != reg->map_ptr ||
+                           meta->map_uid != reg->map_uid) {
+                               verbose(env,
+                                       "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
+                                       meta->map_uid, reg->map_uid);
+                               return -EINVAL;
+                       }
+               }
                meta->map_ptr = reg->map_ptr;
+               meta->map_uid = reg->map_uid;
        } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
                /* bpf_map_xxx(..., map_ptr, ..., key) call:
                 * check that [key, key + map->key_size) are within
@@ -4978,6 +5127,9 @@ skip_type_check:
                        verbose(env, "verifier internal error\n");
                        return -EFAULT;
                }
+       } else if (arg_type == ARG_PTR_TO_TIMER) {
+               if (process_timer_func(env, regno, meta))
+                       return -EACCES;
        } else if (arg_type == ARG_PTR_TO_FUNC) {
                meta->subprogno = reg->subprogno;
        } else if (arg_type_is_mem_ptr(arg_type)) {
@@ -5597,6 +5749,31 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                }
        }
 
+       if (insn->code == (BPF_JMP | BPF_CALL) &&
+           insn->imm == BPF_FUNC_timer_set_callback) {
+               struct bpf_verifier_state *async_cb;
+
+               /* there is no real recursion here. timer callbacks are async */
+               env->subprog_info[subprog].is_async_cb = true;
+               async_cb = push_async_cb(env, env->subprog_info[subprog].start,
+                                        *insn_idx, subprog);
+               if (!async_cb)
+                       return -EFAULT;
+               callee = async_cb->frame[0];
+               callee->async_entry_cnt = caller->async_entry_cnt + 1;
+
+               /* Convert bpf_timer_set_callback() args into timer callback args */
+               err = set_callee_state_cb(env, caller, callee, *insn_idx);
+               if (err)
+                       return err;
+
+               clear_caller_saved_regs(env, caller->regs);
+               mark_reg_unknown(env, caller->regs, BPF_REG_0);
+               caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+               /* continue with next insn after call */
+               return 0;
+       }
+
        callee = kzalloc(sizeof(*callee), GFP_KERNEL);
        if (!callee)
                return -ENOMEM;
@@ -5724,6 +5901,35 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
        return 0;
 }
 
+static int set_timer_callback_state(struct bpf_verifier_env *env,
+                                   struct bpf_func_state *caller,
+                                   struct bpf_func_state *callee,
+                                   int insn_idx)
+{
+       struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+       /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
+        * callback_fn(struct bpf_map *map, void *key, void *value);
+        */
+       callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
+       __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+       callee->regs[BPF_REG_1].map_ptr = map_ptr;
+
+       callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+       __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+       callee->regs[BPF_REG_2].map_ptr = map_ptr;
+
+       callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+       __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+       callee->regs[BPF_REG_3].map_ptr = map_ptr;
+
+       /* unused */
+       __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+       __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+       callee->in_async_callback_fn = true;
+       return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
        struct bpf_verifier_state *state = env->cur_state;
@@ -5937,6 +6143,29 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
        return err;
 }
 
+static int check_get_func_ip(struct bpf_verifier_env *env)
+{
+       enum bpf_attach_type eatype = env->prog->expected_attach_type;
+       enum bpf_prog_type type = resolve_prog_type(env->prog);
+       int func_id = BPF_FUNC_get_func_ip;
+
+       if (type == BPF_PROG_TYPE_TRACING) {
+               if (eatype != BPF_TRACE_FENTRY && eatype != BPF_TRACE_FEXIT &&
+                   eatype != BPF_MODIFY_RETURN) {
+                       verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
+                               func_id_name(func_id), func_id);
+                       return -ENOTSUPP;
+               }
+               return 0;
+       } else if (type == BPF_PROG_TYPE_KPROBE) {
+               return 0;
+       }
+
+       verbose(env, "func %s#%d not supported for program type %d\n",
+               func_id_name(func_id), func_id, type);
+       return -ENOTSUPP;
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                             int *insn_idx_p)
 {
@@ -6051,6 +6280,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                        return -EINVAL;
        }
 
+       if (func_id == BPF_FUNC_timer_set_callback) {
+               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+                                       set_timer_callback_state);
+               if (err < 0)
+                       return -EINVAL;
+       }
+
        if (func_id == BPF_FUNC_snprintf) {
                err = check_bpf_snprintf_call(env, regs);
                if (err < 0)
@@ -6086,6 +6322,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                        return -EINVAL;
                }
                regs[BPF_REG_0].map_ptr = meta.map_ptr;
+               regs[BPF_REG_0].map_uid = meta.map_uid;
                if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
                        regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
                        if (map_value_has_spin_lock(meta.map_ptr))
@@ -6207,6 +6444,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
        if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
                env->prog->call_get_stack = true;
 
+       if (func_id == BPF_FUNC_get_func_ip) {
+               if (check_get_func_ip(env))
+                       return -ENOTSUPP;
+               env->prog->call_get_func_ip = true;
+       }
+
        if (changes_data)
                clear_all_pkt_pointers(env);
        return 0;
@@ -9087,7 +9330,8 @@ static int check_return_code(struct bpf_verifier_env *env)
        struct tnum range = tnum_range(0, 1);
        enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
        int err;
-       const bool is_subprog = env->cur_state->frame[0]->subprogno;
+       struct bpf_func_state *frame = env->cur_state->frame[0];
+       const bool is_subprog = frame->subprogno;
 
        /* LSM and struct_ops func-ptr's return type could be "void" */
        if (!is_subprog &&
@@ -9112,6 +9356,22 @@ static int check_return_code(struct bpf_verifier_env *env)
        }
 
        reg = cur_regs(env) + BPF_REG_0;
+
+       if (frame->in_async_callback_fn) {
+               /* enforce return zero from async callbacks like timer */
+               if (reg->type != SCALAR_VALUE) {
+                       verbose(env, "In async callback the register R0 is not a known value (%s)\n",
+                               reg_type_str[reg->type]);
+                       return -EINVAL;
+               }
+
+               if (!tnum_in(tnum_const(0), reg->var_off)) {
+                       verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
+                       return -EINVAL;
+               }
+               return 0;
+       }
+
        if (is_subprog) {
                if (reg->type != SCALAR_VALUE) {
                        verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
@@ -9326,8 +9586,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
                init_explored_state(env, t + 1);
        if (visit_callee) {
                init_explored_state(env, t);
-               ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
-                               env, false);
+               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
+                               /* It's ok to allow recursion from CFG point of
+                                * view. __check_func_call() will do the actual
+                                * check.
+                                */
+                               bpf_pseudo_func(insns + t));
        }
        return ret;
 }
@@ -9355,6 +9619,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
                return DONE_EXPLORING;
 
        case BPF_CALL:
+               if (insns[t].imm == BPF_FUNC_timer_set_callback)
+                       /* Mark this call insn to trigger is_state_visited() check
+                        * before call itself is processed by __check_func_call().
+                        * Otherwise new async state will be pushed for further
+                        * exploration.
+                        */
+                       init_explored_state(env, t);
                return visit_func_call_insn(t, insn_cnt, insns, env,
                                            insns[t].src_reg == BPF_PSEUDO_CALL);
 
@@ -10363,9 +10634,25 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                states_cnt++;
                if (sl->state.insn_idx != insn_idx)
                        goto next;
+
                if (sl->state.branches) {
-                       if (states_maybe_looping(&sl->state, cur) &&
-                           states_equal(env, &sl->state, cur)) {
+                       struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
+
+                       if (frame->in_async_callback_fn &&
+                           frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
+                               /* Different async_entry_cnt means that the verifier is
+                                * processing another entry into async callback.
+                                * Seeing the same state is not an indication of infinite
+                                * loop or infinite recursion.
+                                * But finding the same state doesn't mean that it's safe
+                                * to stop processing the current state. The previous state
+                                * hasn't yet reached bpf_exit, since state.branches > 0.
+                                * Checking in_async_callback_fn alone is not enough either.
+                                * Since the verifier still needs to catch infinite loops
+                                * inside async callbacks.
+                                */
+                       } else if (states_maybe_looping(&sl->state, cur) &&
+                                  states_equal(env, &sl->state, cur)) {
                                verbose_linfo(env, insn_idx, "; ");
                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
                                return -EINVAL;
@@ -11414,10 +11701,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
  * [0, off) and [off, end) to new locations, so the patched range stays zero
  */
-static int adjust_insn_aux_data(struct bpf_verifier_env *env,
-                               struct bpf_prog *new_prog, u32 off, u32 cnt)
+static void adjust_insn_aux_data(struct bpf_verifier_env *env,
+                                struct bpf_insn_aux_data *new_data,
+                                struct bpf_prog *new_prog, u32 off, u32 cnt)
 {
-       struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+       struct bpf_insn_aux_data *old_data = env->insn_aux_data;
        struct bpf_insn *insn = new_prog->insnsi;
        u32 old_seen = old_data[off].seen;
        u32 prog_len;
@@ -11430,12 +11718,9 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
        old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
 
        if (cnt == 1)
-               return 0;
+               return;
        prog_len = new_prog->len;
-       new_data = vzalloc(array_size(prog_len,
-                                     sizeof(struct bpf_insn_aux_data)));
-       if (!new_data)
-               return -ENOMEM;
+
        memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
        memcpy(new_data + off + cnt - 1, old_data + off,
               sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
@@ -11446,7 +11731,6 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
        }
        env->insn_aux_data = new_data;
        vfree(old_data);
-       return 0;
 }
 
 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -11481,6 +11765,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
                                            const struct bpf_insn *patch, u32 len)
 {
        struct bpf_prog *new_prog;
+       struct bpf_insn_aux_data *new_data = NULL;
+
+       if (len > 1) {
+               new_data = vzalloc(array_size(env->prog->len + len - 1,
+                                             sizeof(struct bpf_insn_aux_data)));
+               if (!new_data)
+                       return NULL;
+       }
 
        new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
        if (IS_ERR(new_prog)) {
@@ -11488,10 +11780,10 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
                        verbose(env,
                                "insn %d cannot be patched due to 16-bit range\n",
                                env->insn_aux_data[off].orig_idx);
+               vfree(new_data);
                return NULL;
        }
-       if (adjust_insn_aux_data(env, new_prog, off, len))
-               return NULL;
+       adjust_insn_aux_data(env, new_data, new_prog, off, len);
        adjust_subprog_starts(env, off, len);
        adjust_poke_descs(new_prog, off, len);
        return new_prog;
@@ -12008,6 +12300,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                if (is_narrower_load && size < target_size) {
                        u8 shift = bpf_ctx_narrow_access_offset(
                                off, size, size_default) * 8;
+                       if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+                               verbose(env, "bpf verifier narrow ctx load misconfigured\n");
+                               return -EINVAL;
+                       }
                        if (ctx_field_size <= 4) {
                                if (shift)
                                        insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
@@ -12096,7 +12392,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                subprog_end = env->subprog_info[i + 1].start;
 
                len = subprog_end - subprog_start;
-               /* BPF_PROG_RUN doesn't call subprogs directly,
+               /* bpf_prog_run() doesn't call subprogs directly,
                 * hence main prog stats include the runtime of subprogs.
                 * subprogs don't have IDs and not reachable via prog_get_next_id
                 * func[i]->stats will never be accessed and stays NULL
@@ -12342,6 +12638,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 {
        struct bpf_prog *prog = env->prog;
        bool expect_blinding = bpf_jit_blinding_enabled(prog);
+       enum bpf_prog_type prog_type = resolve_prog_type(prog);
        struct bpf_insn *insn = prog->insnsi;
        const struct bpf_func_proto *fn;
        const int insn_cnt = prog->len;
@@ -12559,6 +12856,39 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
                        continue;
                }
 
+               if (insn->imm == BPF_FUNC_timer_set_callback) {
+                       /* The verifier will process callback_fn as many times as necessary
+                        * with different maps and the register states prepared by
+                        * set_timer_callback_state will be accurate.
+                        *
+                        * The following use case is valid:
+                        *   map1 is shared by prog1, prog2, prog3.
+                        *   prog1 calls bpf_timer_init for some map1 elements
+                        *   prog2 calls bpf_timer_set_callback for some map1 elements.
+                        *     Those that were not bpf_timer_init-ed will return -EINVAL.
+                        *   prog3 calls bpf_timer_start for some map1 elements.
+                        *     Those that were not both bpf_timer_init-ed and
+                        *     bpf_timer_set_callback-ed will return -EINVAL.
+                        */
+                       struct bpf_insn ld_addrs[2] = {
+                               BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
+                       };
+
+                       insn_buf[0] = ld_addrs[0];
+                       insn_buf[1] = ld_addrs[1];
+                       insn_buf[2] = *insn;
+                       cnt = 3;
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       goto patch_call_imm;
+               }
+
                /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
                 * and other inlining handlers are currently limited to 64 bit
                 * only.
@@ -12675,6 +13005,21 @@ patch_map_ops_generic:
                        continue;
                }
 
+               /* Implement bpf_get_func_ip inline. */
+               if (prog_type == BPF_PROG_TYPE_TRACING &&
+                   insn->imm == BPF_FUNC_get_func_ip) {
+                       /* Load IP address from ctx - 8 */
+                       insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
+
 patch_call_imm:
                fn = env->ops->get_func_proto(insn->imm, env->prog);
                /* all functions that have prototype and verifier allowed
index 1cb1f9b..011cc50 100644 (file)
@@ -4697,7 +4697,6 @@ errout:
 }
 
 static void perf_event_free_filter(struct perf_event *event);
-static void perf_event_free_bpf_prog(struct perf_event *event);
 
 static void free_event_rcu(struct rcu_head *head)
 {
@@ -5574,7 +5573,6 @@ static inline int perf_fget_light(int fd, struct fd *p)
 static int perf_event_set_output(struct perf_event *event,
                                 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 static int perf_copy_attr(struct perf_event_attr __user *uattr,
                          struct perf_event_attr *attr);
 
@@ -5637,7 +5635,22 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
                return perf_event_set_filter(event, (void __user *)arg);
 
        case PERF_EVENT_IOC_SET_BPF:
-               return perf_event_set_bpf_prog(event, arg);
+       {
+               struct bpf_prog *prog;
+               int err;
+
+               prog = bpf_prog_get(arg);
+               if (IS_ERR(prog))
+                       return PTR_ERR(prog);
+
+               err = perf_event_set_bpf_prog(event, prog, 0);
+               if (err) {
+                       bpf_prog_put(prog);
+                       return err;
+               }
+
+               return 0;
+       }
 
        case PERF_EVENT_IOC_PAUSE_OUTPUT: {
                struct perf_buffer *rb;
@@ -9907,13 +9920,16 @@ static void bpf_overflow_handler(struct perf_event *event,
                .data = data,
                .event = event,
        };
+       struct bpf_prog *prog;
        int ret = 0;
 
        ctx.regs = perf_arch_bpf_user_pt_regs(regs);
        if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
                goto out;
        rcu_read_lock();
-       ret = BPF_PROG_RUN(event->prog, &ctx);
+       prog = READ_ONCE(event->prog);
+       if (prog)
+               ret = bpf_prog_run(prog, &ctx);
        rcu_read_unlock();
 out:
        __this_cpu_dec(bpf_prog_active);
@@ -9923,10 +9939,10 @@ out:
        event->orig_overflow_handler(event, data, regs);
 }
 
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+                                     struct bpf_prog *prog,
+                                     u64 bpf_cookie)
 {
-       struct bpf_prog *prog;
-
        if (event->overflow_handler_context)
                /* hw breakpoint or kernel counter */
                return -EINVAL;
@@ -9934,9 +9950,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
        if (event->prog)
                return -EEXIST;
 
-       prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
-       if (IS_ERR(prog))
-               return PTR_ERR(prog);
+       if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
+               return -EINVAL;
 
        if (event->attr.precise_ip &&
            prog->call_get_stack &&
@@ -9952,11 +9967,11 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
                 * attached to perf_sample_data, do not allow attaching BPF
                 * program that calls bpf_get_[stack|stackid].
                 */
-               bpf_prog_put(prog);
                return -EPROTO;
        }
 
        event->prog = prog;
+       event->bpf_cookie = bpf_cookie;
        event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
        WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
        return 0;
@@ -9974,7 +9989,9 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
        bpf_prog_put(prog);
 }
 #else
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+                                     struct bpf_prog *prog,
+                                     u64 bpf_cookie)
 {
        return -EOPNOTSUPP;
 }
@@ -10002,14 +10019,13 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
        return false;
 }
 
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+                           u64 bpf_cookie)
 {
        bool is_kprobe, is_tracepoint, is_syscall_tp;
-       struct bpf_prog *prog;
-       int ret;
 
        if (!perf_event_is_tracing(event))
-               return perf_event_set_bpf_handler(event, prog_fd);
+               return perf_event_set_bpf_handler(event, prog, bpf_cookie);
 
        is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
        is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
@@ -10018,41 +10034,27 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
                /* bpf programs can only be attached to u/kprobe or tracepoint */
                return -EINVAL;
 
-       prog = bpf_prog_get(prog_fd);
-       if (IS_ERR(prog))
-               return PTR_ERR(prog);
-
        if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
            (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
-           (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
-               /* valid fd, but invalid bpf program type */
-               bpf_prog_put(prog);
+           (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
                return -EINVAL;
-       }
 
        /* Kprobe override only works for kprobes, not uprobes. */
        if (prog->kprobe_override &&
-           !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
-               bpf_prog_put(prog);
+           !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
                return -EINVAL;
-       }
 
        if (is_tracepoint || is_syscall_tp) {
                int off = trace_event_get_offsets(event->tp_event);
 
-               if (prog->aux->max_ctx_offset > off) {
-                       bpf_prog_put(prog);
+               if (prog->aux->max_ctx_offset > off)
                        return -EACCES;
-               }
        }
 
-       ret = perf_event_attach_bpf_prog(event, prog);
-       if (ret)
-               bpf_prog_put(prog);
-       return ret;
+       return perf_event_attach_bpf_prog(event, prog, bpf_cookie);
 }
 
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
 {
        if (!perf_event_is_tracing(event)) {
                perf_event_free_bpf_handler(event);
@@ -10071,12 +10073,13 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+                           u64 bpf_cookie)
 {
        return -ENOENT;
 }
 
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
 {
 }
 #endif /* CONFIG_EVENT_TRACING */
index 44f4c2d..c97e852 100644 (file)
@@ -2083,6 +2083,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 #ifdef CONFIG_BPF_SYSCALL
        RCU_INIT_POINTER(p->bpf_storage, NULL);
+       p->bpf_ctx = NULL;
 #endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
index fdd1407..8e2eb95 100644 (file)
@@ -124,7 +124,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
         * out of events when it was updated in between this and the
         * rcu_dereference() which is accepted risk.
         */
-       ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
+       ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
 
  out:
        __this_cpu_dec(bpf_prog_active);
@@ -714,13 +714,28 @@ BPF_CALL_0(bpf_get_current_task_btf)
        return (unsigned long) current;
 }
 
-BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
-
-static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+const struct bpf_func_proto bpf_get_current_task_btf_proto = {
        .func           = bpf_get_current_task_btf,
        .gpl_only       = true,
        .ret_type       = RET_PTR_TO_BTF_ID,
-       .ret_btf_id     = &bpf_get_current_btf_ids[0],
+       .ret_btf_id     = &btf_task_struct_ids[0],
+};
+
+BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
+{
+       return (unsigned long) task_pt_regs(task);
+}
+
+BTF_ID_LIST(bpf_task_pt_regs_ids)
+BTF_ID(struct, pt_regs)
+
+const struct bpf_func_proto bpf_task_pt_regs_proto = {
+       .func           = bpf_task_pt_regs,
+       .gpl_only       = true,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg1_btf_id    = &btf_task_struct_ids[0],
+       .ret_type       = RET_PTR_TO_BTF_ID,
+       .ret_btf_id     = &bpf_task_pt_regs_ids[0],
 };
 
 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
@@ -948,7 +963,61 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
        .arg5_type      = ARG_ANYTHING,
 };
 
-const struct bpf_func_proto *
+BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
+{
+       /* This helper call is inlined by verifier. */
+       return ((u64 *)ctx)[-1];
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
+       .func           = bpf_get_func_ip_tracing,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
+{
+       struct kprobe *kp = kprobe_running();
+
+       return kp ? (uintptr_t)kp->addr : 0;
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
+       .func           = bpf_get_func_ip_kprobe,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
+{
+       struct bpf_trace_run_ctx *run_ctx;
+
+       run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+       return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
+       .func           = bpf_get_attach_cookie_trace,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
+{
+       return ctx->event->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
+       .func           = bpf_get_attach_cookie_pe,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+};
+
+static const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
        switch (func_id) {
@@ -978,6 +1047,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_current_task_proto;
        case BPF_FUNC_get_current_task_btf:
                return &bpf_get_current_task_btf_proto;
+       case BPF_FUNC_task_pt_regs:
+               return &bpf_task_pt_regs_proto;
        case BPF_FUNC_get_current_uid_gid:
                return &bpf_get_current_uid_gid_proto;
        case BPF_FUNC_get_current_comm:
@@ -1059,8 +1130,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_for_each_map_elem_proto;
        case BPF_FUNC_snprintf:
                return &bpf_snprintf_proto;
+       case BPF_FUNC_get_func_ip:
+               return &bpf_get_func_ip_proto_tracing;
        default:
-               return NULL;
+               return bpf_base_func_proto(func_id);
        }
 }
 
@@ -1078,6 +1151,10 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_override_return:
                return &bpf_override_return_proto;
 #endif
+       case BPF_FUNC_get_func_ip:
+               return &bpf_get_func_ip_proto_kprobe;
+       case BPF_FUNC_get_attach_cookie:
+               return &bpf_get_attach_cookie_proto_trace;
        default:
                return bpf_tracing_func_proto(func_id, prog);
        }
@@ -1188,6 +1265,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_get_stackid_proto_tp;
        case BPF_FUNC_get_stack:
                return &bpf_get_stack_proto_tp;
+       case BPF_FUNC_get_attach_cookie:
+               return &bpf_get_attach_cookie_proto_trace;
        default:
                return bpf_tracing_func_proto(func_id, prog);
        }
@@ -1295,6 +1374,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_perf_prog_read_value_proto;
        case BPF_FUNC_read_branch_records:
                return &bpf_read_branch_records_proto;
+       case BPF_FUNC_get_attach_cookie:
+               return &bpf_get_attach_cookie_proto_pe;
        default:
                return bpf_tracing_func_proto(func_id, prog);
        }
@@ -1431,6 +1512,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 const struct bpf_func_proto *
 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
+       const struct bpf_func_proto *fn;
+
        switch (func_id) {
 #ifdef CONFIG_NET
        case BPF_FUNC_skb_output:
@@ -1471,7 +1554,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_d_path:
                return &bpf_d_path_proto;
        default:
-               return raw_tp_prog_func_proto(func_id, prog);
+               fn = raw_tp_prog_func_proto(func_id, prog);
+               if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
+                       fn = bpf_iter_get_func_proto(func_id, prog);
+               return fn;
        }
 }
 
@@ -1639,7 +1725,8 @@ static DEFINE_MUTEX(bpf_event_mutex);
 #define BPF_TRACE_MAX_PROGS 64
 
 int perf_event_attach_bpf_prog(struct perf_event *event,
-                              struct bpf_prog *prog)
+                              struct bpf_prog *prog,
+                              u64 bpf_cookie)
 {
        struct bpf_prog_array *old_array;
        struct bpf_prog_array *new_array;
@@ -1666,12 +1753,13 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
                goto unlock;
        }
 
-       ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+       ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
        if (ret < 0)
                goto unlock;
 
        /* set the new array to event->tp_event and set event->prog */
        event->prog = prog;
+       event->bpf_cookie = bpf_cookie;
        rcu_assign_pointer(event->tp_event->prog_array, new_array);
        bpf_prog_array_free(old_array);
 
@@ -1692,7 +1780,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
                goto unlock;
 
        old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
-       ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
+       ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
        if (ret == -ENOENT)
                goto unlock;
        if (ret < 0) {
@@ -1780,7 +1868,7 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 {
        cant_sleep();
        rcu_read_lock();
-       (void) BPF_PROG_RUN(prog, args);
+       (void) bpf_prog_run(prog, args);
        rcu_read_unlock();
 }
 
index d500320..830a18e 100644 (file)
@@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self)
        return __bpf_fill_stxdw(self, BPF_DW);
 }
 
+static int bpf_fill_long_jmp(struct bpf_test *self)
+{
+       unsigned int len = BPF_MAXINSNS;
+       struct bpf_insn *insn;
+       int i;
+
+       insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
+       insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1);
+
+       /*
+        * Fill with a complex 64-bit operation that expands to a lot of
+        * instructions on 32-bit JITs. The large jump offset can then
+        * overflow the conditional branch field size, triggering a branch
+        * conversion mechanism in some JITs.
+        *
+        * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch
+        * conversion on the 32-bit MIPS JIT. For other JITs, the instruction
+        * count and/or operation may need to be modified to trigger the
+        * branch conversion.
+        */
+       for (i = 2; i < len - 1; i++)
+               insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i);
+
+       insn[len - 1] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = len;
+
+       return 0;
+}
+
 static struct bpf_test tests[] = {
        {
                "TAX",
@@ -1916,6 +1951,163 @@ static struct bpf_test tests[] = {
                { },
                { { 0, -1 } }
        },
+       {
+               /*
+                * Register (non-)clobbering test, in the case where a 32-bit
+                * JIT implements complex ALU64 operations via function calls.
+                * If so, the function call must be invisible in the eBPF
+                * registers. The JIT must then save and restore relevant
+                * registers during the call. The following tests check that
+                * the eBPF registers retain their values after such a call.
+                */
+               "INT: Register clobbering, R1 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "INT: Register clobbering, R2 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               /*
+                * Test 32-bit JITs that implement complex ALU64 operations as
+                * function calls R0 = f(R1, R2), and must re-arrange operands.
+                */
+#define NUMER 0xfedcba9876543210ULL
+#define DENOM 0x0123456789abcdefULL
+               "ALU64_DIV X: Operand register permutations",
+               .u.insns_int = {
+                       /* R0 / R2 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R0 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R0 / R1 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R0 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R1 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R2 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R1 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_ALU64_REG(BPF_DIV, R1, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R1, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R2 */
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R2, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R3 / R4 */
+                       BPF_LD_IMM64(R3, NUMER),
+                       BPF_LD_IMM64(R4, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R3, R4),
+                       BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* Successful return */
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+#undef NUMER
+#undef DENOM
+       },
+#ifdef CONFIG_32BIT
+       {
+               "INT: 32-bit context pointer word order and zero-extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_JMP32_IMM(BPF_JNE, R1, 0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+#endif
        {
                "check: missing ret",
                .u.insns = {
@@ -2360,6 +2552,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU_MOV_K: small negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU_MOV_K: large negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
        {
                "ALU64_MOV_K: dst = 2",
                .u.insns_int = {
@@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU64_MOV_K: small negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
+       {
+               "ALU64_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU64_MOV_K: large negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
        /* BPF_ALU | BPF_ADD | BPF_X */
        {
                "ALU_ADD_X: 1 + 2 = 3",
@@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 2147483647 } },
        },
+       {
+               "ALU64_MUL_X: 64x64 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe5618cf0 } }
+       },
+       {
+               "ALU64_MUL_X: 64x64 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x2236d88f } }
+       },
        /* BPF_ALU | BPF_MUL | BPF_K */
        {
                "ALU_MUL_K: 2 * 3 = 6",
@@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MUL_K: 64x32 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe242d208 } }
+       },
+       {
+               "ALU64_MUL_K: 64x32 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xc28f5c28 } }
+       },
        /* BPF_ALU | BPF_DIV | BPF_X */
        {
                "ALU_DIV_X: 6 / 2 = 3",
@@ -3430,6 +3754,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_AND_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_AND, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 4 } }
+       },
+       {
+               "ALU_AND_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_AND_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_AND_K: 3 & 2 = 2",
                .u.insns_int = {
@@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000",
+               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000000000000000LL),
@@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = {
                { { 0, 0x1 } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff",
+               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_AND_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000090b0d0fLL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_AND_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_OR | BPF_X */
        {
                "ALU_OR_X: 1 | 2 = 3",
@@ -3572,6 +3966,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_OR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01020305 } }
+       },
+       {
+               "ALU_OR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_OR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_OR_K: 1 | 2 = 3",
                .u.insns_int = {
@@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000",
+               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3642,9 +4074,41 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
-       /* BPF_ALU | BPF_XOR | BPF_X */
        {
-               "ALU_XOR_X: 5 ^ 6 = 3",
+               "ALU64_OR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x012345678fafcfefLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_OR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       /* BPF_ALU | BPF_XOR | BPF_X */
+       {
+               "ALU_XOR_X: 5 ^ 6 = 3",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 5),
                        BPF_ALU32_IMM(BPF_MOV, R1, 6),
@@ -3714,6 +4178,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xfffffffe } },
        },
+       {
+               "ALU_XOR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x0102030b } }
+       },
+       {
+               "ALU_XOR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x5e4d3c2b } }
+       },
+       {
+               "ALU_XOR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000795b3d1fLL),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_XOR_K: 5 ^ 6 = 3",
                .u.insns_int = {
@@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = {
                { { 0, 3 } },
        },
        {
-               "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe",
+               "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 1),
                        BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff),
@@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_XOR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_XOR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_LSH | BPF_X */
        {
                "ALU_LSH_X: 1 << 1 = 2",
@@ -3809,6 +4343,18 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
+       {
+               "ALU_LSH_X: 0x12345678 << 12 = 0x45678000",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU32_REG(BPF_LSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x45678000 } }
+       },
        {
                "ALU64_LSH_X: 1 << 1 = 2",
                .u.insns_int = {
@@ -3833,570 +4379,1993 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Shift == 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0 } }
        },
        {
-               "ALU_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Shift == 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Zero shift, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Zero shift, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x01234567 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_K */
+       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 2 } },
        },
        {
-               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x80000000 } },
        },
        {
-               "ALU64_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 0x12345678 << 12 = 0x45678000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x45678000 } }
        },
        {
-               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 0x12345678 << 0 = 0x12345678",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x12345678 } }
        },
-       /* BPF_ALU | BPF_ARSH | BPF_X */
        {
-               "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
-                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 2 } },
        },
-       /* BPF_ALU | BPF_ARSH | BPF_K */
        {
-               "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_NEG */
        {
-               "ALU_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, -3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_BE */
        {
-               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               "ALU64_LSH_K: Shift == 32, low word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0,  cpu_to_be16(0xcdef) } },
+               { { 0, 0 } }
        },
        {
-               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Shift == 32, high word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_be32(0x89abcdef) } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Zero shift",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+               { { 0, 0x89abcdef } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               "ALU_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le16(0xcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               "ALU_RSH_X: 0x80000000 >> 31 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le32(0x89abcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               "ALU_RSH_X: 0x12345678 >> 20 = 0x123",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 20),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+               { { 0, 0x123 } }
        },
-       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
        {
-               "ST_MEM_B: Store/Load byte: max negative",
+               "ALU64_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_RSH | BPF_K */
+       {
+               "ALU_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 20 = 0x123",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 20),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x123 } }
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x12345678 } }
+       },
+       {
+               "ALU64_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Zero shift",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_X */
+       {
+               "ALU32_ARSH_X: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_ALU32_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_K */
+       {
+               "ALU32_ARSH_K: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 7),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU32_ARSH_K: -1234 >> 0 = -1234",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1234 } }
+       },
+       {
+               "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xf123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Zero shift",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_NEG */
+       {
+               "ALU_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       {
+               "ALU64_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU64_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, -3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_BE */
+       {
+               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0,  cpu_to_be16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_be32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       {
+               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
+       {
+               "ST_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_B: Store/Load byte: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7f } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffLL),
+                       BPF_STX_MEM(BPF_B, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffLL),
+                       BPF_STX_MEM(BPF_H, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffLL),
+                       BPF_STX_MEM(BPF_W, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
+                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
+                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x1 } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store double word: first word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x01234567 } },
+#else
+               { { 0, 0x89abcdef } },
+#endif
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store double word: second word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x89abcdef } },
+#else
+               { { 0, 0x01234567 } },
+#endif
+               .stack_depth = 40,
+       },
+       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
+       {
+               "STX_XADD_W: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxw,
+       },
+       {
+               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxdw,
+       },
+       /*
+        * Exhaustive tests of atomic operation variants.
+        * Individual tests are expanded from template macros for all
+        * combinations of ALU operation, word size and fetching.
+        */
+#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test: "                      \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R5, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R5, -40),                 \
+               BPF_LDX_MEM(width, R0, R10, -40),                       \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, result } },                                              \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: "    \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R1, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R0, update),                     \
+               BPF_ST_MEM(BPF_W, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R0, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU64_REG(BPF_SUB, R0, R1),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: "     \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R1, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R1, -40),                 \
+               BPF_ALU64_REG(BPF_SUB, R0, R10),                        \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test fetch: "                \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R3, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R3, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R3),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, (op) & BPF_FETCH ? old : update } },                     \
+       .stack_depth = 40,                                              \
+}
+       /* BPF_ATOMIC | BPF_W: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       /* BPF_ATOMIC | BPF_DW: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+#undef BPF_ATOMIC_OP_TEST1
+#undef BPF_ATOMIC_OP_TEST2
+#undef BPF_ATOMIC_OP_TEST3
+#undef BPF_ATOMIC_OP_TEST4
+       /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ALU32_REG(BPF_MOV, R0, R3),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R0, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LD_IMM64(R0, 0xfecdba9876543210ULL),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       /* BPF_JMP32 | BPF_JEQ | BPF_K */
+       {
+               "JMP32_JEQ_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JEQ_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JEQ_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0,  123, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JEQ | BPF_X */
+       {
+               "JMP32_JEQ_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_K */
+       {
+               "JMP32_JNE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 321, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JNE_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JNE_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0,  123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_X */
+       {
+               "JMP32_JNE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_K */
+       {
+               "JMP32_JSET_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 2, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 3, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "JMP32_JSET_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x40000000 } }
+       },
+       {
+               "JMP32_JSET_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSET, R0, -1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_X */
+       {
+               "JMP32_JSET_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_JMP32_REG(BPF_JSET, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 8 } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_K */
+       {
+               "JMP32_JGT_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JGT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_X */
+       {
+               "JMP32_JGT_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGE | BPF_K */
+       {
+               "JMP32_JGE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 124, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_B: Store/Load byte: max positive",
+               "JMP32_JGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7f } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JGE | BPF_X */
        {
-               "STX_MEM_B: Store/Load byte: max negative",
+               "JMP32_JGE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffLL),
-                       BPF_STX_MEM(BPF_B, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_K */
        {
-               "ST_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_H: Store/Load half word: max positive",
+               "JMP32_JLT_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_X */
        {
-               "STX_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffLL),
-                       BPF_STX_MEM(BPF_H, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_K */
        {
-               "ST_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 122, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_W: Store/Load word: max positive",
+               "JMP32_JLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_X */
        {
-               "STX_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_K */
        {
-               "ST_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "ST_MEM_DW: Store/Load double word: max negative 2",
+               "JMP32_JSGT_K: Large immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
-                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
-                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
-                       BPF_MOV32_IMM(R0, 2),
-                       BPF_EXIT_INSN(),
-                       BPF_MOV32_IMM(R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x1 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_X */
        {
-               "ST_MEM_DW: Store/Load double word: max positive",
+               "JMP32_JSGT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_K */
        {
-               "STX_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGE_K: Small immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
-       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
        {
-               "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_X */
        {
-               "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_X",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_K */
        {
-               "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_W: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxw,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_X */
        {
-               "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_K */
        {
-               "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_K */
        {
-               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxdw,
+               { { 0, -12345678 } }
        },
        /* BPF_JMP | BPF_EXIT */
        {
@@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 1 } },
        },
+       {       /* Mainly checking JIT here. */
+               "BPF_MAXINSNS: Very long conditional jump",
+               { },
+               INTERNAL | FLAG_NO_DATA,
+               { },
+               { { 0, 1 } },
+               .fill_helper = bpf_fill_long_jmp,
+       },
        {
                "JMP_JA: Jump, gap, jump, ...",
                { },
@@ -6639,7 +8616,7 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
        start = ktime_get_ns();
 
        for (i = 0; i < runs; i++)
-               ret = BPF_PROG_RUN(fp, data);
+               ret = bpf_prog_run(fp, data);
 
        finish = ktime_get_ns();
        migrate_enable();
@@ -6659,7 +8636,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
                u64 duration;
                u32 ret;
 
-               if (test->test[i].data_size == 0 &&
+               /*
+                * NOTE: Several sub-tests may be present, in which case
+                * a zero {data_size, result} tuple indicates the end of
+                * the sub-test array. The first test is always run,
+                * even if both data_size and result happen to be zero.
+                */
+               if (i > 0 &&
+                   test->test[i].data_size == 0 &&
                    test->test[i].result == 0)
                        break;
 
@@ -7005,8 +8989,248 @@ static __init int test_bpf(void)
        return err_cnt ? -EINVAL : 0;
 }
 
+struct tail_call_test {
+       const char *descr;
+       struct bpf_insn insns[MAX_INSNS];
+       int result;
+       int stack_depth;
+};
+
+/*
+ * Magic marker used in test snippets for tail calls below.
+ * BPF_LD/MOV to R2 and R2 with this immediate value is replaced
+ * with the proper values by the test runner.
+ */
+#define TAIL_CALL_MARKER 0x7a11ca11
+
+/* Special offset to indicate a NULL call target */
+#define TAIL_CALL_NULL 0x7fff
+
+/* Special offset to indicate an out-of-range index */
+#define TAIL_CALL_INVALID 0x7ffe
+
+#define TAIL_CALL(offset)                             \
+       BPF_LD_IMM64(R2, TAIL_CALL_MARKER),            \
+       BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \
+                    offset, TAIL_CALL_MARKER),        \
+       BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0)
+
+/*
+ * Tail call tests. Each test case may call any other test in the table,
+ * including itself, specified as a relative index offset from the calling
+ * test. The index TAIL_CALL_NULL can be used to specify a NULL target
+ * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID
+ * results in a target index that is out of range.
+ */
+static struct tail_call_test tail_call_tests[] = {
+       {
+               "Tail call leaf",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call 2",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 2),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 3,
+       },
+       {
+               "Tail call 3",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 3),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 6,
+       },
+       {
+               "Tail call 4",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 4),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 10,
+       },
+       {
+               "Tail call error path, max count reached",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 1),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       TAIL_CALL(0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = MAX_TAIL_CALL_CNT + 1,
+       },
+       {
+               "Tail call error path, NULL target",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_NULL),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call error path, index out of range",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_INVALID),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+};
+
+static void __init destroy_tail_call_tests(struct bpf_array *progs)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++)
+               if (progs->ptrs[i])
+                       bpf_prog_free(progs->ptrs[i]);
+       kfree(progs);
+}
+
+static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
+{
+       int ntests = ARRAY_SIZE(tail_call_tests);
+       struct bpf_array *progs;
+       int which, err;
+
+       /* Allocate the table of programs to be used for tall calls */
+       progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]),
+                       GFP_KERNEL);
+       if (!progs)
+               goto out_nomem;
+
+       /* Create all eBPF programs and populate the table */
+       for (which = 0; which < ntests; which++) {
+               struct tail_call_test *test = &tail_call_tests[which];
+               struct bpf_prog *fp;
+               int len, i;
+
+               /* Compute the number of program instructions */
+               for (len = 0; len < MAX_INSNS; len++) {
+                       struct bpf_insn *insn = &test->insns[len];
+
+                       if (len < MAX_INSNS - 1 &&
+                           insn->code == (BPF_LD | BPF_DW | BPF_IMM))
+                               len++;
+                       if (insn->code == 0)
+                               break;
+               }
+
+               /* Allocate and initialize the program */
+               fp = bpf_prog_alloc(bpf_prog_size(len), 0);
+               if (!fp)
+                       goto out_nomem;
+
+               fp->len = len;
+               fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+               fp->aux->stack_depth = test->stack_depth;
+               memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
+
+               /* Relocate runtime tail call offsets and addresses */
+               for (i = 0; i < len; i++) {
+                       struct bpf_insn *insn = &fp->insnsi[i];
+
+                       if (insn->imm != TAIL_CALL_MARKER)
+                               continue;
+
+                       switch (insn->code) {
+                       case BPF_LD | BPF_DW | BPF_IMM:
+                               insn[0].imm = (u32)(long)progs;
+                               insn[1].imm = ((u64)(long)progs) >> 32;
+                               break;
+
+                       case BPF_ALU | BPF_MOV | BPF_K:
+                               if (insn->off == TAIL_CALL_NULL)
+                                       insn->imm = ntests;
+                               else if (insn->off == TAIL_CALL_INVALID)
+                                       insn->imm = ntests + 1;
+                               else
+                                       insn->imm = which + insn->off;
+                               insn->off = 0;
+                       }
+               }
+
+               fp = bpf_prog_select_runtime(fp, &err);
+               if (err)
+                       goto out_err;
+
+               progs->ptrs[which] = fp;
+       }
+
+       /* The last entry contains a NULL program pointer */
+       progs->map.max_entries = ntests + 1;
+       *pprogs = progs;
+       return 0;
+
+out_nomem:
+       err = -ENOMEM;
+
+out_err:
+       if (progs)
+               destroy_tail_call_tests(progs);
+       return err;
+}
+
+static __init int test_tail_calls(struct bpf_array *progs)
+{
+       int i, err_cnt = 0, pass_cnt = 0;
+       int jit_cnt = 0, run_cnt = 0;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+               struct tail_call_test *test = &tail_call_tests[i];
+               struct bpf_prog *fp = progs->ptrs[i];
+               u64 duration;
+               int ret;
+
+               cond_resched();
+
+               pr_info("#%d %s ", i, test->descr);
+               if (!fp) {
+                       err_cnt++;
+                       continue;
+               }
+               pr_cont("jited:%u ", fp->jited);
+
+               run_cnt++;
+               if (fp->jited)
+                       jit_cnt++;
+
+               ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
+               if (ret == test->result) {
+                       pr_cont("%lld PASS", duration);
+                       pass_cnt++;
+               } else {
+                       pr_cont("ret %d != %d FAIL", ret, test->result);
+                       err_cnt++;
+               }
+       }
+
+       pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n",
+               __func__, pass_cnt, err_cnt, jit_cnt, run_cnt);
+
+       return err_cnt ? -EINVAL : 0;
+}
+
 static int __init test_bpf_init(void)
 {
+       struct bpf_array *progs = NULL;
        int ret;
 
        ret = prepare_bpf_tests();
@@ -7018,6 +9242,14 @@ static int __init test_bpf_init(void)
        if (ret)
                return ret;
 
+       ret = prepare_tail_call_tests(&progs);
+       if (ret)
+               return ret;
+       ret = test_tail_calls(progs);
+       destroy_tail_call_tests(progs);
+       if (ret)
+               return ret;
+
        return test_skb_segment();
 }
 
index 702a81d..389b576 100644 (file)
@@ -968,7 +968,7 @@ static __always_inline bool memcg_kmem_bypass(void)
                return false;
 
        /* Memcg to charge can't be determined. */
-       if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+       if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
                return true;
 
        return false;
@@ -7050,14 +7050,14 @@ void mem_cgroup_sk_free(struct sock *sk)
  * mem_cgroup_charge_skmem - charge socket memory
  * @memcg: memcg to charge
  * @nr_pages: number of pages to charge
+ * @gfp_mask: reclaim mode
  *
  * Charges @nr_pages to @memcg. Returns %true if the charge fit within
- * @memcg's configured limit, %false if the charge had to be forced.
+ * @memcg's configured limit, %false if it doesn't.
  */
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+                            gfp_t gfp_mask)
 {
-       gfp_t gfp_mask = GFP_KERNEL;
-
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
                struct page_counter *fail;
 
@@ -7065,21 +7065,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
                        memcg->tcpmem_pressure = 0;
                        return true;
                }
-               page_counter_charge(&memcg->tcpmem, nr_pages);
                memcg->tcpmem_pressure = 1;
+               if (gfp_mask & __GFP_NOFAIL) {
+                       page_counter_charge(&memcg->tcpmem, nr_pages);
+                       return true;
+               }
                return false;
        }
 
-       /* Don't block in the packet receive path */
-       if (in_softirq())
-               gfp_mask = GFP_NOWAIT;
-
-       mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
-
-       if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+       if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+               mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
                return true;
+       }
 
-       try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
        return false;
 }
 
index 1c140af..600b956 100644 (file)
@@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev,
        struct dentry *root;
        char buf[32];
 
-       WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
+       if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE))
+               return;
 
        sprintf(buf, "%d", id);
 
index 19406a8..bfed802 100644 (file)
@@ -8,7 +8,6 @@ obj-$(CONFIG_LLC)       += p8022.o psnap.o
 obj-$(CONFIG_NET_FC)   +=                 fc.o
 obj-$(CONFIG_FDDI)     +=                 fddi.o
 obj-$(CONFIG_HIPPI)    +=                 hippi.o
-obj-$(CONFIG_IPX)      += p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)    += p8022.o psnap.o
 obj-$(CONFIG_STP)      += stp.o
 obj-$(CONFIG_GARP)     += garp.o
diff --git a/net/802/p8023.c b/net/802/p8023.c
deleted file mode 100644 (file)
index 19cd569..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *     NET3:   802.3 data link hooks used for IPX 802.3
- *
- *     802.3 isn't really a protocol data link layer. Some old IPX stuff
- *     uses it however. Note that there is only one 802.3 protocol layer
- *     in the system. We don't currently support different protocols
- *     running raw 802.3 on different devices. Thankfully nobody else
- *     has done anything like the old IPX.
- */
-
-#include <linux/in.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/datalink.h>
-#include <net/p8022.h>
-
-/*
- *     Place an 802.3 header on a packet. The driver will do the mac
- *     addresses, we just need to give it the buffer length.
- */
-static int p8023_request(struct datalink_proto *dl,
-                        struct sk_buff *skb, unsigned char *dest_node)
-{
-       struct net_device *dev = skb->dev;
-
-       dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len);
-       return dev_queue_xmit(skb);
-}
-
-/*
- *     Create an 802.3 client. Note there can be only one 802.3 client
- */
-struct datalink_proto *make_8023_client(void)
-{
-       struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
-
-       if (proto) {
-               proto->header_length = 0;
-               proto->request       = p8023_request;
-       }
-       return proto;
-}
-
-/*
- *     Destroy the 802.3 client.
- */
-void destroy_8023_client(struct datalink_proto *dl)
-{
-       kfree(dl);
-}
-
-EXPORT_SYMBOL(destroy_8023_client);
-EXPORT_SYMBOL(make_8023_client);
-
-MODULE_LICENSE("GPL");
index 4cdf841..55275ef 100644 (file)
@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
                return 0;
 
        size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
-       array = kzalloc(size, GFP_KERNEL);
+       array = kzalloc(size, GFP_KERNEL_ACCOUNT);
        if (array == NULL)
                return -ENOBUFS;
 
index a0367b3..0c21d1f 100644 (file)
@@ -372,8 +372,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCGMIIREG:
        case SIOCSMIIREG:
        case SIOCGHWTSTAMP:
-               if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
-                       err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
+               if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
+                       err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
                break;
        }
 
@@ -814,7 +814,7 @@ static const struct net_device_ops vlan_netdev_ops = {
        .ndo_set_mac_address    = vlan_dev_set_mac_address,
        .ndo_set_rx_mode        = vlan_dev_set_rx_mode,
        .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
-       .ndo_do_ioctl           = vlan_dev_ioctl,
+       .ndo_eth_ioctl          = vlan_dev_ioctl,
        .ndo_neigh_setup        = vlan_dev_neigh_setup,
        .ndo_get_stats64        = vlan_dev_get_stats64,
 #if IS_ENABLED(CONFIG_FCOE)
index c7392c4..fb13460 100644 (file)
@@ -363,6 +363,7 @@ source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
 source "net/kcm/Kconfig"
 source "net/strparser/Kconfig"
+source "net/mctp/Kconfig"
 
 config FIB_RULES
        bool
index 9ca9572..fbfeb8a 100644 (file)
@@ -78,3 +78,4 @@ obj-$(CONFIG_QRTR)            += qrtr/
 obj-$(CONFIG_NET_NCSI)         += ncsi/
 obj-$(CONFIG_XDP_SOCKETS)      += xdp/
 obj-$(CONFIG_MPTCP)            += mptcp/
+obj-$(CONFIG_MCTP)             += mctp/
index 8ade5a4..bf5736c 100644 (file)
@@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg)
        struct rtentry rtdef;
        int add_route;
 
-       if (copy_from_user(&atreq, arg, sizeof(atreq)))
+       if (get_user_ifreq(&atreq, NULL, arg))
                return -EFAULT;
 
        dev = __dev_get_by_name(&init_net, atreq.ifr_name);
@@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg)
                return 0;
        }
 
-       return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
+       return put_user_ifreq(&atreq, arg);
 }
 
 static int atrtr_ioctl_addrt(struct rtentry *rt)
index e4f63dd..3624977 100644 (file)
@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
        skb_pull(skb, AX25_KISS_HEADER_LEN);
 
        if (digipeat != NULL) {
-               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
-                       kfree_skb(skb);
+               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
                        goto put;
-               }
 
                skb = ourskb;
        }
index f53751b..22f2f66 100644 (file)
@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
 
 void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 {
-       struct sk_buff *skbn;
        unsigned char *ptr;
        int headroom;
 
@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 
        headroom = ax25_addr_size(ax25->digipeat);
 
-       if (skb_headroom(skb) < headroom) {
-               if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+       if (unlikely(skb_headroom(skb) < headroom)) {
+               skb = skb_expand_head(skb, headroom);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
-                       kfree_skb(skb);
                        return;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-               skb = skbn;
        }
 
        ptr = skb_push(skb, headroom);
index b40e0bc..d0b2e09 100644 (file)
@@ -441,24 +441,17 @@ put:
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
        ax25_address *dest, ax25_digi *digi)
 {
-       struct sk_buff *skbn;
        unsigned char *bp;
        int len;
 
        len = digi->ndigi * AX25_ADDR_LEN;
 
-       if (skb_headroom(skb) < len) {
-               if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+       if (unlikely(skb_headroom(skb) < len)) {
+               skb = skb_expand_head(skb, len);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
                        return NULL;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-
-               skb = skbn;
        }
 
        bp = skb_push(skb, len);
index 1202237..f94f538 100644 (file)
@@ -519,8 +519,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
        }
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
        return res;
 }
 
@@ -857,8 +856,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
        rcu_read_unlock();
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
@@ -1046,14 +1044,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 unlock:
        rcu_read_unlock();
 out:
-       if (neigh_node)
-               batadv_neigh_node_put(neigh_node);
-       if (router)
-               batadv_neigh_node_put(router);
-       if (neigh_ifinfo)
-               batadv_neigh_ifinfo_put(neigh_ifinfo);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_node_put(neigh_node);
+       batadv_neigh_node_put(router);
+       batadv_neigh_ifinfo_put(neigh_ifinfo);
+       batadv_neigh_ifinfo_put(router_ifinfo);
 }
 
 /**
@@ -1194,8 +1188,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
                ret = true;
 
 out:
-       if (neigh_node)
-               batadv_neigh_node_put(neigh_node);
+       batadv_neigh_node_put(neigh_node);
        return ret;
 }
 
@@ -1496,16 +1489,11 @@ out_neigh:
        if (orig_neigh_node && !is_single_hop_neigh)
                batadv_orig_node_put(orig_neigh_node);
 out:
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
-       if (router)
-               batadv_neigh_node_put(router);
-       if (router_router)
-               batadv_neigh_node_put(router_router);
-       if (orig_neigh_router)
-               batadv_neigh_node_put(orig_neigh_router);
-       if (hardif_neigh)
-               batadv_hardif_neigh_put(hardif_neigh);
+       batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_node_put(router);
+       batadv_neigh_node_put(router_router);
+       batadv_neigh_node_put(orig_neigh_router);
+       batadv_hardif_neigh_put(hardif_neigh);
 
        consume_skb(skb_priv);
 }
@@ -1926,8 +1914,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
        }
 
  out:
-       if (neigh_node_best)
-               batadv_neigh_node_put(neigh_node_best);
+       batadv_neigh_node_put(neigh_node_best);
 
        *sub_s = 0;
        return 0;
@@ -2049,10 +2036,8 @@ static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
        *diff = (int)tq1 - (int)tq2;
 
 out:
-       if (neigh1_ifinfo)
-               batadv_neigh_ifinfo_put(neigh1_ifinfo);
-       if (neigh2_ifinfo)
-               batadv_neigh_ifinfo_put(neigh2_ifinfo);
+       batadv_neigh_ifinfo_put(neigh1_ifinfo);
+       batadv_neigh_ifinfo_put(neigh2_ifinfo);
 
        return ret;
 }
@@ -2299,8 +2284,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
                        if (tmp_gw_factor > max_gw_factor ||
                            (tmp_gw_factor == max_gw_factor &&
                             tq_avg > max_tq)) {
-                               if (curr_gw)
-                                       batadv_gw_node_put(curr_gw);
+                               batadv_gw_node_put(curr_gw);
                                curr_gw = gw_node;
                                kref_get(&curr_gw->refcount);
                        }
@@ -2314,8 +2298,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
                          *     $routing_class more tq points)
                          */
                        if (tq_avg > max_tq) {
-                               if (curr_gw)
-                                       batadv_gw_node_put(curr_gw);
+                               batadv_gw_node_put(curr_gw);
                                curr_gw = gw_node;
                                kref_get(&curr_gw->refcount);
                        }
@@ -2332,8 +2315,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
 
 next:
                batadv_neigh_node_put(router);
-               if (router_ifinfo)
-                       batadv_neigh_ifinfo_put(router_ifinfo);
+               batadv_neigh_ifinfo_put(router_ifinfo);
        }
        rcu_read_unlock();
 
@@ -2397,14 +2379,10 @@ static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
 
        ret = true;
 out:
-       if (router_gw_ifinfo)
-               batadv_neigh_ifinfo_put(router_gw_ifinfo);
-       if (router_orig_ifinfo)
-               batadv_neigh_ifinfo_put(router_orig_ifinfo);
-       if (router_gw)
-               batadv_neigh_node_put(router_gw);
-       if (router_orig)
-               batadv_neigh_node_put(router_orig);
+       batadv_neigh_ifinfo_put(router_gw_ifinfo);
+       batadv_neigh_ifinfo_put(router_orig_ifinfo);
+       batadv_neigh_node_put(router_gw);
+       batadv_neigh_node_put(router_orig);
 
        return ret;
 }
@@ -2479,12 +2457,9 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
        ret = 0;
 
 out:
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
-       if (router)
-               batadv_neigh_node_put(router);
+       batadv_gw_node_put(curr_gw);
+       batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_node_put(router);
        return ret;
 }
 
index b98aea9..54e41fc 100644 (file)
@@ -106,8 +106,7 @@ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface)
 
        batadv_v_primary_iface_set(hard_iface);
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static void
@@ -366,8 +365,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
        }
 
  out:
-       if (neigh_node_best)
-               batadv_neigh_node_put(neigh_node_best);
+       batadv_neigh_node_put(neigh_node_best);
 
        *sub_s = 0;
        return 0;
@@ -568,10 +566,8 @@ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
 
        ret = 0;
 out:
-       if (router)
-               batadv_neigh_node_put(router);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_node_put(router);
+       batadv_neigh_ifinfo_put(router_ifinfo);
 
        return ret;
 }
@@ -599,8 +595,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
                if (curr_gw && bw <= max_bw)
                        goto next;
 
-               if (curr_gw)
-                       batadv_gw_node_put(curr_gw);
+               batadv_gw_node_put(curr_gw);
 
                curr_gw = gw_node;
                kref_get(&curr_gw->refcount);
@@ -662,10 +657,8 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
 
        ret = true;
 out:
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-       if (orig_gw)
-               batadv_gw_node_put(orig_gw);
+       batadv_gw_node_put(curr_gw);
+       batadv_gw_node_put(orig_gw);
 
        return ret;
 }
@@ -764,12 +757,9 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
        ret = 0;
 
 out:
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
-       if (router)
-               batadv_neigh_node_put(router);
+       batadv_gw_node_put(curr_gw);
+       batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_node_put(router);
        return ret;
 }
 
index 423c2d1..71999e1 100644 (file)
@@ -486,14 +486,11 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
        hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval);
 
 hardif_free:
-       if (hardif_neigh)
-               batadv_hardif_neigh_put(hardif_neigh);
+       batadv_hardif_neigh_put(hardif_neigh);
 neigh_free:
-       if (neigh)
-               batadv_neigh_node_put(neigh);
+       batadv_neigh_node_put(neigh);
 orig_free:
-       if (orig_neigh)
-               batadv_orig_node_put(orig_neigh);
+       batadv_orig_node_put(orig_neigh);
 }
 
 /**
index a0a9636..1d750f3 100644 (file)
@@ -584,12 +584,9 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
        batadv_v_ogm_queue_on_if(skb, if_outgoing);
 
 out:
-       if (orig_ifinfo)
-               batadv_orig_ifinfo_put(orig_ifinfo);
-       if (router)
-               batadv_neigh_node_put(router);
-       if (neigh_ifinfo)
-               batadv_neigh_ifinfo_put(neigh_ifinfo);
+       batadv_orig_ifinfo_put(orig_ifinfo);
+       batadv_neigh_node_put(router);
+       batadv_neigh_ifinfo_put(neigh_ifinfo);
 }
 
 /**
@@ -669,10 +666,8 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
        else
                ret = 0;
 out:
-       if (orig_ifinfo)
-               batadv_orig_ifinfo_put(orig_ifinfo);
-       if (neigh_ifinfo)
-               batadv_neigh_ifinfo_put(neigh_ifinfo);
+       batadv_orig_ifinfo_put(orig_ifinfo);
+       batadv_neigh_ifinfo_put(neigh_ifinfo);
 
        return ret;
 }
@@ -763,16 +758,11 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
 
        batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
 out:
-       if (router)
-               batadv_neigh_node_put(router);
-       if (orig_neigh_router)
-               batadv_neigh_node_put(orig_neigh_router);
-       if (orig_neigh_node)
-               batadv_orig_node_put(orig_neigh_node);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
-       if (neigh_ifinfo)
-               batadv_neigh_ifinfo_put(neigh_ifinfo);
+       batadv_neigh_node_put(router);
+       batadv_neigh_node_put(orig_neigh_router);
+       batadv_orig_node_put(orig_neigh_node);
+       batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_neigh_ifinfo_put(neigh_ifinfo);
 
        return forward;
 }
@@ -978,12 +968,9 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
        }
        rcu_read_unlock();
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
-       if (neigh_node)
-               batadv_neigh_node_put(neigh_node);
-       if (hardif_neigh)
-               batadv_hardif_neigh_put(hardif_neigh);
+       batadv_orig_node_put(orig_node);
+       batadv_neigh_node_put(neigh_node);
+       batadv_hardif_neigh_put(hardif_neigh);
 }
 
 /**
index 63d42dc..1669744 100644 (file)
@@ -162,6 +162,9 @@ static void batadv_backbone_gw_release(struct kref *ref)
  */
 static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
 {
+       if (!backbone_gw)
+               return;
+
        kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
 }
 
@@ -197,6 +200,9 @@ static void batadv_claim_release(struct kref *ref)
  */
 static void batadv_claim_put(struct batadv_bla_claim *claim)
 {
+       if (!claim)
+               return;
+
        kref_put(&claim->refcount, batadv_claim_release);
 }
 
@@ -439,8 +445,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
 
        netif_rx_any_context(skb);
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 /**
@@ -1498,8 +1503,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
                rcu_read_unlock();
        }
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
        queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
                           msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1808,8 +1812,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
                batadv_hash_destroy(bat_priv->bla.backbone_hash);
                bat_priv->bla.backbone_hash = NULL;
        }
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 /**
@@ -1996,10 +1999,8 @@ handled:
        ret = true;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (claim)
-               batadv_claim_put(claim);
+       batadv_hardif_put(primary_if);
+       batadv_claim_put(claim);
        return ret;
 }
 
@@ -2103,10 +2104,8 @@ allow:
 handled:
        ret = true;
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (claim)
-               batadv_claim_put(claim);
+       batadv_hardif_put(primary_if);
+       batadv_claim_put(claim);
        return ret;
 }
 
@@ -2271,11 +2270,9 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -2443,11 +2440,9 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 8c95a11..2f008e3 100644 (file)
@@ -127,6 +127,9 @@ static void batadv_dat_entry_release(struct kref *ref)
  */
 static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
 {
+       if (!dat_entry)
+               return;
+
        kref_put(&dat_entry->refcount, batadv_dat_entry_release);
 }
 
@@ -405,8 +408,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
                   &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
 
 out:
-       if (dat_entry)
-               batadv_dat_entry_put(dat_entry);
+       batadv_dat_entry_put(dat_entry);
 }
 
 #ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -594,8 +596,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
                                continue;
 
                        max = tmp_max;
-                       if (max_orig_node)
-                               batadv_orig_node_put(max_orig_node);
+                       batadv_orig_node_put(max_orig_node);
                        max_orig_node = orig_node;
                }
                rcu_read_unlock();
@@ -981,11 +982,9 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -1218,8 +1217,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
                                              BATADV_P_DAT_DHT_GET);
        }
 out:
-       if (dat_entry)
-               batadv_dat_entry_put(dat_entry);
+       batadv_dat_entry_put(dat_entry);
        return ret;
 }
 
@@ -1286,8 +1284,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
                ret = true;
        }
 out:
-       if (dat_entry)
-               batadv_dat_entry_put(dat_entry);
+       batadv_dat_entry_put(dat_entry);
        if (ret)
                kfree_skb(skb);
        return ret;
@@ -1420,8 +1417,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
 out:
        if (dropped)
                kfree_skb(skb);
-       if (dat_entry)
-               batadv_dat_entry_put(dat_entry);
+       batadv_dat_entry_put(dat_entry);
        /* if dropped == false -> deliver to the interface */
        return dropped;
 }
@@ -1830,7 +1826,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
        ret = true;
 
 out:
-       if (dat_entry)
-               batadv_dat_entry_put(dat_entry);
+       batadv_dat_entry_put(dat_entry);
        return ret;
 }
index a5d9d80..0899a72 100644 (file)
@@ -381,10 +381,8 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
        }
 
 out:
-       if (orig_node_dst)
-               batadv_orig_node_put(orig_node_dst);
-       if (neigh_node)
-               batadv_neigh_node_put(neigh_node);
+       batadv_orig_node_put(orig_node_dst);
+       batadv_neigh_node_put(neigh_node);
        return ret;
 }
 
index 007f282..b746613 100644 (file)
@@ -59,7 +59,7 @@
  *  after rcu grace period
  * @ref: kref pointer of the gw_node
  */
-static void batadv_gw_node_release(struct kref *ref)
+void batadv_gw_node_release(struct kref *ref)
 {
        struct batadv_gw_node *gw_node;
 
@@ -69,16 +69,6 @@ static void batadv_gw_node_release(struct kref *ref)
        kfree_rcu(gw_node, rcu);
 }
 
-/**
- * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
- *  it
- * @gw_node: gateway node to free
- */
-void batadv_gw_node_put(struct batadv_gw_node *gw_node)
-{
-       kref_put(&gw_node->refcount, batadv_gw_node_release);
-}
-
 /**
  * batadv_gw_get_selected_gw_node() - Get currently selected gateway
  * @bat_priv: the bat priv with all the soft interface information
@@ -130,8 +120,7 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
 unlock:
        rcu_read_unlock();
 out:
-       if (gw_node)
-               batadv_gw_node_put(gw_node);
+       batadv_gw_node_put(gw_node);
        return orig_node;
 }
 
@@ -148,8 +137,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
        curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node,
                                           true);
 
-       if (curr_gw_node)
-               batadv_gw_node_put(curr_gw_node);
+       batadv_gw_node_put(curr_gw_node);
 
        spin_unlock_bh(&bat_priv->gw.list_lock);
 }
@@ -284,14 +272,10 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
        batadv_gw_select(bat_priv, next_gw);
 
 out:
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-       if (next_gw)
-               batadv_gw_node_put(next_gw);
-       if (router)
-               batadv_neigh_node_put(router);
-       if (router_ifinfo)
-               batadv_neigh_ifinfo_put(router_ifinfo);
+       batadv_gw_node_put(curr_gw);
+       batadv_gw_node_put(next_gw);
+       batadv_neigh_node_put(router);
+       batadv_neigh_ifinfo_put(router_ifinfo);
 }
 
 /**
@@ -325,8 +309,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
 reselect:
        batadv_gw_reselect(bat_priv);
 out:
-       if (curr_gw_orig)
-               batadv_orig_node_put(curr_gw_orig);
+       batadv_orig_node_put(curr_gw_orig);
 }
 
 /**
@@ -466,13 +449,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
                if (gw_node == curr_gw)
                        batadv_gw_reselect(bat_priv);
 
-               if (curr_gw)
-                       batadv_gw_node_put(curr_gw);
+               batadv_gw_node_put(curr_gw);
        }
 
 out:
-       if (gw_node)
-               batadv_gw_node_put(gw_node);
+       batadv_gw_node_put(gw_node);
 }
 
 /**
@@ -555,10 +536,8 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       batadv_hardif_put(primary_if);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -780,15 +759,10 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
        batadv_neigh_ifinfo_put(old_ifinfo);
 
 out:
-       if (orig_dst_node)
-               batadv_orig_node_put(orig_dst_node);
-       if (curr_gw)
-               batadv_gw_node_put(curr_gw);
-       if (gw_node)
-               batadv_gw_node_put(gw_node);
-       if (neigh_old)
-               batadv_neigh_node_put(neigh_old);
-       if (neigh_curr)
-               batadv_neigh_node_put(neigh_curr);
+       batadv_orig_node_put(orig_dst_node);
+       batadv_gw_node_put(curr_gw);
+       batadv_gw_node_put(gw_node);
+       batadv_neigh_node_put(neigh_old);
+       batadv_neigh_node_put(neigh_curr);
        return out_of_range;
 }
index 2ae5846..95c2ccd 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "main.h"
 
+#include <linux/kref.h>
 #include <linux/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
@@ -27,7 +28,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
 void batadv_gw_node_delete(struct batadv_priv *bat_priv,
                           struct batadv_orig_node *orig_node);
 void batadv_gw_node_free(struct batadv_priv *bat_priv);
-void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+void batadv_gw_node_release(struct kref *ref);
 struct batadv_gw_node *
 batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
 int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -38,4 +39,17 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
 struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
                                          struct batadv_orig_node *orig_node);
 
+/**
+ * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
+ *  it
+ * @gw_node: gateway node to free
+ */
+static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+{
+       if (!gw_node)
+               return;
+
+       kref_put(&gw_node->refcount, batadv_gw_node_release);
+}
+
 #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
index fdde305..9349c76 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/kstrtox.h>
 #include <linux/limits.h>
 #include <linux/math64.h>
 #include <linux/netdevice.h>
index 55d97e1..8a2b78f 100644 (file)
@@ -236,8 +236,7 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
        real_netdev = dev_get_by_index(real_net, ifindex);
 
 out:
-       if (hard_iface)
-               batadv_hardif_put(hard_iface);
+       batadv_hardif_put(hard_iface);
        return real_netdev;
 }
 
@@ -457,8 +456,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
        batadv_dat_init_own_addr(bat_priv, primary_if);
        batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -481,8 +479,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
        batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
 
 out:
-       if (curr_hard_iface)
-               batadv_hardif_put(curr_hard_iface);
+       batadv_hardif_put(curr_hard_iface);
 }
 
 static bool
@@ -657,8 +654,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
                bat_priv->algo_ops->iface.activate(hard_iface);
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static void
@@ -811,8 +807,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
                new_if = batadv_hardif_get_active(hard_iface->soft_iface);
                batadv_primary_if_select(bat_priv, new_if);
 
-               if (new_if)
-                       batadv_hardif_put(new_if);
+               batadv_hardif_put(new_if);
        }
 
        bat_priv->algo_ops->iface.disable(hard_iface);
@@ -834,8 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
        batadv_hardif_put(hard_iface);
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static struct batadv_hard_iface *
@@ -990,8 +984,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
 hardif_put:
        batadv_hardif_put(hard_iface);
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
        return NOTIFY_DONE;
 }
 
index 8cb2a1f..64f660d 100644 (file)
@@ -89,6 +89,9 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
  */
 static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
 {
+       if (!hard_iface)
+               return;
+
        kref_put(&hard_iface->refcount, batadv_hardif_release);
 }
 
index 014235f..058b8f2 100644 (file)
@@ -13,7 +13,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2021.2"
+#define BATADV_SOURCE_VERSION "2021.3"
 #endif
 
 /* B.A.T.M.A.N. parameters */
index 923e219..a3b6658 100644 (file)
@@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
                upper = netdev_master_upper_dev_get_rcu(upper);
        } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
 
-       if (upper)
-               dev_hold(upper);
+       dev_hold(upper);
        rcu_read_unlock();
 
        return upper;
@@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev,
        }
 
 out:
-       if (bridge)
-               dev_put(bridge);
+       dev_put(bridge);
 
        return ret4 + ret6;
 }
@@ -2239,12 +2237,11 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
        }
 
 out:
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        if (!ret && primary_if)
                *primary_if = hard_iface;
-       else if (hard_iface)
+       else
                batadv_hardif_put(hard_iface);
 
        return ret;
index b6cc746..2927628 100644 (file)
@@ -359,15 +359,13 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
                        atomic_read(&bat_priv->orig_interval)))
                goto nla_put_failure;
 
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
        genlmsg_end(msg, hdr);
        return 0;
 
 nla_put_failure:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
index 4bb76b4..9f06132 100644 (file)
@@ -217,6 +217,9 @@ static void batadv_nc_node_release(struct kref *ref)
  */
 static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
 {
+       if (!nc_node)
+               return;
+
        kref_put(&nc_node->refcount, batadv_nc_node_release);
 }
 
@@ -241,6 +244,9 @@ static void batadv_nc_path_release(struct kref *ref)
  */
 static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
 {
+       if (!nc_path)
+               return;
+
        kref_put(&nc_path->refcount, batadv_nc_path_release);
 }
 
@@ -930,10 +936,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
        out_nc_node->last_seen = jiffies;
 
 out:
-       if (in_nc_node)
-               batadv_nc_node_put(in_nc_node);
-       if (out_nc_node)
-               batadv_nc_node_put(out_nc_node);
+       batadv_nc_node_put(in_nc_node);
+       batadv_nc_node_put(out_nc_node);
 }
 
 /**
@@ -1209,14 +1213,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
        batadv_send_unicast_skb(skb_dest, first_dest);
        res = true;
 out:
-       if (router_neigh)
-               batadv_neigh_node_put(router_neigh);
-       if (router_coding)
-               batadv_neigh_node_put(router_coding);
-       if (router_neigh_ifinfo)
-               batadv_neigh_ifinfo_put(router_neigh_ifinfo);
-       if (router_coding_ifinfo)
-               batadv_neigh_ifinfo_put(router_coding_ifinfo);
+       batadv_neigh_node_put(router_neigh);
+       batadv_neigh_node_put(router_coding);
+       batadv_neigh_ifinfo_put(router_neigh_ifinfo);
+       batadv_neigh_ifinfo_put(router_coding_ifinfo);
        return res;
 }
 
index da72494..aadc653 100644 (file)
@@ -177,7 +177,7 @@ out:
  *  and queue for free after rcu grace period
  * @ref: kref pointer of the originator-vlan object
  */
-static void batadv_orig_node_vlan_release(struct kref *ref)
+void batadv_orig_node_vlan_release(struct kref *ref)
 {
        struct batadv_orig_node_vlan *orig_vlan;
 
@@ -186,16 +186,6 @@ static void batadv_orig_node_vlan_release(struct kref *ref)
        kfree_rcu(orig_vlan, rcu);
 }
 
-/**
- * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
- *  the originator-vlan object
- * @orig_vlan: the originator-vlan object to release
- */
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
-{
-       kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
-}
-
 /**
  * batadv_originator_init() - Initialize all originator structures
  * @bat_priv: the bat priv with all the soft interface information
@@ -231,7 +221,7 @@ err:
  *  free after rcu grace period
  * @ref: kref pointer of the neigh_ifinfo
  */
-static void batadv_neigh_ifinfo_release(struct kref *ref)
+void batadv_neigh_ifinfo_release(struct kref *ref)
 {
        struct batadv_neigh_ifinfo *neigh_ifinfo;
 
@@ -243,22 +233,12 @@ static void batadv_neigh_ifinfo_release(struct kref *ref)
        kfree_rcu(neigh_ifinfo, rcu);
 }
 
-/**
- * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
- *  the neigh_ifinfo
- * @neigh_ifinfo: the neigh_ifinfo object to release
- */
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
-{
-       kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
-}
-
 /**
  * batadv_hardif_neigh_release() - release hardif neigh node from lists and
  *  queue for free after rcu grace period
  * @ref: kref pointer of the neigh_node
  */
-static void batadv_hardif_neigh_release(struct kref *ref)
+void batadv_hardif_neigh_release(struct kref *ref)
 {
        struct batadv_hardif_neigh_node *hardif_neigh;
 
@@ -273,22 +253,12 @@ static void batadv_hardif_neigh_release(struct kref *ref)
        kfree_rcu(hardif_neigh, rcu);
 }
 
-/**
- * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
- *  and possibly release it
- * @hardif_neigh: hardif neigh neighbor to free
- */
-void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
-{
-       kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
-}
-
 /**
  * batadv_neigh_node_release() - release neigh_node from lists and queue for
  *  free after rcu grace period
  * @ref: kref pointer of the neigh_node
  */
-static void batadv_neigh_node_release(struct kref *ref)
+void batadv_neigh_node_release(struct kref *ref)
 {
        struct hlist_node *node_tmp;
        struct batadv_neigh_node *neigh_node;
@@ -308,16 +278,6 @@ static void batadv_neigh_node_release(struct kref *ref)
        kfree_rcu(neigh_node, rcu);
 }
 
-/**
- * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
- *  release it
- * @neigh_node: neigh neighbor to free
- */
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
-{
-       kref_put(&neigh_node->refcount, batadv_neigh_node_release);
-}
-
 /**
  * batadv_orig_router_get() - router to the originator depending on iface
  * @orig_node: the orig node for the router
@@ -704,8 +664,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
 out:
        spin_unlock_bh(&orig_node->neigh_list_lock);
 
-       if (hardif_neigh)
-               batadv_hardif_neigh_put(hardif_neigh);
+       batadv_hardif_neigh_put(hardif_neigh);
        return neigh_node;
 }
 
@@ -797,14 +756,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
  out:
-       if (hardif)
-               batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       batadv_hardif_put(hardif);
+       dev_put(hard_iface);
+       batadv_hardif_put(primary_if);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -814,7 +769,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
  *  free after rcu grace period
  * @ref: kref pointer of the orig_ifinfo
  */
-static void batadv_orig_ifinfo_release(struct kref *ref)
+void batadv_orig_ifinfo_release(struct kref *ref)
 {
        struct batadv_orig_ifinfo *orig_ifinfo;
        struct batadv_neigh_node *router;
@@ -826,22 +781,11 @@ static void batadv_orig_ifinfo_release(struct kref *ref)
 
        /* this is the last reference to this object */
        router = rcu_dereference_protected(orig_ifinfo->router, true);
-       if (router)
-               batadv_neigh_node_put(router);
+       batadv_neigh_node_put(router);
 
        kfree_rcu(orig_ifinfo, rcu);
 }
 
-/**
- * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
- *  the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
- */
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
-{
-       kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
-}
-
 /**
  * batadv_orig_node_free_rcu() - free the orig_node
  * @rcu: rcu pointer of the orig_node
@@ -865,7 +809,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
  *  free after rcu grace period
  * @ref: kref pointer of the orig_node
  */
-static void batadv_orig_node_release(struct kref *ref)
+void batadv_orig_node_release(struct kref *ref)
 {
        struct hlist_node *node_tmp;
        struct batadv_neigh_node *neigh_node;
@@ -895,8 +839,7 @@ static void batadv_orig_node_release(struct kref *ref)
        orig_node->last_bonding_candidate = NULL;
        spin_unlock_bh(&orig_node->neigh_list_lock);
 
-       if (last_candidate)
-               batadv_orig_ifinfo_put(last_candidate);
+       batadv_orig_ifinfo_put(last_candidate);
 
        spin_lock_bh(&orig_node->vlan_list_lock);
        hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
@@ -911,16 +854,6 @@ static void batadv_orig_node_release(struct kref *ref)
        call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
 }
 
-/**
- * batadv_orig_node_put() - decrement the orig node refcounter and possibly
- *  release it
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_put(struct batadv_orig_node *orig_node)
-{
-       kref_put(&orig_node->refcount, batadv_orig_node_release);
-}
-
 /**
  * batadv_originator_free() - Free all originator structures
  * @bat_priv: the bat priv with all the soft interface information
@@ -1213,8 +1146,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
                if (!kref_get_unless_zero(&neigh->refcount))
                        continue;
 
-               if (best)
-                       batadv_neigh_node_put(best);
+               batadv_neigh_node_put(best);
 
                best = neigh;
        }
@@ -1259,8 +1191,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
                                                    BATADV_IF_DEFAULT);
        batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
                            best_neigh_node);
-       if (best_neigh_node)
-               batadv_neigh_node_put(best_neigh_node);
+       batadv_neigh_node_put(best_neigh_node);
 
        /* ... then for all other interfaces. */
        rcu_read_lock();
@@ -1279,8 +1210,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
                                                            hard_iface);
                batadv_update_route(bat_priv, orig_node, hard_iface,
                                    best_neigh_node);
-               if (best_neigh_node)
-                       batadv_neigh_node_put(best_neigh_node);
+               batadv_neigh_node_put(best_neigh_node);
 
                batadv_hardif_put(hard_iface);
        }
@@ -1410,14 +1340,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
  out:
-       if (hardif)
-               batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       batadv_hardif_put(hardif);
+       dev_put(hard_iface);
+       batadv_hardif_put(primary_if);
+       dev_put(soft_iface);
 
        return ret;
 }
index 805be87..ea3d69e 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/compiler.h>
 #include <linux/if_ether.h>
 #include <linux/jhash.h>
+#include <linux/kref.h>
 #include <linux/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
@@ -20,19 +21,18 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
 void batadv_originator_free(struct batadv_priv *bat_priv);
 void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
-void batadv_orig_node_put(struct batadv_orig_node *orig_node);
+void batadv_orig_node_release(struct kref *ref);
 struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
                                              const u8 *addr);
 struct batadv_hardif_neigh_node *
 batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
                        const u8 *neigh_addr);
-void
-batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
+void batadv_hardif_neigh_release(struct kref *ref);
 struct batadv_neigh_node *
 batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
                                struct batadv_hard_iface *hard_iface,
                                const u8 *neigh_addr);
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
+void batadv_neigh_node_release(struct kref *ref);
 struct batadv_neigh_node *
 batadv_orig_router_get(struct batadv_orig_node *orig_node,
                       const struct batadv_hard_iface *if_outgoing);
@@ -42,7 +42,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
 struct batadv_neigh_ifinfo *
 batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
                        struct batadv_hard_iface *if_outgoing);
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
+void batadv_neigh_ifinfo_release(struct kref *ref);
 
 int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
@@ -52,7 +52,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
 struct batadv_orig_ifinfo *
 batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
                       struct batadv_hard_iface *if_outgoing);
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
+void batadv_orig_ifinfo_release(struct kref *ref);
 
 int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 struct batadv_orig_node_vlan *
@@ -61,7 +61,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
 struct batadv_orig_node_vlan *
 batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
                          unsigned short vid);
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
+void batadv_orig_node_vlan_release(struct kref *ref);
 
 /**
  * batadv_choose_orig() - Return the index of the orig entry in the hash table
@@ -82,4 +82,86 @@ static inline u32 batadv_choose_orig(const void *data, u32 size)
 struct batadv_orig_node *
 batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
 
+/**
+ * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
+ *  the originator-vlan object
+ * @orig_vlan: the originator-vlan object to release
+ */
+static inline void
+batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
+{
+       if (!orig_vlan)
+               return;
+
+       kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
+}
+
+/**
+ * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
+ *  the neigh_ifinfo
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+static inline void
+batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
+{
+       if (!neigh_ifinfo)
+               return;
+
+       kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
+}
+
+/**
+ * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
+ *  and possibly release it
+ * @hardif_neigh: hardif neigh neighbor to free
+ */
+static inline void
+batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
+{
+       if (!hardif_neigh)
+               return;
+
+       kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
+}
+
+/**
+ * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
+ *  release it
+ * @neigh_node: neigh neighbor to free
+ */
+static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
+{
+       if (!neigh_node)
+               return;
+
+       kref_put(&neigh_node->refcount, batadv_neigh_node_release);
+}
+
+/**
+ * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
+ *  the orig_ifinfo
+ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+static inline void
+batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
+{
+       if (!orig_ifinfo)
+               return;
+
+       kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
+}
+
+/**
+ * batadv_orig_node_put() - decrement the orig node refcounter and possibly
+ *  release it
+ * @orig_node: the orig node to free
+ */
+static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node)
+{
+       if (!orig_node)
+               return;
+
+       kref_put(&orig_node->refcount, batadv_orig_node_release);
+}
+
 #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
index bb9e93e..970d0d7 100644 (file)
@@ -101,8 +101,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
        }
 
        /* decrease refcount of previous best neighbor */
-       if (curr_router)
-               batadv_neigh_node_put(curr_router);
+       batadv_neigh_node_put(curr_router);
 }
 
 /**
@@ -128,8 +127,7 @@ void batadv_update_route(struct batadv_priv *bat_priv,
                _batadv_update_route(bat_priv, orig_node, recv_if, neigh_node);
 
 out:
-       if (router)
-               batadv_neigh_node_put(router);
+       batadv_neigh_node_put(router);
 }
 
 /**
@@ -269,10 +267,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
                goto out;
        }
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
 
        kfree_skb(skb);
 
@@ -324,10 +320,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
        skb = NULL;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
 
        kfree_skb(skb);
 
@@ -425,8 +419,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
        skb = NULL;
 
 put_orig_node:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
 free_skb:
        kfree_skb(skb);
 
@@ -513,8 +506,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
        orig_node->last_bonding_candidate = new_candidate;
        spin_unlock_bh(&orig_node->neigh_list_lock);
 
-       if (old_candidate)
-               batadv_orig_ifinfo_put(old_candidate);
+       batadv_orig_ifinfo_put(old_candidate);
 }
 
 /**
@@ -656,8 +648,7 @@ next:
                batadv_orig_ifinfo_put(next_candidate);
        }
 
-       if (last_candidate)
-               batadv_orig_ifinfo_put(last_candidate);
+       batadv_orig_ifinfo_put(last_candidate);
 
        return router;
 }
@@ -785,10 +776,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
 
        ret = true;
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
 
        return ret;
 }
@@ -1031,8 +1020,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
                                    orig_node);
 
 rx_success:
-               if (orig_node)
-                       batadv_orig_node_put(orig_node);
+               batadv_orig_node_put(orig_node);
 
                return NET_RX_SUCCESS;
        }
@@ -1279,7 +1267,6 @@ free_skb:
        kfree_skb(skb);
        ret = NET_RX_DROP;
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
        return ret;
 }
index 0b9dd29..477d85a 100644 (file)
@@ -152,8 +152,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
        if (hardif_neigh && ret != NET_XMIT_DROP)
                hardif_neigh->bat_v.last_unicast_tx = jiffies;
 
-       if (hardif_neigh)
-               batadv_hardif_neigh_put(hardif_neigh);
+       batadv_hardif_neigh_put(hardif_neigh);
 #endif
 
        return ret;
@@ -309,8 +308,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
 
        ret = true;
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
        return ret;
 }
 
@@ -425,8 +423,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
        ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
                                      packet_subtype, orig_node, vid);
 
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
 
        return ret;
 }
@@ -452,8 +449,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
        ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
                                      BATADV_P_DATA, orig_node, vid);
 
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
 
        return ret;
 }
@@ -474,10 +470,8 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
        else
                consume_skb(forw_packet->skb);
 
-       if (forw_packet->if_incoming)
-               batadv_hardif_put(forw_packet->if_incoming);
-       if (forw_packet->if_outgoing)
-               batadv_hardif_put(forw_packet->if_outgoing);
+       batadv_hardif_put(forw_packet->if_incoming);
+       batadv_hardif_put(forw_packet->if_outgoing);
        if (forw_packet->queue_left)
                atomic_inc(forw_packet->queue_left);
        kfree(forw_packet);
@@ -748,6 +742,10 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
  * Adds a broadcast packet to the queue and sets up timers. Broadcast packets
  * are sent multiple times to increase probability for being received.
  *
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
+ *
  * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
  */
 static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
@@ -761,7 +759,7 @@ static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
        unsigned long send_time = jiffies;
        struct sk_buff *newskb;
 
-       newskb = skb_copy(skb, GFP_ATOMIC);
+       newskb = skb_clone(skb, GFP_ATOMIC);
        if (!newskb)
                goto err;
 
@@ -800,6 +798,10 @@ err:
  * or if a delay is given after that. Furthermore, queues additional
  * retransmissions if this interface is a wireless one.
  *
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
+ *
  * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
  */
 static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
@@ -814,7 +816,7 @@ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
        int ret = NETDEV_TX_OK;
 
        if (!delay) {
-               newskb = skb_copy(skb, GFP_ATOMIC);
+               newskb = skb_clone(skb, GFP_ATOMIC);
                if (!newskb)
                        return NETDEV_TX_BUSY;
 
@@ -867,8 +869,7 @@ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv,
        ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig,
                                         orig_neigh);
 
-       if (neigh_node)
-               batadv_hardif_neigh_put(neigh_node);
+       batadv_hardif_neigh_put(neigh_node);
 
        /* ok, may broadcast */
        if (!ret)
index ae368a4..0604b02 100644 (file)
@@ -383,10 +383,8 @@ dropped:
 dropped_freed:
        batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
 end:
-       if (mcast_single_orig)
-               batadv_orig_node_put(mcast_single_orig);
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_orig_node_put(mcast_single_orig);
+       batadv_hardif_put(primary_if);
        return NETDEV_TX_OK;
 }
 
@@ -501,7 +499,7 @@ out:
  *  after rcu grace period
  * @ref: kref pointer of the vlan object
  */
-static void batadv_softif_vlan_release(struct kref *ref)
+void batadv_softif_vlan_release(struct kref *ref)
 {
        struct batadv_softif_vlan *vlan;
 
@@ -514,19 +512,6 @@ static void batadv_softif_vlan_release(struct kref *ref)
        kfree_rcu(vlan, rcu);
 }
 
-/**
- * batadv_softif_vlan_put() - decrease the vlan object refcounter and
- *  possibly release it
- * @vlan: the vlan object to release
- */
-void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
-{
-       if (!vlan)
-               return;
-
-       kref_put(&vlan->refcount, batadv_softif_vlan_release);
-}
-
 /**
  * batadv_softif_vlan_get() - get the vlan object for a specific vid
  * @bat_priv: the bat priv with all the soft interface information
@@ -851,8 +836,7 @@ static int batadv_softif_slave_add(struct net_device *dev,
        ret = batadv_hardif_enable_interface(hard_iface, dev);
 
 out:
-       if (hard_iface)
-               batadv_hardif_put(hard_iface);
+       batadv_hardif_put(hard_iface);
        return ret;
 }
 
@@ -878,8 +862,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
        ret = 0;
 
 out:
-       if (hard_iface)
-               batadv_hardif_put(hard_iface);
+       batadv_hardif_put(hard_iface);
        return ret;
 }
 
index 67a2ddd..9f2003f 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "main.h"
 
+#include <linux/kref.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
@@ -21,8 +22,21 @@ void batadv_interface_rx(struct net_device *soft_iface,
 bool batadv_softif_is_valid(const struct net_device *net_dev);
 extern struct rtnl_link_ops batadv_link_ops;
 int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
-void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
+void batadv_softif_vlan_release(struct kref *ref);
 struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
                                                  unsigned short vid);
 
+/**
+ * batadv_softif_vlan_put() - decrease the vlan object refcounter and
+ *  possibly release it
+ * @vlan: the vlan object to release
+ */
+static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
+{
+       if (!vlan)
+               return;
+
+       kref_put(&vlan->refcount, batadv_softif_vlan_release);
+}
+
 #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
index 789c851..56b9fe9 100644 (file)
@@ -358,6 +358,9 @@ static void batadv_tp_vars_release(struct kref *ref)
  */
 static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
 {
+       if (!tp_vars)
+               return;
+
        kref_put(&tp_vars->refcount, batadv_tp_vars_release);
 }
 
@@ -748,12 +751,9 @@ move_twnd:
 
        wake_up(&tp_vars->more_bytes);
 out:
-       if (likely(primary_if))
-               batadv_hardif_put(primary_if);
-       if (likely(orig_node))
-               batadv_orig_node_put(orig_node);
-       if (likely(tp_vars))
-               batadv_tp_vars_put(tp_vars);
+       batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
+       batadv_tp_vars_put(tp_vars);
 }
 
 /**
@@ -882,10 +882,8 @@ static int batadv_tp_send(void *arg)
        }
 
 out:
-       if (likely(primary_if))
-               batadv_hardif_put(primary_if);
-       if (likely(orig_node))
-               batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
 
        batadv_tp_sender_end(bat_priv, tp_vars);
        batadv_tp_sender_cleanup(bat_priv, tp_vars);
@@ -1205,10 +1203,8 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
        ret = 0;
 
 out:
-       if (likely(orig_node))
-               batadv_orig_node_put(orig_node);
-       if (likely(primary_if))
-               batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
 
        return ret;
 }
@@ -1456,8 +1452,7 @@ send_ack:
        batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
                           icmp->timestamp, icmp->session, icmp->uid);
 out:
-       if (likely(tp_vars))
-               batadv_tp_vars_put(tp_vars);
+       batadv_tp_vars_put(tp_vars);
 }
 
 /**
index 434b4f0..e0b3dac 100644 (file)
@@ -247,6 +247,9 @@ static void batadv_tt_local_entry_release(struct kref *ref)
 static void
 batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
 {
+       if (!tt_local_entry)
+               return;
+
        kref_put(&tt_local_entry->common.refcount,
                 batadv_tt_local_entry_release);
 }
@@ -270,7 +273,7 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
  *  queue for free after rcu grace period
  * @ref: kref pointer of the nc_node
  */
-static void batadv_tt_global_entry_release(struct kref *ref)
+void batadv_tt_global_entry_release(struct kref *ref)
 {
        struct batadv_tt_global_entry *tt_global_entry;
 
@@ -282,17 +285,6 @@ static void batadv_tt_global_entry_release(struct kref *ref)
        call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
 }
 
-/**
- * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
- *  possibly release it
- * @tt_global_entry: tt_global_entry to be free'd
- */
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
-{
-       kref_put(&tt_global_entry->common.refcount,
-                batadv_tt_global_entry_release);
-}
-
 /**
  * batadv_tt_global_hash_count() - count the number of orig entries
  * @bat_priv: the bat priv with all the soft interface information
@@ -452,6 +444,9 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
 static void
 batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
 {
+       if (!orig_entry)
+               return;
+
        kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
 }
 
@@ -818,14 +813,10 @@ check_roaming:
 
        ret = true;
 out:
-       if (in_hardif)
-               batadv_hardif_put(in_hardif);
-       if (in_dev)
-               dev_put(in_dev);
-       if (tt_local)
-               batadv_tt_local_entry_put(tt_local);
-       if (tt_global)
-               batadv_tt_global_entry_put(tt_global);
+       batadv_hardif_put(in_hardif);
+       dev_put(in_dev);
+       batadv_tt_local_entry_put(tt_local);
+       batadv_tt_global_entry_put(tt_global);
        return ret;
 }
 
@@ -1215,10 +1206,8 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
  out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       batadv_hardif_put(primary_if);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
@@ -1305,8 +1294,7 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
        batadv_tt_local_entry_put(tt_removed_entry);
 
 out:
-       if (tt_local_entry)
-               batadv_tt_local_entry_put(tt_local_entry);
+       batadv_tt_local_entry_put(tt_local_entry);
 
        return curr_flags;
 }
@@ -1576,8 +1564,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
 sync_flags:
        batadv_tt_global_sync_flags(tt_global);
 out:
-       if (orig_entry)
-               batadv_tt_orig_list_entry_put(orig_entry);
+       batadv_tt_orig_list_entry_put(orig_entry);
 
        spin_unlock_bh(&tt_global->list_lock);
 }
@@ -1750,10 +1737,8 @@ out_remove:
                tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
 
 out:
-       if (tt_global_entry)
-               batadv_tt_global_entry_put(tt_global_entry);
-       if (tt_local_entry)
-               batadv_tt_local_entry_put(tt_local_entry);
+       batadv_tt_global_entry_put(tt_global_entry);
+       batadv_tt_local_entry_put(tt_local_entry);
        return ret;
 }
 
@@ -1789,15 +1774,13 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
                }
 
                /* release the refcount for the "old" best */
-               if (best_router)
-                       batadv_neigh_node_put(best_router);
+               batadv_neigh_node_put(best_router);
 
                best_entry = orig_entry;
                best_router = router;
        }
 
-       if (best_router)
-               batadv_neigh_node_put(best_router);
+       batadv_neigh_node_put(best_router);
 
        return best_entry;
 }
@@ -2003,10 +1986,8 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
        ret = msg->len;
 
  out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       batadv_hardif_put(primary_if);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
@@ -2196,10 +2177,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
        }
 
 out:
-       if (tt_global_entry)
-               batadv_tt_global_entry_put(tt_global_entry);
-       if (local_entry)
-               batadv_tt_local_entry_put(local_entry);
+       batadv_tt_global_entry_put(tt_global_entry);
+       batadv_tt_local_entry_put(local_entry);
 }
 
 /**
@@ -2426,10 +2405,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
        rcu_read_unlock();
 
 out:
-       if (tt_global_entry)
-               batadv_tt_global_entry_put(tt_global_entry);
-       if (tt_local_entry)
-               batadv_tt_local_entry_put(tt_local_entry);
+       batadv_tt_global_entry_put(tt_global_entry);
+       batadv_tt_local_entry_put(tt_local_entry);
 
        return orig_node;
 }
@@ -2606,6 +2583,9 @@ static void batadv_tt_req_node_release(struct kref *ref)
  */
 static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
 {
+       if (!tt_req_node)
+               return;
+
        kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
 }
 
@@ -2987,8 +2967,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
        ret = true;
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 
        if (ret && tt_req_node) {
                spin_lock_bh(&bat_priv->tt.req_list_lock);
@@ -2999,8 +2978,7 @@ out:
                spin_unlock_bh(&bat_priv->tt.req_list_lock);
        }
 
-       if (tt_req_node)
-               batadv_tt_req_node_put(tt_req_node);
+       batadv_tt_req_node_put(tt_req_node);
 
        kfree(tvlv_tt_data);
        return ret;
@@ -3131,10 +3109,8 @@ unlock:
        spin_unlock_bh(&req_dst_orig_node->tt_buff_lock);
 
 out:
-       if (res_dst_orig_node)
-               batadv_orig_node_put(res_dst_orig_node);
-       if (req_dst_orig_node)
-               batadv_orig_node_put(req_dst_orig_node);
+       batadv_orig_node_put(res_dst_orig_node);
+       batadv_orig_node_put(req_dst_orig_node);
        kfree(tvlv_tt_data);
        return ret;
 }
@@ -3248,10 +3224,8 @@ unlock:
        spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
 out:
        spin_unlock_bh(&bat_priv->tt.commit_lock);
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_orig_node_put(orig_node);
+       batadv_hardif_put(primary_if);
        kfree(tvlv_tt_data);
        /* The packet was for this host, so it doesn't need to be re-routed */
        return true;
@@ -3336,8 +3310,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
        atomic_set(&orig_node->last_ttvn, ttvn);
 
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
 }
 
 static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -3378,8 +3351,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
                goto out;
        ret = true;
 out:
-       if (tt_local_entry)
-               batadv_tt_local_entry_put(tt_local_entry);
+       batadv_tt_local_entry_put(tt_local_entry);
        return ret;
 }
 
@@ -3442,8 +3414,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
 
        spin_unlock_bh(&bat_priv->tt.req_list_lock);
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
 }
 
 static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3574,8 +3545,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
                                 &tvlv_roam, sizeof(tvlv_roam));
 
 out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+       batadv_hardif_put(primary_if);
 }
 
 static void batadv_tt_purge(struct work_struct *work)
@@ -4170,8 +4140,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
                             atomic_read(&orig_node->last_ttvn) + 1);
 
 out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
+       batadv_orig_node_put(orig_node);
        return NET_RX_SUCCESS;
 }
 
index e128590..d18740d 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "main.h"
 
+#include <linux/kref.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/skbuff.h>
@@ -28,7 +29,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 struct batadv_tt_global_entry *
 batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
                           unsigned short vid);
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry);
+void batadv_tt_global_entry_release(struct kref *ref);
 int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
                                const u8 *addr, unsigned short vid);
 struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
@@ -55,4 +56,19 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
 int batadv_tt_cache_init(void);
 void batadv_tt_cache_destroy(void);
 
+/**
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
+ *  possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
+ */
+static inline void
+batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
+{
+       if (!tt_global_entry)
+               return;
+
+       kref_put(&tt_global_entry->common.refcount,
+                batadv_tt_global_entry_release);
+}
+
 #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
index 253f5a3..9927733 100644 (file)
@@ -50,6 +50,9 @@ static void batadv_tvlv_handler_release(struct kref *ref)
  */
 static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
 {
+       if (!tvlv_handler)
+               return;
+
        kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
 }
 
@@ -106,6 +109,9 @@ static void batadv_tvlv_container_release(struct kref *ref)
  */
 static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
 {
+       if (!tvlv)
+               return;
+
        kref_put(&tvlv->refcount, batadv_tvlv_container_release);
 }
 
@@ -438,8 +444,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
                                                ogm_source, orig_node,
                                                src, dst, tvlv_value,
                                                tvlv_value_cont_len);
-               if (tvlv_handler)
-                       batadv_tvlv_handler_put(tvlv_handler);
+               batadv_tvlv_handler_put(tvlv_handler);
                tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
                tvlv_value_len -= tvlv_value_cont_len;
        }
index c32638d..f6b9dc4 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <net/bluetooth/bluetooth.h>
 
-#define BTNAMSIZ 18
+#define BTNAMSIZ 21
 
 /* CMTP ioctl defines */
 #define CMTPCONNADD    _IOW('C', 200, int)
index e1a545c..8a47a30 100644 (file)
@@ -1343,6 +1343,12 @@ int hci_inquiry(void __user *arg)
                goto done;
        }
 
+       /* Restrict maximum inquiry length to 60 seconds */
+       if (ir.length > 60) {
+               err = -EINVAL;
+               goto done;
+       }
+
        hci_dev_lock(hdev);
        if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
            inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
@@ -1718,6 +1724,7 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
 int hci_dev_do_close(struct hci_dev *hdev)
 {
        bool auto_off;
+       int err = 0;
 
        BT_DBG("%s %p", hdev->name, hdev);
 
@@ -1727,10 +1734,18 @@ int hci_dev_do_close(struct hci_dev *hdev)
        hci_request_cancel_all(hdev);
        hci_req_sync_lock(hdev);
 
+       if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+           !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+           test_bit(HCI_UP, &hdev->flags)) {
+               /* Execute vendor specific shutdown routine */
+               if (hdev->shutdown)
+                       err = hdev->shutdown(hdev);
+       }
+
        if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
                cancel_delayed_work_sync(&hdev->cmd_timer);
                hci_req_sync_unlock(hdev);
-               return 0;
+               return err;
        }
 
        hci_leds_update_powered(hdev, false);
@@ -1798,14 +1813,6 @@ int hci_dev_do_close(struct hci_dev *hdev)
                clear_bit(HCI_INIT, &hdev->flags);
        }
 
-       if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
-           !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
-           test_bit(HCI_UP, &hdev->flags)) {
-               /* Execute vendor specific shutdown routine */
-               if (hdev->shutdown)
-                       hdev->shutdown(hdev);
-       }
-
        /* flush cmd  work */
        flush_work(&hdev->cmd_work);
 
@@ -1845,7 +1852,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
        hci_req_sync_unlock(hdev);
 
        hci_dev_put(hdev);
-       return 0;
+       return err;
 }
 
 int hci_dev_close(__u16 dev)
@@ -3751,11 +3758,18 @@ done:
 }
 
 /* Alloc HCI device */
-struct hci_dev *hci_alloc_dev(void)
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
 {
        struct hci_dev *hdev;
+       unsigned int alloc_size;
+
+       alloc_size = sizeof(*hdev);
+       if (sizeof_priv) {
+               /* Fixme: May need ALIGN-ment? */
+               alloc_size += sizeof_priv;
+       }
 
-       hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+       hdev = kzalloc(alloc_size, GFP_KERNEL);
        if (!hdev)
                return NULL;
 
@@ -3869,7 +3883,7 @@ struct hci_dev *hci_alloc_dev(void)
 
        return hdev;
 }
-EXPORT_SYMBOL(hci_alloc_dev);
+EXPORT_SYMBOL(hci_alloc_dev_priv);
 
 /* Free HCI device */
 void hci_free_dev(struct hci_dev *hdev)
@@ -4034,13 +4048,13 @@ void hci_unregister_dev(struct hci_dev *hdev)
        }
 
        device_del(&hdev->dev);
-       /* Actual cleanup is deferred until hci_cleanup_dev(). */
+       /* Actual cleanup is deferred until hci_release_dev(). */
        hci_dev_put(hdev);
 }
 EXPORT_SYMBOL(hci_unregister_dev);
 
-/* Cleanup HCI device */
-void hci_cleanup_dev(struct hci_dev *hdev)
+/* Release HCI device */
+void hci_release_dev(struct hci_dev *hdev)
 {
        debugfs_remove_recursive(hdev->debugfs);
        kfree_const(hdev->hw_info);
@@ -4067,7 +4081,9 @@ void hci_cleanup_dev(struct hci_dev *hdev)
        hci_dev_unlock(hdev);
 
        ida_simple_remove(&hci_index_ida, hdev->id);
+       kfree(hdev);
 }
+EXPORT_SYMBOL(hci_release_dev);
 
 /* Suspend HCI device */
 int hci_suspend_dev(struct hci_dev *hdev)
index 1c30182..0bca035 100644 (file)
@@ -40,6 +40,8 @@
 #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
                 "\x00\x00\x00\x00\x00\x00\x00\x00"
 
+#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
+
 /* Handle HCI Event packets */
 
 static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb,
@@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
 
        bacpy(&hdev->random_addr, sent);
 
+       if (!bacmp(&hdev->rpa, sent)) {
+               hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
+               queue_delayed_work(hdev->workqueue, &hdev->rpa_expired,
+                                  secs_to_jiffies(hdev->rpa_timeout));
+       }
+
        hci_dev_unlock(hdev);
 }
 
@@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev,
 {
        __u8 status = *((__u8 *) skb->data);
        struct hci_cp_le_set_adv_set_rand_addr *cp;
-       struct adv_info *adv_instance;
+       struct adv_info *adv;
 
        if (status)
                return;
 
        cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR);
-       if (!cp)
+       /* Update only in case the adv instance since handle 0x00 shall be using
+        * HCI_OP_LE_SET_RANDOM_ADDR since that allows both extended and
+        * non-extended adverting.
+        */
+       if (!cp || !cp->handle)
                return;
 
        hci_dev_lock(hdev);
 
-       if (!cp->handle) {
-               /* Store in hdev for instance 0 (Set adv and Directed advs) */
-               bacpy(&hdev->random_addr, &cp->bdaddr);
-       } else {
-               adv_instance = hci_find_adv_instance(hdev, cp->handle);
-               if (adv_instance)
-                       bacpy(&adv_instance->random_addr, &cp->bdaddr);
+       adv = hci_find_adv_instance(hdev, cp->handle);
+       if (adv) {
+               bacpy(&adv->random_addr, &cp->bdaddr);
+               if (!bacmp(&hdev->rpa, &cp->bdaddr)) {
+                       adv->rpa_expired = false;
+                       queue_delayed_work(hdev->workqueue,
+                                          &adv->rpa_expired_cb,
+                                          secs_to_jiffies(hdev->rpa_timeout));
+               }
        }
 
        hci_dev_unlock(hdev);
@@ -1277,7 +1291,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
                                         struct sk_buff *skb)
 {
        struct hci_cp_le_set_ext_adv_enable *cp;
+       struct hci_cp_ext_adv_set *set;
        __u8 status = *((__u8 *) skb->data);
+       struct adv_info *adv = NULL, *n;
 
        BT_DBG("%s status 0x%2.2x", hdev->name, status);
 
@@ -1288,22 +1304,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
        if (!cp)
                return;
 
+       set = (void *)cp->data;
+
        hci_dev_lock(hdev);
 
+       if (cp->num_of_sets)
+               adv = hci_find_adv_instance(hdev, set->handle);
+
        if (cp->enable) {
                struct hci_conn *conn;
 
                hci_dev_set_flag(hdev, HCI_LE_ADV);
 
+               if (adv)
+                       adv->enabled = true;
+
                conn = hci_lookup_le_connect(hdev);
                if (conn)
                        queue_delayed_work(hdev->workqueue,
                                           &conn->le_conn_timeout,
                                           conn->conn_timeout);
        } else {
+               if (adv) {
+                       adv->enabled = false;
+                       /* If just one instance was disabled check if there are
+                        * any other instance enabled before clearing HCI_LE_ADV
+                        */
+                       list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+                                                list) {
+                               if (adv->enabled)
+                                       goto unlock;
+                       }
+               } else {
+                       /* All instances shall be considered disabled */
+                       list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+                                                list)
+                               adv->enabled = false;
+               }
+
                hci_dev_clear_flag(hdev, HCI_LE_ADV);
        }
 
+unlock:
        hci_dev_unlock(hdev);
 }
 
@@ -2306,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
 
        conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
        if (conn) {
-               u8 type = conn->type;
-
                mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
                                       conn->dst_type, status);
 
+               if (conn->type == LE_LINK) {
+                       hdev->cur_adv_instance = conn->adv_instance;
+                       hci_req_reenable_advertising(hdev);
+               }
+
                /* If the disconnection failed for any reason, the upper layer
                 * does not retry to disconnect in current implementation.
                 * Hence, we need to do some basic cleanup here and re-enable
                 * advertising if necessary.
                 */
                hci_conn_del(conn);
-               if (type == LE_LINK)
-                       hci_req_reenable_advertising(hdev);
        }
 
        hci_dev_unlock(hdev);
@@ -2844,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
        struct hci_conn_params *params;
        struct hci_conn *conn;
        bool mgmt_connected;
-       u8 type;
 
        BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
@@ -2899,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
                }
        }
 
-       type = conn->type;
-
        hci_disconn_cfm(conn, ev->reason);
-       hci_conn_del(conn);
 
        /* The suspend notifier is waiting for all devices to disconnect so
         * clear the bit from pending tasks and inform the wait queue.
@@ -2922,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
         * or until a connection is created or until the Advertising
         * is timed out due to Directed Advertising."
         */
-       if (type == LE_LINK)
+       if (conn->type == LE_LINK) {
+               hdev->cur_adv_instance = conn->adv_instance;
                hci_req_reenable_advertising(hdev);
+       }
+
+       hci_conn_del(conn);
 
 unlock:
        hci_dev_unlock(hdev);
@@ -3268,11 +3311,9 @@ unlock:
        hci_dev_unlock(hdev);
 }
 
-static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev,
-                                           u16 opcode, u8 ncmd)
+static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd)
 {
-       if (opcode != HCI_OP_NOP)
-               cancel_delayed_work(&hdev->cmd_timer);
+       cancel_delayed_work(&hdev->cmd_timer);
 
        if (!test_bit(HCI_RESET, &hdev->flags)) {
                if (ncmd) {
@@ -3647,7 +3688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
                break;
        }
 
-       handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
+       handle_cmd_cnt_and_timer(hdev, ev->ncmd);
 
        hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
                             req_complete_skb);
@@ -3748,7 +3789,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
                break;
        }
 
-       handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd);
+       handle_cmd_cnt_and_timer(hdev, ev->ncmd);
 
        /* Indicate request completion if the command failed. Also, if
         * we're not waiting for a special event and we get a success
@@ -4382,6 +4423,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
 
        switch (ev->status) {
        case 0x00:
+               /* The synchronous connection complete event should only be
+                * sent once per new connection. Receiving a successful
+                * complete event when the connection status is already
+                * BT_CONNECTED means that the device is misbehaving and sent
+                * multiple complete event packets for the same new connection.
+                *
+                * Registering the device more than once can corrupt kernel
+                * memory, hence upon detecting this invalid event, we report
+                * an error and ignore the packet.
+                */
+               if (conn->state == BT_CONNECTED) {
+                       bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
+                       goto unlock;
+               }
+
                conn->handle = __le16_to_cpu(ev->handle);
                conn->state  = BT_CONNECTED;
                conn->type   = ev->link_type;
@@ -5104,9 +5160,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
 }
 #endif
 
+static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
+                               u8 bdaddr_type, bdaddr_t *local_rpa)
+{
+       if (conn->out) {
+               conn->dst_type = bdaddr_type;
+               conn->resp_addr_type = bdaddr_type;
+               bacpy(&conn->resp_addr, bdaddr);
+
+               /* Check if the controller has set a Local RPA then it must be
+                * used instead or hdev->rpa.
+                */
+               if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+                       conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+                       bacpy(&conn->init_addr, local_rpa);
+               } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) {
+                       conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+                       bacpy(&conn->init_addr, &conn->hdev->rpa);
+               } else {
+                       hci_copy_identity_address(conn->hdev, &conn->init_addr,
+                                                 &conn->init_addr_type);
+               }
+       } else {
+               conn->resp_addr_type = conn->hdev->adv_addr_type;
+               /* Check if the controller has set a Local RPA then it must be
+                * used instead or hdev->rpa.
+                */
+               if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+                       conn->resp_addr_type = ADDR_LE_DEV_RANDOM;
+                       bacpy(&conn->resp_addr, local_rpa);
+               } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
+                       /* In case of ext adv, resp_addr will be updated in
+                        * Adv Terminated event.
+                        */
+                       if (!ext_adv_capable(conn->hdev))
+                               bacpy(&conn->resp_addr,
+                                     &conn->hdev->random_addr);
+               } else {
+                       bacpy(&conn->resp_addr, &conn->hdev->bdaddr);
+               }
+
+               conn->init_addr_type = bdaddr_type;
+               bacpy(&conn->init_addr, bdaddr);
+
+               /* For incoming connections, set the default minimum
+                * and maximum connection interval. They will be used
+                * to check if the parameters are in range and if not
+                * trigger the connection update procedure.
+                */
+               conn->le_conn_min_interval = conn->hdev->le_conn_min_interval;
+               conn->le_conn_max_interval = conn->hdev->le_conn_max_interval;
+       }
+}
+
 static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
-                       bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle,
-                       u16 interval, u16 latency, u16 supervision_timeout)
+                                bdaddr_t *bdaddr, u8 bdaddr_type,
+                                bdaddr_t *local_rpa, u8 role, u16 handle,
+                                u16 interval, u16 latency,
+                                u16 supervision_timeout)
 {
        struct hci_conn_params *params;
        struct hci_conn *conn;
@@ -5154,32 +5265,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
                cancel_delayed_work(&conn->le_conn_timeout);
        }
 
-       if (!conn->out) {
-               /* Set the responder (our side) address type based on
-                * the advertising address type.
-                */
-               conn->resp_addr_type = hdev->adv_addr_type;
-               if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
-                       /* In case of ext adv, resp_addr will be updated in
-                        * Adv Terminated event.
-                        */
-                       if (!ext_adv_capable(hdev))
-                               bacpy(&conn->resp_addr, &hdev->random_addr);
-               } else {
-                       bacpy(&conn->resp_addr, &hdev->bdaddr);
-               }
-
-               conn->init_addr_type = bdaddr_type;
-               bacpy(&conn->init_addr, bdaddr);
-
-               /* For incoming connections, set the default minimum
-                * and maximum connection interval. They will be used
-                * to check if the parameters are in range and if not
-                * trigger the connection update procedure.
-                */
-               conn->le_conn_min_interval = hdev->le_conn_min_interval;
-               conn->le_conn_max_interval = hdev->le_conn_max_interval;
-       }
+       le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
 
        /* Lookup the identity address from the stored connection
         * address and address type.
@@ -5236,6 +5322,13 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
        conn->handle = handle;
        conn->state = BT_CONFIG;
 
+       /* Store current advertising instance as connection advertising instance
+        * when sotfware rotation is in use so it can be re-enabled when
+        * disconnected.
+        */
+       if (!ext_adv_capable(hdev))
+               conn->adv_instance = hdev->cur_adv_instance;
+
        conn->le_conn_interval = interval;
        conn->le_conn_latency = latency;
        conn->le_supv_timeout = supervision_timeout;
@@ -5290,7 +5383,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
        BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
        le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
-                            ev->role, le16_to_cpu(ev->handle),
+                            NULL, ev->role, le16_to_cpu(ev->handle),
                             le16_to_cpu(ev->interval),
                             le16_to_cpu(ev->latency),
                             le16_to_cpu(ev->supervision_timeout));
@@ -5304,7 +5397,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
        BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
        le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
-                            ev->role, le16_to_cpu(ev->handle),
+                            &ev->local_rpa, ev->role, le16_to_cpu(ev->handle),
                             le16_to_cpu(ev->interval),
                             le16_to_cpu(ev->latency),
                             le16_to_cpu(ev->supervision_timeout));
@@ -5319,13 +5412,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
        struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data;
        struct hci_conn *conn;
+       struct adv_info *adv;
 
        BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
-       if (ev->status) {
-               struct adv_info *adv;
+       adv = hci_find_adv_instance(hdev, ev->handle);
 
-               adv = hci_find_adv_instance(hdev, ev->handle);
+       if (ev->status) {
                if (!adv)
                        return;
 
@@ -5336,11 +5429,18 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
                return;
        }
 
+       if (adv)
+               adv->enabled = false;
+
        conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
        if (conn) {
-               struct adv_info *adv_instance;
+               /* Store handle in the connection so the correct advertising
+                * instance can be re-enabled when disconnected.
+                */
+               conn->adv_instance = ev->handle;
 
-               if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
+               if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
+                   bacmp(&conn->resp_addr, BDADDR_ANY))
                        return;
 
                if (!ev->handle) {
@@ -5348,9 +5448,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
                        return;
                }
 
-               adv_instance = hci_find_adv_instance(hdev, ev->handle);
-               if (adv_instance)
-                       bacpy(&conn->resp_addr, &adv_instance->random_addr);
+               if (adv)
+                       bacpy(&conn->resp_addr, &adv->random_addr);
        }
 }
 
index 1d14adc..f156266 100644 (file)
@@ -2072,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
         * current RPA has expired then generate a new one.
         */
        if (use_rpa) {
-               int to;
-
                /* If Controller supports LL Privacy use own address type is
                 * 0x03
                 */
@@ -2084,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
                        *own_addr_type = ADDR_LE_DEV_RANDOM;
 
                if (adv_instance) {
-                       if (!adv_instance->rpa_expired &&
-                           !bacmp(&adv_instance->random_addr, &hdev->rpa))
+                       if (adv_rpa_valid(adv_instance))
                                return 0;
-
-                       adv_instance->rpa_expired = false;
                } else {
-                       if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
-                           !bacmp(&hdev->random_addr, &hdev->rpa))
+                       if (rpa_valid(hdev))
                                return 0;
                }
 
@@ -2103,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
 
                bacpy(rand_addr, &hdev->rpa);
 
-               to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
-               if (adv_instance)
-                       queue_delayed_work(hdev->workqueue,
-                                          &adv_instance->rpa_expired_cb, to);
-               else
-                       queue_delayed_work(hdev->workqueue,
-                                          &hdev->rpa_expired, to);
-
                return 0;
        }
 
@@ -2153,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req)
        hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
 }
 
+static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
+{
+       struct hci_dev *hdev = req->hdev;
+
+       /* If we're advertising or initiating an LE connection we can't
+        * go ahead and change the random address at this time. This is
+        * because the eventual initiator address used for the
+        * subsequently created connection will be undefined (some
+        * controllers use the new address and others the one we had
+        * when the operation started).
+        *
+        * In this kind of scenario skip the update and let the random
+        * address be updated at the next cycle.
+        */
+       if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
+           hci_lookup_le_connect(hdev)) {
+               bt_dev_dbg(hdev, "Deferring random address update");
+               hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+               return;
+       }
+
+       hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
+}
+
 int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 {
        struct hci_cp_le_set_ext_adv_params cp;
@@ -2255,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
                } else {
                        if (!bacmp(&random_addr, &hdev->random_addr))
                                return 0;
+                       /* Instance 0x00 doesn't have an adv_info, instead it
+                        * uses hdev->random_addr to track its address so
+                        * whenever it needs to be updated this also set the
+                        * random address since hdev->random_addr is shared with
+                        * scan state machine.
+                        */
+                       set_random_addr(req, &random_addr);
                }
 
                memset(&cp, 0, sizeof(cp));
@@ -2512,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
                                                false);
 }
 
-static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
-{
-       struct hci_dev *hdev = req->hdev;
-
-       /* If we're advertising or initiating an LE connection we can't
-        * go ahead and change the random address at this time. This is
-        * because the eventual initiator address used for the
-        * subsequently created connection will be undefined (some
-        * controllers use the new address and others the one we had
-        * when the operation started).
-        *
-        * In this kind of scenario skip the update and let the random
-        * address be updated at the next cycle.
-        */
-       if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
-           hci_lookup_le_connect(hdev)) {
-               bt_dev_dbg(hdev, "Deferring random address update");
-               hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
-               return;
-       }
-
-       hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
-}
-
 int hci_update_random_address(struct hci_request *req, bool require_privacy,
                              bool use_rpa, u8 *own_addr_type)
 {
@@ -2547,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
         * the current RPA in use, then generate a new one.
         */
        if (use_rpa) {
-               int to;
-
                /* If Controller supports LL Privacy use own address type is
                 * 0x03
                 */
@@ -2558,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
                else
                        *own_addr_type = ADDR_LE_DEV_RANDOM;
 
-               if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
-                   !bacmp(&hdev->random_addr, &hdev->rpa))
+               if (rpa_valid(hdev))
                        return 0;
 
                err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
@@ -2570,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 
                set_random_addr(req, &hdev->rpa);
 
-               to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
-               queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to);
-
                return 0;
        }
 
index b69d88b..7827639 100644 (file)
@@ -85,8 +85,7 @@ static void bt_host_release(struct device *dev)
        struct hci_dev *hdev = to_hci_dev(dev);
 
        if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
-               hci_cleanup_dev(hdev);
-       kfree(hdev);
+               hci_release_dev(hdev);
        module_put(THIS_MODULE);
 }
 
index 3663f88..cea01e2 100644 (file)
@@ -7204,7 +7204,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
        if (!mgmt_rp)
                goto done;
 
-       if (status)
+       if (eir_len == 0)
                goto send_rsp;
 
        eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV,
@@ -7725,7 +7725,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
         * advertising.
         */
        if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
-               return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+               return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
                                       MGMT_STATUS_NOT_SUPPORTED);
 
        if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets)
index ae6f807..2c95bb5 100644 (file)
@@ -70,7 +70,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
 
        BT_DBG("dlc %p state %ld err %d", d, d->state, err);
 
-       spin_lock_bh(&sk->sk_lock.slock);
+       lock_sock(sk);
 
        if (err)
                sk->sk_err = err;
@@ -91,7 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
                sk->sk_state_change(sk);
        }
 
-       spin_unlock_bh(&sk->sk_lock.slock);
+       release_sock(sk);
 
        if (parent && sock_flag(sk, SOCK_ZAPPED)) {
                /* We have to drop DLC lock here, otherwise
@@ -974,7 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
        if (!parent)
                return 0;
 
-       bh_lock_sock(parent);
+       lock_sock(parent);
 
        /* Check for backlog size */
        if (sk_acceptq_is_full(parent)) {
@@ -1001,7 +1001,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
        result = 1;
 
 done:
-       bh_unlock_sock(parent);
+       release_sock(parent);
 
        if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
                parent->sk_state_change(parent);
index d9a4e88..98a8815 100644 (file)
@@ -48,6 +48,8 @@ struct sco_conn {
        spinlock_t      lock;
        struct sock     *sk;
 
+       struct delayed_work     timeout_work;
+
        unsigned int    mtu;
 };
 
@@ -74,31 +76,47 @@ struct sco_pinfo {
 #define SCO_CONN_TIMEOUT       (HZ * 40)
 #define SCO_DISCONN_TIMEOUT    (HZ * 2)
 
-static void sco_sock_timeout(struct timer_list *t)
+static void sco_sock_timeout(struct work_struct *work)
 {
-       struct sock *sk = from_timer(sk, t, sk_timer);
+       struct sco_conn *conn = container_of(work, struct sco_conn,
+                                            timeout_work.work);
+       struct sock *sk;
+
+       sco_conn_lock(conn);
+       sk = conn->sk;
+       if (sk)
+               sock_hold(sk);
+       sco_conn_unlock(conn);
+
+       if (!sk)
+               return;
 
        BT_DBG("sock %p state %d", sk, sk->sk_state);
 
-       bh_lock_sock(sk);
+       lock_sock(sk);
        sk->sk_err = ETIMEDOUT;
        sk->sk_state_change(sk);
-       bh_unlock_sock(sk);
-
-       sco_sock_kill(sk);
+       release_sock(sk);
        sock_put(sk);
 }
 
 static void sco_sock_set_timer(struct sock *sk, long timeout)
 {
+       if (!sco_pi(sk)->conn)
+               return;
+
        BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
-       sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+       cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
+       schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
 }
 
 static void sco_sock_clear_timer(struct sock *sk)
 {
+       if (!sco_pi(sk)->conn)
+               return;
+
        BT_DBG("sock %p state %d", sk, sk->sk_state);
-       sk_stop_timer(sk, &sk->sk_timer);
+       cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
 }
 
 /* ---- SCO connections ---- */
@@ -173,12 +191,14 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
 
        if (sk) {
                sock_hold(sk);
-               bh_lock_sock(sk);
+               lock_sock(sk);
                sco_sock_clear_timer(sk);
                sco_chan_del(sk, err);
-               bh_unlock_sock(sk);
-               sco_sock_kill(sk);
+               release_sock(sk);
                sock_put(sk);
+
+               /* Ensure no more work items will run before freeing conn. */
+               cancel_delayed_work_sync(&conn->timeout_work);
        }
 
        hcon->sco_data = NULL;
@@ -193,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
        sco_pi(sk)->conn = conn;
        conn->sk = sk;
 
+       INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+
        if (parent)
                bt_accept_enqueue(parent, sk, true);
 }
@@ -212,44 +234,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
        return err;
 }
 
-static int sco_connect(struct sock *sk)
+static int sco_connect(struct hci_dev *hdev, struct sock *sk)
 {
        struct sco_conn *conn;
        struct hci_conn *hcon;
-       struct hci_dev  *hdev;
        int err, type;
 
        BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
 
-       hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
-       if (!hdev)
-               return -EHOSTUNREACH;
-
-       hci_dev_lock(hdev);
-
        if (lmp_esco_capable(hdev) && !disable_esco)
                type = ESCO_LINK;
        else
                type = SCO_LINK;
 
        if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
-           (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
-               err = -EOPNOTSUPP;
-               goto done;
-       }
+           (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)))
+               return -EOPNOTSUPP;
 
        hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
                               sco_pi(sk)->setting);
-       if (IS_ERR(hcon)) {
-               err = PTR_ERR(hcon);
-               goto done;
-       }
+       if (IS_ERR(hcon))
+               return PTR_ERR(hcon);
 
        conn = sco_conn_add(hcon);
        if (!conn) {
                hci_conn_drop(hcon);
-               err = -ENOMEM;
-               goto done;
+               return -ENOMEM;
        }
 
        /* Update source addr of the socket */
@@ -257,7 +267,7 @@ static int sco_connect(struct sock *sk)
 
        err = sco_chan_add(conn, sk, NULL);
        if (err)
-               goto done;
+               return err;
 
        if (hcon->state == BT_CONNECTED) {
                sco_sock_clear_timer(sk);
@@ -267,9 +277,6 @@ static int sco_connect(struct sock *sk)
                sco_sock_set_timer(sk, sk->sk_sndtimeo);
        }
 
-done:
-       hci_dev_unlock(hdev);
-       hci_dev_put(hdev);
        return err;
 }
 
@@ -394,8 +401,7 @@ static void sco_sock_cleanup_listen(struct sock *parent)
  */
 static void sco_sock_kill(struct sock *sk)
 {
-       if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
-           sock_flag(sk, SOCK_DEAD))
+       if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
                return;
 
        BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -443,11 +449,10 @@ static void __sco_sock_close(struct sock *sk)
 /* Must be called on unlocked socket. */
 static void sco_sock_close(struct sock *sk)
 {
-       sco_sock_clear_timer(sk);
        lock_sock(sk);
+       sco_sock_clear_timer(sk);
        __sco_sock_close(sk);
        release_sock(sk);
-       sco_sock_kill(sk);
 }
 
 static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
@@ -500,8 +505,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
 
        sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
 
-       timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
-
        bt_sock_link(&sco_sk_list, sk);
        return sk;
 }
@@ -566,6 +569,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
 {
        struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
        struct sock *sk = sock->sk;
+       struct hci_dev  *hdev;
        int err;
 
        BT_DBG("sk %p", sk);
@@ -580,12 +584,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
        if (sk->sk_type != SOCK_SEQPACKET)
                return -EINVAL;
 
+       hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
+       if (!hdev)
+               return -EHOSTUNREACH;
+       hci_dev_lock(hdev);
+
        lock_sock(sk);
 
        /* Set destination address and psm */
        bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
 
-       err = sco_connect(sk);
+       err = sco_connect(hdev, sk);
+       hci_dev_unlock(hdev);
+       hci_dev_put(hdev);
        if (err)
                goto done;
 
@@ -773,6 +784,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
                        cp.max_latency = cpu_to_le16(0xffff);
                        cp.retrans_effort = 0xff;
                        break;
+               default:
+                       /* use CVSD settings as fallback */
+                       cp.max_latency = cpu_to_le16(0xffff);
+                       cp.retrans_effort = 0xff;
+                       break;
                }
 
                hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
@@ -1083,11 +1099,11 @@ static void sco_conn_ready(struct sco_conn *conn)
        BT_DBG("conn %p", conn);
 
        if (sk) {
+               lock_sock(sk);
                sco_sock_clear_timer(sk);
-               bh_lock_sock(sk);
                sk->sk_state = BT_CONNECTED;
                sk->sk_state_change(sk);
-               bh_unlock_sock(sk);
+               release_sock(sk);
        } else {
                sco_conn_lock(conn);
 
@@ -1102,12 +1118,12 @@ static void sco_conn_ready(struct sco_conn *conn)
                        return;
                }
 
-               bh_lock_sock(parent);
+               lock_sock(parent);
 
                sk = sco_sock_alloc(sock_net(parent), NULL,
                                    BTPROTO_SCO, GFP_ATOMIC, 0);
                if (!sk) {
-                       bh_unlock_sock(parent);
+                       release_sock(parent);
                        sco_conn_unlock(conn);
                        return;
                }
@@ -1128,7 +1144,7 @@ static void sco_conn_ready(struct sco_conn *conn)
                /* Wake up parent */
                parent->sk_data_ready(parent);
 
-               bh_unlock_sock(parent);
+               release_sock(parent);
 
                sco_conn_unlock(conn);
        }
index caa16bf..2eb0e55 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/error-injection.h>
 #include <linux/smp.h>
 #include <linux/sock_diag.h>
+#include <net/xdp.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
@@ -88,17 +89,19 @@ reset:
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                        u32 *retval, u32 *time, bool xdp)
 {
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+       struct bpf_prog_array_item item = {.prog = prog};
+       struct bpf_run_ctx *old_ctx;
+       struct bpf_cg_run_ctx run_ctx;
        struct bpf_test_timer t = { NO_MIGRATE };
        enum bpf_cgroup_storage_type stype;
        int ret;
 
        for_each_cgroup_storage_type(stype) {
-               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
-               if (IS_ERR(storage[stype])) {
-                       storage[stype] = NULL;
+               item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(item.cgroup_storage[stype])) {
+                       item.cgroup_storage[stype] = NULL;
                        for_each_cgroup_storage_type(stype)
-                               bpf_cgroup_storage_free(storage[stype]);
+                               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
                        return -ENOMEM;
                }
        }
@@ -107,22 +110,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                repeat = 1;
 
        bpf_test_timer_enter(&t);
+       old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        do {
-               ret = bpf_cgroup_storage_set(storage);
-               if (ret)
-                       break;
-
+               run_ctx.prog_item = &item;
                if (xdp)
                        *retval = bpf_prog_run_xdp(prog, ctx);
                else
-                       *retval = BPF_PROG_RUN(prog, ctx);
-
-               bpf_cgroup_storage_unset();
+                       *retval = bpf_prog_run(prog, ctx);
        } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+       bpf_reset_run_ctx(old_ctx);
        bpf_test_timer_leave(&t);
 
        for_each_cgroup_storage_type(stype)
-               bpf_cgroup_storage_free(storage[stype]);
+               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
 
        return ret;
 }
@@ -327,7 +327,7 @@ __bpf_prog_test_run_raw_tp(void *data)
        struct bpf_raw_tp_test_run_info *info = data;
 
        rcu_read_lock();
-       info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+       info->retval = bpf_prog_run(info->prog, info->ctx);
        rcu_read_unlock();
 }
 
@@ -688,6 +688,64 @@ out:
        return ret;
 }
 
+static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
+{
+       unsigned int ingress_ifindex, rx_queue_index;
+       struct netdev_rx_queue *rxqueue;
+       struct net_device *device;
+
+       if (!xdp_md)
+               return 0;
+
+       if (xdp_md->egress_ifindex != 0)
+               return -EINVAL;
+
+       ingress_ifindex = xdp_md->ingress_ifindex;
+       rx_queue_index = xdp_md->rx_queue_index;
+
+       if (!ingress_ifindex && rx_queue_index)
+               return -EINVAL;
+
+       if (ingress_ifindex) {
+               device = dev_get_by_index(current->nsproxy->net_ns,
+                                         ingress_ifindex);
+               if (!device)
+                       return -ENODEV;
+
+               if (rx_queue_index >= device->real_num_rx_queues)
+                       goto free_dev;
+
+               rxqueue = __netif_get_rx_queue(device, rx_queue_index);
+
+               if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
+                       goto free_dev;
+
+               xdp->rxq = &rxqueue->xdp_rxq;
+               /* The device is now tracked in the xdp->rxq for later
+                * dev_put()
+                */
+       }
+
+       xdp->data = xdp->data_meta + xdp_md->data;
+       return 0;
+
+free_dev:
+       dev_put(device);
+       return -EINVAL;
+}
+
+static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
+{
+       if (!xdp_md)
+               return;
+
+       xdp_md->data = xdp->data - xdp->data_meta;
+       xdp_md->data_end = xdp->data_end - xdp->data_meta;
+
+       if (xdp_md->ingress_ifindex)
+               dev_put(xdp->rxq->dev);
+}
+
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
 {
@@ -698,38 +756,73 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        struct netdev_rx_queue *rxqueue;
        struct xdp_buff xdp = {};
        u32 retval, duration;
+       struct xdp_md *ctx;
        u32 max_data_sz;
        void *data;
-       int ret;
+       int ret = -EINVAL;
 
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
                return -EINVAL;
-       if (kattr->test.ctx_in || kattr->test.ctx_out)
-               return -EINVAL;
+
+       ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       if (ctx) {
+               /* There can't be user provided data before the meta data */
+               if (ctx->data_meta || ctx->data_end != size ||
+                   ctx->data > ctx->data_end ||
+                   unlikely(xdp_metalen_invalid(ctx->data)))
+                       goto free_ctx;
+               /* Meta data is allocated from the headroom */
+               headroom -= ctx->data;
+       }
 
        /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;
 
        data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
-       if (IS_ERR(data))
-               return PTR_ERR(data);
+       if (IS_ERR(data)) {
+               ret = PTR_ERR(data);
+               goto free_ctx;
+       }
 
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
        xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
                      &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);
 
+       ret = xdp_convert_md_to_buff(ctx, &xdp);
+       if (ret)
+               goto free_data;
+
        bpf_prog_change_xdp(NULL, prog);
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+       /* We convert the xdp_buff back to an xdp_md before checking the return
+        * code so the reference count of any held netdevice will be decremented
+        * even if the test run failed.
+        */
+       xdp_convert_buff_to_md(&xdp, ctx);
        if (ret)
                goto out;
-       if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
-               size = xdp.data_end - xdp.data;
-       ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+
+       if (xdp.data_meta != data + headroom ||
+           xdp.data_end != xdp.data_meta + size)
+               size = xdp.data_end - xdp.data_meta;
+
+       ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
+                             duration);
+       if (!ret)
+               ret = bpf_ctx_finish(kattr, uattr, ctx,
+                                    sizeof(struct xdp_md));
+
 out:
        bpf_prog_change_xdp(prog, NULL);
+free_data:
        kfree(data);
+free_ctx:
+       kfree(ctx);
        return ret;
 }
 
@@ -896,7 +989,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
        bpf_test_timer_enter(&t);
        do {
                ctx.selected_sk = NULL;
-               retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+               retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
        } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
        bpf_test_timer_leave(&t);
 
index ef743f9..d3a32c6 100644 (file)
@@ -201,6 +201,48 @@ static struct notifier_block br_switchdev_notifier = {
        .notifier_call = br_switchdev_event,
 };
 
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+                                      unsigned long event, void *ptr)
+{
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+       struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+       struct switchdev_notifier_brport_info *brport_info;
+       const struct switchdev_brport *b;
+       struct net_bridge_port *p;
+       int err = NOTIFY_DONE;
+
+       p = br_port_get_rtnl(dev);
+       if (!p)
+               goto out;
+
+       switch (event) {
+       case SWITCHDEV_BRPORT_OFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               err = br_switchdev_port_offload(p, b->dev, b->ctx,
+                                               b->atomic_nb, b->blocking_nb,
+                                               b->tx_fwd_offload, extack);
+               err = notifier_from_errno(err);
+               break;
+       case SWITCHDEV_BRPORT_UNOFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+                                           b->blocking_nb);
+               break;
+       }
+
+out:
+       return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+       .notifier_call = br_switchdev_blocking_event,
+};
+
 /* br_boolopt_toggle - change user-controlled boolean option
  *
  * @br: bridge device
@@ -214,17 +256,22 @@ static struct notifier_block br_switchdev_notifier = {
 int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
                      struct netlink_ext_ack *extack)
 {
+       int err = 0;
+
        switch (opt) {
        case BR_BOOLOPT_NO_LL_LEARN:
                br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
                break;
+       case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+               err = br_multicast_toggle_vlan_snooping(br, on, extack);
+               break;
        default:
                /* shouldn't be called with unsupported options */
                WARN_ON(1);
                break;
        }
 
-       return 0;
+       return err;
 }
 
 int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
@@ -232,6 +279,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
        switch (opt) {
        case BR_BOOLOPT_NO_LL_LEARN:
                return br_opt_get(br, BROPT_NO_LL_LEARN);
+       case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+               return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
        default:
                /* shouldn't be called with unsupported options */
                WARN_ON(1);
@@ -348,11 +397,15 @@ static int __init br_init(void)
        if (err)
                goto err_out4;
 
-       err = br_netlink_init();
+       err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        if (err)
                goto err_out5;
 
-       brioctl_set(br_ioctl_deviceless_stub);
+       err = br_netlink_init();
+       if (err)
+               goto err_out6;
+
+       brioctl_set(br_ioctl_stub);
 
 #if IS_ENABLED(CONFIG_ATM_LANE)
        br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -366,6 +419,8 @@ static int __init br_init(void)
 
        return 0;
 
+err_out6:
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
 err_out5:
        unregister_switchdev_notifier(&br_switchdev_notifier);
 err_out4:
@@ -385,6 +440,7 @@ static void __exit br_deinit(void)
 {
        stp_proto_unregister(&br_stp_proto);
        br_netlink_fini();
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        unregister_switchdev_notifier(&br_switchdev_notifier);
        unregister_netdevice_notifier(&br_device_notifier);
        brioctl_set(NULL);
index e8b626c..8d6bab2 100644 (file)
@@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
 /* net device transmit always called with BH disabled */
 netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+       struct net_bridge_mcast_port *pmctx_null = NULL;
        struct net_bridge *br = netdev_priv(dev);
+       struct net_bridge_mcast *brmctx = &br->multicast_ctx;
        struct net_bridge_fdb_entry *dst;
        struct net_bridge_mdb_entry *mdst;
        const struct nf_br_ops *nf_ops;
        u8 state = BR_STATE_FORWARDING;
+       struct net_bridge_vlan *vlan;
        const unsigned char *dest;
        u16 vid = 0;
 
@@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_reset_mac_header(skb);
        skb_pull(skb, ETH_HLEN);
 
-       if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
+       if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid,
+                               &state, &vlan))
                goto out;
 
        if (IS_ENABLED(CONFIG_INET) &&
@@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
                        br_flood(br, skb, BR_PKT_MULTICAST, false, true);
                        goto out;
                }
-               if (br_multicast_rcv(br, NULL, skb, vid)) {
+               if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
                        kfree_skb(skb);
                        goto out;
                }
 
-               mdst = br_mdb_get(br, skb, vid);
+               mdst = br_mdb_get(brmctx, skb, vid);
                if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
-                   br_multicast_querier_exists(br, eth_hdr(skb), mdst))
-                       br_multicast_flood(mdst, skb, false, true);
+                   br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
+                       br_multicast_flood(mdst, skb, brmctx, false, true);
                else
                        br_flood(br, skb, BR_PKT_MULTICAST, false, true);
        } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
@@ -450,7 +454,7 @@ static const struct net_device_ops br_netdev_ops = {
        .ndo_set_rx_mode         = br_dev_set_multicast_list,
        .ndo_change_rx_flags     = br_dev_change_rx_flags,
        .ndo_change_mtu          = br_change_mtu,
-       .ndo_do_ioctl            = br_dev_ioctl,
+       .ndo_siocdevprivate      = br_dev_siocdevprivate,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_netpoll_setup       = br_netpoll_setup,
        .ndo_netpoll_cleanup     = br_netpoll_cleanup,
index 5dee309..46812b6 100644 (file)
@@ -732,11 +732,11 @@ static inline size_t fdb_nlmsg_size(void)
                + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
 }
 
-static int br_fdb_replay_one(struct notifier_block *nb,
+static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
                             const struct net_bridge_fdb_entry *fdb,
-                            struct net_device *dev, unsigned long action,
-                            const void *ctx)
+                            unsigned long action, const void *ctx)
 {
+       const struct net_bridge_port *p = READ_ONCE(fdb->dst);
        struct switchdev_notifier_fdb_info item;
        int err;
 
@@ -745,25 +745,25 @@ static int br_fdb_replay_one(struct notifier_block *nb,
        item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
        item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
        item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
-       item.info.dev = dev;
+       item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
        item.info.ctx = ctx;
 
        err = nb->notifier_call(nb, action, &item);
        return notifier_to_errno(err);
 }
 
-int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
-                 const void *ctx, bool adding, struct notifier_block *nb)
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+                 struct notifier_block *nb)
 {
        struct net_bridge_fdb_entry *fdb;
        struct net_bridge *br;
        unsigned long action;
        int err = 0;
 
-       if (!netif_is_bridge_master(br_dev))
-               return -EINVAL;
+       if (!nb)
+               return 0;
 
-       if (!netif_is_bridge_port(dev) && !netif_is_bridge_master(dev))
+       if (!netif_is_bridge_master(br_dev))
                return -EINVAL;
 
        br = netdev_priv(br_dev);
@@ -776,14 +776,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
        rcu_read_lock();
 
        hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
-               const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
-               struct net_device *dst_dev;
-
-               dst_dev = dst ? dst->dev : br->dev;
-               if (dst_dev && dst_dev != dev)
-                       continue;
-
-               err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx);
+               err = br_fdb_replay_one(br, nb, fdb, action, ctx);
                if (err)
                        break;
        }
@@ -792,7 +785,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
 
        return err;
 }
-EXPORT_SYMBOL_GPL(br_fdb_replay);
 
 static void fdb_notify(struct net_bridge *br,
                       const struct net_bridge_fdb_entry *fdb, int type,
index 0785636..ec64665 100644 (file)
@@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
                skb_set_network_header(skb, depth);
        }
 
+       br_switchdev_frame_set_offload_fwd_mark(skb);
+
        dev_queue_xmit(skb);
 
        return 0;
@@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to,
        struct net *net;
        int br_hook;
 
+       /* Mark the skb for forwarding offload early so that br_handle_vlan()
+        * can know whether to pop the VLAN header on egress or keep it.
+        */
+       nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
+
        vg = nbp_vlan_group_rcu(to);
        skb = br_handle_vlan(to->br, to, vg, skb);
        if (!skb)
@@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver(
        if (!should_deliver(p, skb))
                return prev;
 
+       nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
+
        if (!prev)
                goto out;
 
@@ -267,20 +276,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
 /* called with rcu_read_lock */
 void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
                        struct sk_buff *skb,
+                       struct net_bridge_mcast *brmctx,
                        bool local_rcv, bool local_orig)
 {
-       struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
-       struct net_bridge *br = netdev_priv(dev);
        struct net_bridge_port *prev = NULL;
        struct net_bridge_port_group *p;
        bool allow_mode_include = true;
        struct hlist_node *rp;
 
-       rp = br_multicast_get_first_rport_node(br, skb);
+       rp = br_multicast_get_first_rport_node(brmctx, skb);
 
        if (mdst) {
                p = rcu_dereference(mdst->ports);
-               if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+               if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) &&
                    br_multicast_is_star_g(&mdst->addr))
                        allow_mode_include = false;
        } else {
index 14cd6ef..4a02f8b 100644 (file)
@@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name)
        dev_net_set(dev, net);
        dev->rtnl_link_ops = &br_link_ops;
 
-       res = register_netdev(dev);
+       res = register_netdevice(dev);
        if (res)
                free_netdev(dev);
        return res;
@@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name)
        struct net_device *dev;
        int ret = 0;
 
-       rtnl_lock();
        dev = __dev_get_by_name(net, name);
        if (dev == NULL)
                ret =  -ENXIO;  /* Could not find device */
@@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name)
        else
                br_dev_delete(dev, NULL);
 
-       rtnl_unlock();
        return ret;
 }
 
@@ -644,10 +642,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
        if (err)
                goto err5;
 
-       err = nbp_switchdev_mark_set(p);
-       if (err)
-               goto err6;
-
        dev_disable_lro(dev);
 
        list_add_rcu(&p->list, &br->port_list);
@@ -685,13 +679,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
                 */
                err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
                if (err)
-                       goto err7;
+                       goto err6;
        }
 
        err = nbp_vlan_init(p, extack);
        if (err) {
                netdev_err(dev, "failed to initialize vlan filtering on this port\n");
-               goto err7;
+               goto err6;
        }
 
        spin_lock_bh(&br->lock);
@@ -714,13 +708,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 
        return 0;
 
-err7:
+err6:
        if (fdb_synced)
                br_fdb_unsync_static(br, p);
        list_del_rcu(&p->list);
        br_fdb_delete_by_port(br, p, 0, 1);
        nbp_update_port_count(br);
-err6:
        netdev_upper_dev_unlink(dev, br->dev);
 err5:
        dev->priv_flags &= ~IFF_BRIDGE_PORT;
index 1f50630..b50382f 100644 (file)
@@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        struct net_bridge_port *p = br_port_get_rcu(skb->dev);
        enum br_pkt_type pkt_type = BR_PKT_UNICAST;
        struct net_bridge_fdb_entry *dst = NULL;
+       struct net_bridge_mcast_port *pmctx;
        struct net_bridge_mdb_entry *mdst;
        bool local_rcv, mcast_hit = false;
+       struct net_bridge_mcast *brmctx;
+       struct net_bridge_vlan *vlan;
        struct net_bridge *br;
        u16 vid = 0;
        u8 state;
@@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        if (!p || p->state == BR_STATE_DISABLED)
                goto drop;
 
+       brmctx = &p->br->multicast_ctx;
+       pmctx = &p->multicast_ctx;
        state = p->state;
        if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
-                               &state))
+                               &state, &vlan))
                goto out;
 
        nbp_switchdev_frame_mark(p, skb);
@@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
                        local_rcv = true;
                } else {
                        pkt_type = BR_PKT_MULTICAST;
-                       if (br_multicast_rcv(br, p, skb, vid))
+                       if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid))
                                goto drop;
                }
        }
@@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 
        switch (pkt_type) {
        case BR_PKT_MULTICAST:
-               mdst = br_mdb_get(br, skb, vid);
+               mdst = br_mdb_get(brmctx, skb, vid);
                if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
-                   br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
+                   br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
                        if ((mdst && mdst->host_joined) ||
-                           br_multicast_is_router(br, skb)) {
+                           br_multicast_is_router(brmctx, skb)) {
                                local_rcv = true;
                                br->dev->stats.multicast++;
                        }
@@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
                if (!mcast_hit)
                        br_flood(br, skb, pkt_type, local_rcv, false);
                else
-                       br_multicast_flood(mdst, skb, local_rcv, false);
+                       br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
        }
 
        if (local_rcv)
@@ -289,11 +294,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
        memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
 
        p = br_port_get_rcu(skb->dev);
-       if (p->flags & BR_VLAN_TUNNEL) {
-               if (br_handle_ingress_vlan_tunnel(skb, p,
-                                                 nbp_vlan_group_rcu(p)))
-                       goto drop;
-       }
+       if (p->flags & BR_VLAN_TUNNEL)
+               br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p));
 
        if (unlikely(is_link_local_ether_addr(dest))) {
                u16 fwd_mask = p->br->group_fwd_mask_required;
index 2db800f..793b0db 100644 (file)
@@ -106,15 +106,32 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
  * This interface is deprecated because it was too difficult
  * to do the translation for 32/64bit ioctl compatibility.
  */
-static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
 {
        struct net_bridge *br = netdev_priv(dev);
        struct net_bridge_port *p = NULL;
        unsigned long args[4];
+       void __user *argp;
        int ret = -EOPNOTSUPP;
 
-       if (copy_from_user(args, rq->ifr_data, sizeof(args)))
-               return -EFAULT;
+       if (in_compat_syscall()) {
+               unsigned int cargs[4];
+
+               if (copy_from_user(cargs, data, sizeof(cargs)))
+                       return -EFAULT;
+
+               args[0] = cargs[0];
+               args[1] = cargs[1];
+               args[2] = cargs[2];
+               args[3] = cargs[3];
+
+               argp = compat_ptr(args[1]);
+       } else {
+               if (copy_from_user(args, data, sizeof(args)))
+                       return -EFAULT;
+
+               argp = (void __user *)args[1];
+       }
 
        switch (args[0]) {
        case BRCTL_ADD_IF:
@@ -171,7 +188,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
                        return -ENOMEM;
 
                get_port_ifindices(br, indices, num);
-               if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
+               if (copy_to_user(argp, indices, num * sizeof(int)))
                        num =  -EFAULT;
                kfree(indices);
                return num;
@@ -232,7 +249,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
                rcu_read_unlock();
 
-               if (copy_to_user((void __user *)args[1], &p, sizeof(p)))
+               if (copy_to_user(argp, &p, sizeof(p)))
                        return -EFAULT;
 
                return 0;
@@ -282,8 +299,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        }
 
        case BRCTL_GET_FDB_ENTRIES:
-               return get_fdb_entries(br, (void __user *)args[1],
-                                      args[2], args[3]);
+               return get_fdb_entries(br, argp, args[2], args[3]);
        }
 
        if (!ret) {
@@ -320,7 +336,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
 
                args[2] = get_bridge_ifindices(net, indices, args[2]);
 
-               ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
+               ret = copy_to_user(uarg, indices, args[2]*sizeof(int))
                        ? -EFAULT : args[2];
 
                kfree(indices);
@@ -350,48 +366,47 @@ static int old_deviceless(struct net *net, void __user *uarg)
        return -EOPNOTSUPP;
 }
 
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+                 struct ifreq *ifr, void __user *uarg)
 {
+       int ret = -EOPNOTSUPP;
+
+       rtnl_lock();
+
        switch (cmd) {
        case SIOCGIFBR:
        case SIOCSIFBR:
-               return old_deviceless(net, uarg);
-
+               ret = old_deviceless(net, uarg);
+               break;
        case SIOCBRADDBR:
        case SIOCBRDELBR:
        {
                char buf[IFNAMSIZ];
 
-               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-                       return -EPERM;
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+                       ret = -EPERM;
+                       break;
+               }
 
-               if (copy_from_user(buf, uarg, IFNAMSIZ))
-                       return -EFAULT;
+               if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+                       ret = -EFAULT;
+                       break;
+               }
 
                buf[IFNAMSIZ-1] = 0;
                if (cmd == SIOCBRADDBR)
-                       return br_add_bridge(net, buf);
-
-               return br_del_bridge(net, buf);
-       }
+                       ret = br_add_bridge(net, buf);
+               else
+                       ret = br_del_bridge(net, buf);
        }
-       return -EOPNOTSUPP;
-}
-
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
-       struct net_bridge *br = netdev_priv(dev);
-
-       switch (cmd) {
-       case SIOCDEVPRIVATE:
-               return old_dev_ioctl(dev, rq, cmd);
-
+               break;
        case SIOCBRADDIF:
        case SIOCBRDELIF:
-               return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
-
+               ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+               break;
        }
 
-       br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
-       return -EOPNOTSUPP;
+       rtnl_unlock();
+
+       return ret;
 }
index 17a720b..0281453 100644 (file)
 
 #include "br_private.h"
 
-static bool br_rports_have_mc_router(struct net_bridge *br)
+static bool
+br_ip4_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+                       unsigned long *timer)
+{
+       *timer = br_timer_value(&pmctx->ip4_mc_router_timer);
+       return !hlist_unhashed(&pmctx->ip4_rlist);
+}
+
+static bool
+br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+                       unsigned long *timer)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       return !hlist_empty(&br->ip4_mc_router_list) ||
-              !hlist_empty(&br->ip6_mc_router_list);
+       *timer = br_timer_value(&pmctx->ip6_mc_router_timer);
+       return !hlist_unhashed(&pmctx->ip6_rlist);
 #else
-       return !hlist_empty(&br->ip4_mc_router_list);
+       *timer = 0;
+       return false;
 #endif
 }
 
-static bool
-br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+static size_t __br_rports_one_size(void)
 {
-       *timer = br_timer_value(&port->ip4_mc_router_timer);
-       return !hlist_unhashed(&port->ip4_rlist);
+       return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */
+              nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */
+              nla_total_size(sizeof(u8)) +  /* MDBA_ROUTER_PATTR_TYPE */
+              nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */
+              nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */
+              nla_total_size(sizeof(u32));  /* MDBA_ROUTER_PATTR_VID */
 }
 
-static bool
-br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
+size_t br_rports_size(const struct net_bridge_mcast *brmctx)
 {
+       struct net_bridge_mcast_port *pmctx;
+       size_t size = nla_total_size(0); /* MDBA_ROUTER */
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
+                                ip4_rlist)
+               size += __br_rports_one_size();
+
 #if IS_ENABLED(CONFIG_IPV6)
-       *timer = br_timer_value(&port->ip6_mc_router_timer);
-       return !hlist_unhashed(&port->ip6_rlist);
-#else
-       *timer = 0;
-       return false;
+       hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
+                                ip6_rlist)
+               size += __br_rports_one_size();
 #endif
+       rcu_read_unlock();
+
+       return size;
 }
 
-static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
-                              struct net_device *dev)
+int br_rports_fill_info(struct sk_buff *skb,
+                       const struct net_bridge_mcast *brmctx)
 {
-       struct net_bridge *br = netdev_priv(dev);
+       u16 vid = brmctx->vlan ? brmctx->vlan->vid : 0;
        bool have_ip4_mc_rtr, have_ip6_mc_rtr;
        unsigned long ip4_timer, ip6_timer;
        struct nlattr *nest, *port_nest;
        struct net_bridge_port *p;
 
-       if (!br->multicast_router)
-               return 0;
-
-       if (!br_rports_have_mc_router(br))
+       if (!brmctx->multicast_router || !br_rports_have_mc_router(brmctx))
                return 0;
 
        nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
        if (nest == NULL)
                return -EMSGSIZE;
 
-       list_for_each_entry_rcu(p, &br->port_list, list) {
-               have_ip4_mc_rtr = br_ip4_rports_get_timer(p, &ip4_timer);
-               have_ip6_mc_rtr = br_ip6_rports_get_timer(p, &ip6_timer);
+       list_for_each_entry_rcu(p, &brmctx->br->port_list, list) {
+               struct net_bridge_mcast_port *pmctx;
+
+               if (vid) {
+                       struct net_bridge_vlan *v;
+
+                       v = br_vlan_find(nbp_vlan_group(p), vid);
+                       if (!v)
+                               continue;
+                       pmctx = &v->port_mcast_ctx;
+               } else {
+                       pmctx = &p->multicast_ctx;
+               }
+
+               have_ip4_mc_rtr = br_ip4_rports_get_timer(pmctx, &ip4_timer);
+               have_ip6_mc_rtr = br_ip6_rports_get_timer(pmctx, &ip6_timer);
 
                if (!have_ip4_mc_rtr && !have_ip6_mc_rtr)
                        continue;
@@ -79,13 +111,14 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
                    nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
                                max(ip4_timer, ip6_timer)) ||
                    nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
-                              p->multicast_router) ||
+                              p->multicast_ctx.multicast_router) ||
                    (have_ip4_mc_rtr &&
                     nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER,
                                 ip4_timer)) ||
                    (have_ip6_mc_rtr &&
                     nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER,
-                                ip6_timer))) {
+                                ip6_timer)) ||
+                   (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid))) {
                        nla_nest_cancel(skb, port_nest);
                        goto fail;
                }
@@ -240,7 +273,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
 
        switch (mp->addr.proto) {
        case htons(ETH_P_IP):
-               dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+               dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3);
                if (mp->addr.src.ip4) {
                        if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
                                            mp->addr.src.ip4))
@@ -250,7 +283,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
                break;
 #if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6):
-               dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+               dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2);
                if (!ipv6_addr_any(&mp->addr.src.ip6)) {
                        if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
                                             &mp->addr.src.ip6))
@@ -390,6 +423,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
        for_each_netdev_rcu(net, dev) {
                if (dev->priv_flags & IFF_EBRIDGE) {
+                       struct net_bridge *br = netdev_priv(dev);
                        struct br_port_msg *bpm;
 
                        if (idx < s_idx)
@@ -406,7 +440,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
                        bpm->ifindex = dev->ifindex;
                        if (br_mdb_fill_info(skb, cb, dev) < 0)
                                goto out;
-                       if (br_rports_fill_info(skb, cb, dev) < 0)
+                       if (br_rports_fill_info(skb, &br->multicast_ctx) < 0)
                                goto out;
 
                        cb->args[1] = 0;
@@ -483,7 +517,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
                /* MDBA_MDB_EATTR_SOURCE */
                if (pg->key.addr.src.ip4)
                        nlmsg_size += nla_total_size(sizeof(__be32));
-               if (pg->key.port->br->multicast_igmp_version == 2)
+               if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2)
                        goto out;
                addr_size = sizeof(__be32);
                break;
@@ -492,7 +526,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
                /* MDBA_MDB_EATTR_SOURCE */
                if (!ipv6_addr_any(&pg->key.addr.src.ip6))
                        nlmsg_size += nla_total_size(sizeof(struct in6_addr));
-               if (pg->key.port->br->multicast_mld_version == 1)
+               if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1)
                        goto out;
                addr_size = sizeof(struct in6_addr);
                break;
@@ -617,6 +651,9 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
 
        ASSERT_RTNL();
 
+       if (!nb)
+               return 0;
+
        if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
                return -EINVAL;
 
@@ -686,7 +723,6 @@ out_free_mdb:
 
        return err;
 }
-EXPORT_SYMBOL_GPL(br_mdb_replay);
 
 static void br_mdb_switchdev_host_port(struct net_device *dev,
                                       struct net_device *lower_dev,
@@ -781,12 +817,12 @@ errout:
 
 static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
                                   struct net_device *dev,
-                                  int ifindex, u32 pid,
+                                  int ifindex, u16 vid, u32 pid,
                                   u32 seq, int type, unsigned int flags)
 {
+       struct nlattr *nest, *port_nest;
        struct br_port_msg *bpm;
        struct nlmsghdr *nlh;
-       struct nlattr *nest;
 
        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
        if (!nlh)
@@ -800,8 +836,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
        if (!nest)
                goto cancel;
 
-       if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
+       port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
+       if (!port_nest)
+               goto end;
+       if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) {
+               nla_nest_cancel(skb, port_nest);
+               goto end;
+       }
+       if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) {
+               nla_nest_cancel(skb, port_nest);
                goto end;
+       }
+       nla_nest_end(skb, port_nest);
 
        nla_nest_end(skb, nest);
        nlmsg_end(skb, nlh);
@@ -817,23 +863,28 @@ cancel:
 static inline size_t rtnl_rtr_nlmsg_size(void)
 {
        return NLMSG_ALIGN(sizeof(struct br_port_msg))
-               + nla_total_size(sizeof(__u32));
+               + nla_total_size(sizeof(__u32))
+               + nla_total_size(sizeof(u16));
 }
 
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
                   int type)
 {
        struct net *net = dev_net(dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;
        int ifindex;
+       u16 vid;
 
-       ifindex = port ? port->dev->ifindex : 0;
+       ifindex = pmctx ? pmctx->port->dev->ifindex : 0;
+       vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid :
+                                                             0;
        skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
        if (!skb)
                goto errout;
 
-       err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
+       err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type,
+                                     NTF_SELF);
        if (err < 0) {
                kfree_skb(skb);
                goto errout;
@@ -1004,14 +1055,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
        return 0;
 }
 
+static struct net_bridge_mcast *
+__br_mdb_choose_context(struct net_bridge *br,
+                       const struct br_mdb_entry *entry,
+                       struct netlink_ext_ack *extack)
+{
+       struct net_bridge_mcast *brmctx = NULL;
+       struct net_bridge_vlan *v;
+
+       if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+               brmctx = &br->multicast_ctx;
+               goto out;
+       }
+
+       if (!entry->vid) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled");
+               goto out;
+       }
+
+       v = br_vlan_find(br_vlan_group(br), entry->vid);
+       if (!v) {
+               NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured");
+               goto out;
+       }
+       if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) {
+               NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled");
+               goto out;
+       }
+       brmctx = &v->br_mcast_ctx;
+out:
+       return brmctx;
+}
+
 static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
                            struct br_mdb_entry *entry,
                            struct nlattr **mdb_attrs,
                            struct netlink_ext_ack *extack)
 {
        struct net_bridge_mdb_entry *mp, *star_mp;
-       struct net_bridge_port_group *p;
        struct net_bridge_port_group __rcu **pp;
+       struct net_bridge_port_group *p;
+       struct net_bridge_mcast *brmctx;
        struct br_ip group, star_group;
        unsigned long now = jiffies;
        unsigned char flags = 0;
@@ -1020,6 +1104,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
 
        __mdb_entry_to_br_ip(entry, &group, mdb_attrs);
 
+       brmctx = __br_mdb_choose_context(br, entry, extack);
+       if (!brmctx)
+               return -EINVAL;
+
        /* host join errors which can happen before creating the group */
        if (!port) {
                /* don't allow any flags for host-joined groups */
@@ -1053,7 +1141,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
                        return -EEXIST;
                }
 
-               br_multicast_host_join(mp, false);
+               br_multicast_host_join(brmctx, mp, false);
                br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
 
                return 0;
@@ -1084,14 +1172,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
        }
        rcu_assign_pointer(*pp, p);
        if (entry->state == MDB_TEMPORARY)
-               mod_timer(&p->timer, now + br->multicast_membership_interval);
+               mod_timer(&p->timer,
+                         now + brmctx->multicast_membership_interval);
        br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
        /* if we are adding a new EXCLUDE port group (*,G) it needs to be also
         * added to all S,G entries for proper replication, if we are adding
         * a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
         * added to it for proper replication
         */
-       if (br_multicast_should_handle_mode(br, group.proto)) {
+       if (br_multicast_should_handle_mode(brmctx, group.proto)) {
                switch (filter_mode) {
                case MCAST_EXCLUDE:
                        br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
index d0434dc..9231617 100644 (file)
@@ -49,30 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = {
        .automatic_shrinking = true,
 };
 
-static void br_multicast_start_querier(struct net_bridge *br,
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
                                       struct bridge_mcast_own_query *query);
-static void br_ip4_multicast_add_router(struct net_bridge *br,
-                                       struct net_bridge_port *port);
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
-                                        struct net_bridge_port *port,
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx);
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx,
                                         __be32 group,
                                         __u16 vid,
                                         const unsigned char *src);
 static void br_multicast_port_group_rexmit(struct timer_list *t);
 
 static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
-static void br_ip6_multicast_add_router(struct net_bridge *br,
-                                       struct net_bridge_port *port);
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted);
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx);
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
-                                        struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx,
                                         const struct in6_addr *group,
                                         __u16 vid, const unsigned char *src);
 #endif
 static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
-                        struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+                        struct net_bridge_mcast_port *pmctx,
                         struct br_ip *group,
                         const unsigned char *src,
                         u8 filter_mode,
@@ -80,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br,
                         bool blocked);
 static void br_multicast_find_del_pg(struct net_bridge *br,
                                     struct net_bridge_port_group *pg);
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
 
 static struct net_bridge_port_group *
 br_sg_port_find(struct net_bridge *br,
@@ -140,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
 }
 #endif
 
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
                                        struct sk_buff *skb, u16 vid)
 {
+       struct net_bridge *br = brmctx->br;
        struct br_ip ip;
 
-       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+           br_multicast_ctx_vlan_global_disabled(brmctx))
                return NULL;
 
        if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -158,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                ip.dst.ip4 = ip_hdr(skb)->daddr;
-               if (br->multicast_igmp_version == 3) {
+               if (brmctx->multicast_igmp_version == 3) {
                        struct net_bridge_mdb_entry *mdb;
 
                        ip.src.ip4 = ip_hdr(skb)->saddr;
@@ -171,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 #if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6):
                ip.dst.ip6 = ipv6_hdr(skb)->daddr;
-               if (br->multicast_mld_version == 2) {
+               if (brmctx->multicast_mld_version == 2) {
                        struct net_bridge_mdb_entry *mdb;
 
                        ip.src.ip6 = ipv6_hdr(skb)->saddr;
@@ -190,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
        return br_mdb_ip_get_rcu(br, &ip);
 }
 
+/* IMPORTANT: this function must be used only when the contexts cannot be
+ * passed down (e.g. timer) and must be used for read-only purposes because
+ * the vlan snooping option can change, so it can return any context
+ * (non-vlan or vlan). Its initial intended purpose is to read timer values
+ * from the *current* context based on the option. At worst that could lead
+ * to inconsistent timers when the contexts are changed, i.e. src timer
+ * which needs to re-arm with a specific delay taken from the old context
+ */
+static struct net_bridge_mcast_port *
+br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
+{
+       struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx;
+       struct net_bridge_vlan *vlan;
+
+       lockdep_assert_held_once(&pg->key.port->br->multicast_lock);
+
+       /* if vlan snooping is disabled use the port's multicast context */
+       if (!pg->key.addr.vid ||
+           !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+               goto out;
+
+       /* locking is tricky here, due to different rules for multicast and
+        * vlans we need to take rcu to find the vlan and make sure it has
+        * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under
+        * multicast_lock which must be already held here, so the vlan's pmctx
+        * can safely be used on return
+        */
+       rcu_read_lock();
+       vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid);
+       if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+               pmctx = &vlan->port_mcast_ctx;
+       else
+               pmctx = NULL;
+       rcu_read_unlock();
+out:
+       return pmctx;
+}
+
+/* when snooping we need to check if the contexts should be used
+ * in the following order:
+ * - if pmctx is non-NULL (port), check if it should be used
+ * - if pmctx is NULL (bridge), check if brmctx should be used
+ */
+static bool
+br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx,
+                           const struct net_bridge_mcast_port *pmctx)
+{
+       if (!netif_running(brmctx->br->dev))
+               return false;
+
+       if (pmctx)
+               return !br_multicast_port_ctx_state_disabled(pmctx);
+       else
+               return !br_multicast_ctx_vlan_disabled(brmctx);
+}
+
 static bool br_port_group_equal(struct net_bridge_port_group *p,
                                struct net_bridge_port *port,
                                const unsigned char *src)
@@ -203,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
        return ether_addr_equal(src, p->eth_addr);
 }
 
-static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx,
+                               struct net_bridge_port_group *pg,
                                struct br_ip *sg_ip)
 {
        struct net_bridge_port_group_sg_key sg_key;
-       struct net_bridge *br = pg->key.port->br;
        struct net_bridge_port_group *src_pg;
+       struct net_bridge_mcast *brmctx;
 
        memset(&sg_key, 0, sizeof(sg_key));
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
        sg_key.port = pg->key.port;
        sg_key.addr = *sg_ip;
-       if (br_sg_port_find(br, &sg_key))
+       if (br_sg_port_find(brmctx->br, &sg_key))
                return;
 
-       src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+       src_pg = __br_multicast_add_group(brmctx, pmctx,
+                                         sg_ip, pg->eth_addr,
                                          MCAST_INCLUDE, false, false);
        if (IS_ERR_OR_NULL(src_pg) ||
            src_pg->rt_protocol != RTPROT_KERNEL)
@@ -256,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
 {
        struct net_bridge *br = pg->key.port->br;
        struct net_bridge_port_group *pg_lst;
+       struct net_bridge_mcast_port *pmctx;
        struct net_bridge_mdb_entry *mp;
        struct br_ip sg_ip;
 
@@ -265,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
        mp = br_mdb_ip_get(br, &pg->key.addr);
        if (!mp)
                return;
+       pmctx = br_multicast_pg_to_port_ctx(pg);
+       if (!pmctx)
+               return;
 
        memset(&sg_ip, 0, sizeof(sg_ip));
        sg_ip = pg->key.addr;
+
        for (pg_lst = mlock_dereference(mp->ports, br);
             pg_lst;
             pg_lst = mlock_dereference(pg_lst->next, br)) {
@@ -284,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
                                __fwd_del_star_excl(pg, &sg_ip);
                                break;
                        case MCAST_EXCLUDE:
-                               __fwd_add_star_excl(pg, &sg_ip);
+                               __fwd_add_star_excl(pmctx, pg, &sg_ip);
                                break;
                        }
                }
@@ -377,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
 {
        struct net_bridge_port_group_sg_key sg_key;
        struct net_bridge *br = star_mp->br;
+       struct net_bridge_mcast_port *pmctx;
        struct net_bridge_port_group *pg;
+       struct net_bridge_mcast *brmctx;
 
        if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
                return;
@@ -400,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
                if (br_sg_port_find(br, &sg_key))
                        continue;
 
-               src_pg = __br_multicast_add_group(br, pg->key.port,
+               pmctx = br_multicast_pg_to_port_ctx(pg);
+               if (!pmctx)
+                       continue;
+               brmctx = br_multicast_port_ctx_get_global(pmctx);
+
+               src_pg = __br_multicast_add_group(brmctx, pmctx,
                                                  &sg->key.addr,
                                                  sg->eth_addr,
                                                  MCAST_INCLUDE, false, false);
@@ -414,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
 static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
 {
        struct net_bridge_mdb_entry *star_mp;
+       struct net_bridge_mcast_port *pmctx;
        struct net_bridge_port_group *sg;
+       struct net_bridge_mcast *brmctx;
        struct br_ip sg_ip;
 
        if (src->flags & BR_SGRP_F_INSTALLED)
                return;
 
        memset(&sg_ip, 0, sizeof(sg_ip));
+       pmctx = br_multicast_pg_to_port_ctx(src->pg);
+       if (!pmctx)
+               return;
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
        sg_ip = src->pg->key.addr;
        sg_ip.src = src->addr.src;
-       sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+
+       sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip,
                                      src->pg->eth_addr, MCAST_INCLUDE, false,
                                      !timer_pending(&src->timer));
        if (IS_ERR_OR_NULL(sg))
@@ -692,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head)
        }
 }
 
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx,
+                                            struct net_bridge_mcast_port *pmctx,
+                                            struct sk_buff *skb)
+{
+       struct net_bridge_vlan *vlan = NULL;
+
+       if (pmctx && br_multicast_port_ctx_is_vlan(pmctx))
+               vlan = pmctx->vlan;
+       else if (br_multicast_ctx_is_vlan(brmctx))
+               vlan = brmctx->vlan;
+
+       if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+               u16 vlan_proto;
+
+               if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0)
+                       return;
+               __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid);
+       }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+                                                   struct net_bridge_mcast_port *pmctx,
                                                    struct net_bridge_port_group *pg,
                                                    __be32 ip_dst, __be32 group,
                                                    bool with_srcs, bool over_lmqt,
@@ -714,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        u16 lmqt_srcs = 0;
 
        igmp_hdr_size = sizeof(*ih);
-       if (br->multicast_igmp_version == 3) {
+       if (brmctx->multicast_igmp_version == 3) {
                igmp_hdr_size = sizeof(*ihv3);
                if (pg && with_srcs) {
-                       lmqt = now + (br->multicast_last_member_interval *
-                                     br->multicast_last_member_count);
+                       lmqt = now + (brmctx->multicast_last_member_interval *
+                                     brmctx->multicast_last_member_count);
                        hlist_for_each_entry(ent, &pg->src_list, node) {
                                if (over_lmqt == time_after(ent->timer.expires,
                                                            lmqt) &&
@@ -734,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
 
        pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
        if ((p && pkt_size > p->dev->mtu) ||
-           pkt_size > br->dev->mtu)
+           pkt_size > brmctx->br->dev->mtu)
                return NULL;
 
-       skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+       skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
        if (!skb)
                goto out;
 
+       __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
        skb->protocol = htons(ETH_P_IP);
 
        skb_reset_mac_header(skb);
        eth = eth_hdr(skb);
 
-       ether_addr_copy(eth->h_source, br->dev->dev_addr);
+       ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
        ip_eth_mc_map(ip_dst, eth->h_dest);
        eth->h_proto = htons(ETH_P_IP);
        skb_put(skb, sizeof(*eth));
@@ -762,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        iph->frag_off = htons(IP_DF);
        iph->ttl = 1;
        iph->protocol = IPPROTO_IGMP;
-       iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
-                    inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
+       iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
+                    inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0;
        iph->daddr = ip_dst;
        ((u8 *)&iph[1])[0] = IPOPT_RA;
        ((u8 *)&iph[1])[1] = 4;
@@ -775,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        skb_set_transport_header(skb, skb->len);
        *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
 
-       switch (br->multicast_igmp_version) {
+       switch (brmctx->multicast_igmp_version) {
        case 2:
                ih = igmp_hdr(skb);
                ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
-               ih->code = (group ? br->multicast_last_member_interval :
-                                   br->multicast_query_response_interval) /
+               ih->code = (group ? brmctx->multicast_last_member_interval :
+                                   brmctx->multicast_query_response_interval) /
                           (HZ / IGMP_TIMER_SCALE);
                ih->group = group;
                ih->csum = 0;
@@ -790,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        case 3:
                ihv3 = igmpv3_query_hdr(skb);
                ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
-               ihv3->code = (group ? br->multicast_last_member_interval :
-                                     br->multicast_query_response_interval) /
+               ihv3->code = (group ? brmctx->multicast_last_member_interval :
+                                     brmctx->multicast_query_response_interval) /
                             (HZ / IGMP_TIMER_SCALE);
                ihv3->group = group;
-               ihv3->qqic = br->multicast_query_interval / HZ;
+               ihv3->qqic = brmctx->multicast_query_interval / HZ;
                ihv3->nsrcs = htons(lmqt_srcs);
                ihv3->resv = 0;
                ihv3->suppress = sflag;
@@ -837,7 +940,8 @@ out:
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+                                                   struct net_bridge_mcast_port *pmctx,
                                                    struct net_bridge_port_group *pg,
                                                    const struct in6_addr *ip6_dst,
                                                    const struct in6_addr *group,
@@ -862,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        u8 *hopopt;
 
        mld_hdr_size = sizeof(*mldq);
-       if (br->multicast_mld_version == 2) {
+       if (brmctx->multicast_mld_version == 2) {
                mld_hdr_size = sizeof(*mld2q);
                if (pg && with_srcs) {
-                       llqt = now + (br->multicast_last_member_interval *
-                                     br->multicast_last_member_count);
+                       llqt = now + (brmctx->multicast_last_member_interval *
+                                     brmctx->multicast_last_member_count);
                        hlist_for_each_entry(ent, &pg->src_list, node) {
                                if (over_llqt == time_after(ent->timer.expires,
                                                            llqt) &&
@@ -882,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 
        pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
        if ((p && pkt_size > p->dev->mtu) ||
-           pkt_size > br->dev->mtu)
+           pkt_size > brmctx->br->dev->mtu)
                return NULL;
 
-       skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+       skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
        if (!skb)
                goto out;
 
+       __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
        skb->protocol = htons(ETH_P_IPV6);
 
        /* Ethernet header */
        skb_reset_mac_header(skb);
        eth = eth_hdr(skb);
 
-       ether_addr_copy(eth->h_source, br->dev->dev_addr);
+       ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
        eth->h_proto = htons(ETH_P_IPV6);
        skb_put(skb, sizeof(*eth));
 
@@ -908,14 +1013,14 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        ip6h->nexthdr = IPPROTO_HOPOPTS;
        ip6h->hop_limit = 1;
        ip6h->daddr = *ip6_dst;
-       if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
-                              &ip6h->saddr)) {
+       if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev,
+                              &ip6h->daddr, 0, &ip6h->saddr)) {
                kfree_skb(skb);
-               br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
+               br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false);
                return NULL;
        }
 
-       br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+       br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true);
        ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
        hopopt = (u8 *)(ip6h + 1);
@@ -933,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        /* ICMPv6 */
        skb_set_transport_header(skb, skb->len);
        interval = ipv6_addr_any(group) ?
-                       br->multicast_query_response_interval :
-                       br->multicast_last_member_interval;
+                       brmctx->multicast_query_response_interval :
+                       brmctx->multicast_last_member_interval;
        *igmp_type = ICMPV6_MGM_QUERY;
-       switch (br->multicast_mld_version) {
+       switch (brmctx->multicast_mld_version) {
        case 1:
                mldq = (struct mld_msg *)icmp6_hdr(skb);
                mldq->mld_type = ICMPV6_MGM_QUERY;
@@ -959,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
                mld2q->mld2q_suppress = sflag;
                mld2q->mld2q_qrv = 2;
                mld2q->mld2q_nsrcs = htons(llqt_srcs);
-               mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+               mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ;
                mld2q->mld2q_mca = *group;
                csum = &mld2q->mld2q_cksum;
                csum_start = (void *)mld2q;
@@ -1000,7 +1105,8 @@ out:
 }
 #endif
 
-static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+                                               struct net_bridge_mcast_port *pmctx,
                                                struct net_bridge_port_group *pg,
                                                struct br_ip *ip_dst,
                                                struct br_ip *group,
@@ -1013,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
        switch (group->proto) {
        case htons(ETH_P_IP):
                ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
-               return br_ip4_multicast_alloc_query(br, pg,
+               return br_ip4_multicast_alloc_query(brmctx, pmctx, pg,
                                                    ip4_dst, group->dst.ip4,
                                                    with_srcs, over_lmqt,
                                                    sflag, igmp_type,
@@ -1028,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
                        ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
                                      htonl(1));
 
-               return br_ip6_multicast_alloc_query(br, pg,
+               return br_ip6_multicast_alloc_query(brmctx, pmctx, pg,
                                                    &ip6_dst, &group->dst.ip6,
                                                    with_srcs, over_lmqt,
                                                    sflag, igmp_type,
@@ -1206,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
        return p;
 }
 
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+                           struct net_bridge_mdb_entry *mp, bool notify)
 {
        if (!mp->host_joined) {
                mp->host_joined = true;
@@ -1219,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
        if (br_group_is_l2(&mp->addr))
                return;
 
-       mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+       mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval);
 }
 
 void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
@@ -1235,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
 }
 
 static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
-                        struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+                        struct net_bridge_mcast_port *pmctx,
                         struct br_ip *group,
                         const unsigned char *src,
                         u8 filter_mode,
@@ -1248,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br,
        struct net_bridge_mdb_entry *mp;
        unsigned long now = jiffies;
 
-       if (!netif_running(br->dev) ||
-           (port && port->state == BR_STATE_DISABLED))
+       if (!br_multicast_ctx_should_use(brmctx, pmctx))
                goto out;
 
-       mp = br_multicast_new_group(br, group);
+       mp = br_multicast_new_group(brmctx->br, group);
        if (IS_ERR(mp))
                return ERR_CAST(mp);
 
-       if (!port) {
-               br_multicast_host_join(mp, true);
+       if (!pmctx) {
+               br_multicast_host_join(brmctx, mp, true);
                goto out;
        }
 
        for (pp = &mp->ports;
-            (p = mlock_dereference(*pp, br)) != NULL;
+            (p = mlock_dereference(*pp, brmctx->br)) != NULL;
             pp = &p->next) {
-               if (br_port_group_equal(p, port, src))
+               if (br_port_group_equal(p, pmctx->port, src))
                        goto found;
-               if ((unsigned long)p->key.port < (unsigned long)port)
+               if ((unsigned long)p->key.port < (unsigned long)pmctx->port)
                        break;
        }
 
-       p = br_multicast_new_port_group(port, group, *pp, 0, src,
+       p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
                                        filter_mode, RTPROT_KERNEL);
        if (unlikely(!p)) {
                p = ERR_PTR(-ENOMEM);
@@ -1279,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br,
        rcu_assign_pointer(*pp, p);
        if (blocked)
                p->flags |= MDB_PG_FLAGS_BLOCKED;
-       br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+       br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB);
 
 found:
        if (igmpv2_mldv1)
-               mod_timer(&p->timer, now + br->multicast_membership_interval);
+               mod_timer(&p->timer,
+                         now + brmctx->multicast_membership_interval);
 
 out:
        return p;
 }
 
-static int br_multicast_add_group(struct net_bridge *br,
-                                 struct net_bridge_port *port,
+static int br_multicast_add_group(struct net_bridge_mcast *brmctx,
+                                 struct net_bridge_mcast_port *pmctx,
                                  struct br_ip *group,
                                  const unsigned char *src,
                                  u8 filter_mode,
@@ -1299,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br,
        struct net_bridge_port_group *pg;
        int err;
 
-       spin_lock(&br->multicast_lock);
-       pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+       spin_lock(&brmctx->br->multicast_lock);
+       pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode,
                                      igmpv2_mldv1, false);
        /* NULL is considered valid for host joined groups */
        err = PTR_ERR_OR_ZERO(pg);
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 
        return err;
 }
 
-static int br_ip4_multicast_add_group(struct net_bridge *br,
-                                     struct net_bridge_port *port,
+static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx,
+                                     struct net_bridge_mcast_port *pmctx,
                                      __be32 group,
                                      __u16 vid,
                                      const unsigned char *src,
@@ -1328,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
        br_group.vid = vid;
        filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
 
-       return br_multicast_add_group(br, port, &br_group, src, filter_mode,
-                                     igmpv2);
+       return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+                                     filter_mode, igmpv2);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_add_group(struct net_bridge *br,
-                                     struct net_bridge_port *port,
+static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx,
+                                     struct net_bridge_mcast_port *pmctx,
                                      const struct in6_addr *group,
                                      __u16 vid,
                                      const unsigned char *src,
@@ -1352,8 +1459,8 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
        br_group.vid = vid;
        filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
 
-       return br_multicast_add_group(br, port, &br_group, src, filter_mode,
-                                     mldv1);
+       return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+                                     filter_mode, mldv1);
 }
 #endif
 
@@ -1366,52 +1473,54 @@ static bool br_multicast_rport_del(struct hlist_node *rlist)
        return true;
 }
 
-static bool br_ip4_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
 {
-       return br_multicast_rport_del(&p->ip4_rlist);
+       return br_multicast_rport_del(&pmctx->ip4_rlist);
 }
 
-static bool br_ip6_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       return br_multicast_rport_del(&p->ip6_rlist);
+       return br_multicast_rport_del(&pmctx->ip6_rlist);
 #else
        return false;
 #endif
 }
 
-static void br_multicast_router_expired(struct net_bridge_port *port,
+static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx,
                                        struct timer_list *t,
                                        struct hlist_node *rlist)
 {
-       struct net_bridge *br = port->br;
+       struct net_bridge *br = pmctx->port->br;
        bool del;
 
        spin_lock(&br->multicast_lock);
-       if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
-           port->multicast_router == MDB_RTR_TYPE_PERM ||
+       if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+           pmctx->multicast_router == MDB_RTR_TYPE_PERM ||
            timer_pending(t))
                goto out;
 
        del = br_multicast_rport_del(rlist);
-       br_multicast_rport_del_notify(port, del);
+       br_multicast_rport_del_notify(pmctx, del);
 out:
        spin_unlock(&br->multicast_lock);
 }
 
 static void br_ip4_multicast_router_expired(struct timer_list *t)
 {
-       struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer);
+       struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+                                                        ip4_mc_router_timer);
 
-       br_multicast_router_expired(port, t, &port->ip4_rlist);
+       br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_router_expired(struct timer_list *t)
 {
-       struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer);
+       struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+                                                        ip6_mc_router_timer);
 
-       br_multicast_router_expired(port, t, &port->ip6_rlist);
+       br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist);
 }
 #endif
 
@@ -1428,80 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p,
        switchdev_port_attr_set(p->dev, &attr, NULL);
 }
 
-static void br_multicast_local_router_expired(struct net_bridge *br,
+static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx,
                                              struct timer_list *timer)
 {
-       spin_lock(&br->multicast_lock);
-       if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
-           br->multicast_router == MDB_RTR_TYPE_PERM ||
-           br_ip4_multicast_is_router(br) ||
-           br_ip6_multicast_is_router(br))
+       spin_lock(&brmctx->br->multicast_lock);
+       if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+           brmctx->multicast_router == MDB_RTR_TYPE_PERM ||
+           br_ip4_multicast_is_router(brmctx) ||
+           br_ip6_multicast_is_router(brmctx))
                goto out;
 
-       br_mc_router_state_change(br, false);
+       br_mc_router_state_change(brmctx->br, false);
 out:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
 static void br_ip4_multicast_local_router_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip4_mc_router_timer);
 
-       br_multicast_local_router_expired(br, t);
+       br_multicast_local_router_expired(brmctx, t);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_local_router_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip6_mc_router_timer);
 
-       br_multicast_local_router_expired(br, t);
+       br_multicast_local_router_expired(brmctx, t);
 }
 #endif
 
-static void br_multicast_querier_expired(struct net_bridge *br,
+static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx,
                                         struct bridge_mcast_own_query *query)
 {
-       spin_lock(&br->multicast_lock);
-       if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+       spin_lock(&brmctx->br->multicast_lock);
+       if (!netif_running(brmctx->br->dev) ||
+           br_multicast_ctx_vlan_global_disabled(brmctx) ||
+           !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
                goto out;
 
-       br_multicast_start_querier(br, query);
+       br_multicast_start_querier(brmctx, query);
 
 out:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
 static void br_ip4_multicast_querier_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip4_other_query.timer);
 
-       br_multicast_querier_expired(br, &br->ip4_own_query);
+       br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_querier_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip6_other_query.timer);
 
-       br_multicast_querier_expired(br, &br->ip6_own_query);
+       br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query);
 }
 #endif
 
-static void br_multicast_select_own_querier(struct net_bridge *br,
+static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
                                            struct br_ip *ip,
                                            struct sk_buff *skb)
 {
        if (ip->proto == htons(ETH_P_IP))
-               br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
+               brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
 #if IS_ENABLED(CONFIG_IPV6)
        else
-               br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
+               brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
 #endif
 }
 
-static void __br_multicast_send_query(struct net_bridge *br,
-                                     struct net_bridge_port *port,
+static void __br_multicast_send_query(struct net_bridge_mcast *brmctx,
+                                     struct net_bridge_mcast_port *pmctx,
                                      struct net_bridge_port_group *pg,
                                      struct br_ip *ip_dst,
                                      struct br_ip *group,
@@ -1513,19 +1628,23 @@ static void __br_multicast_send_query(struct net_bridge *br,
        struct sk_buff *skb;
        u8 igmp_type;
 
+       if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+           !br_multicast_ctx_matches_vlan_snooping(brmctx))
+               return;
+
 again_under_lmqt:
-       skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs,
-                                      over_lmqt, sflag, &igmp_type,
+       skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group,
+                                      with_srcs, over_lmqt, sflag, &igmp_type,
                                       need_rexmit);
        if (!skb)
                return;
 
-       if (port) {
-               skb->dev = port->dev;
-               br_multicast_count(brport, skb, igmp_type,
+       if (pmctx) {
+               skb->dev = pmctx->port->dev;
+               br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type,
                                   BR_MCAST_DIR_TX);
                NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
-                       dev_net(port->dev), NULL, skb, NULL, skb->dev,
+                       dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev,
                        br_dev_queue_push_xmit);
 
                if (over_lmqt && with_srcs && sflag) {
@@ -1533,35 +1652,64 @@ again_under_lmqt:
                        goto again_under_lmqt;
                }
        } else {
-               br_multicast_select_own_querier(br, group, skb);
-               br_multicast_count(br, port, skb, igmp_type,
+               br_multicast_select_own_querier(brmctx, group, skb);
+               br_multicast_count(brmctx->br, NULL, skb, igmp_type,
                                   BR_MCAST_DIR_RX);
                netif_rx(skb);
        }
 }
 
-static void br_multicast_send_query(struct net_bridge *br,
-                                   struct net_bridge_port *port,
+static void br_multicast_read_querier(const struct bridge_mcast_querier *querier,
+                                     struct bridge_mcast_querier *dest)
+{
+       unsigned int seq;
+
+       memset(dest, 0, sizeof(*dest));
+       do {
+               seq = read_seqcount_begin(&querier->seq);
+               dest->port_ifidx = querier->port_ifidx;
+               memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip));
+       } while (read_seqcount_retry(&querier->seq, seq));
+}
+
+static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
+                                       struct bridge_mcast_querier *querier,
+                                       int ifindex,
+                                       struct br_ip *saddr)
+{
+       lockdep_assert_held_once(&brmctx->br->multicast_lock);
+
+       write_seqcount_begin(&querier->seq);
+       querier->port_ifidx = ifindex;
+       memcpy(&querier->addr, saddr, sizeof(*saddr));
+       write_seqcount_end(&querier->seq);
+}
+
+static void br_multicast_send_query(struct net_bridge_mcast *brmctx,
+                                   struct net_bridge_mcast_port *pmctx,
                                    struct bridge_mcast_own_query *own_query)
 {
        struct bridge_mcast_other_query *other_query = NULL;
+       struct bridge_mcast_querier *querier;
        struct br_ip br_group;
        unsigned long time;
 
-       if (!netif_running(br->dev) ||
-           !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
-           !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+       if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+           !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+           !brmctx->multicast_querier)
                return;
 
        memset(&br_group.dst, 0, sizeof(br_group.dst));
 
-       if (port ? (own_query == &port->ip4_own_query) :
-                  (own_query == &br->ip4_own_query)) {
-               other_query = &br->ip4_other_query;
+       if (pmctx ? (own_query == &pmctx->ip4_own_query) :
+                   (own_query == &brmctx->ip4_own_query)) {
+               querier = &brmctx->ip4_querier;
+               other_query = &brmctx->ip4_other_query;
                br_group.proto = htons(ETH_P_IP);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               other_query = &br->ip6_other_query;
+               querier = &brmctx->ip6_querier;
+               other_query = &brmctx->ip6_other_query;
                br_group.proto = htons(ETH_P_IPV6);
 #endif
        }
@@ -1569,31 +1717,39 @@ static void br_multicast_send_query(struct net_bridge *br,
        if (!other_query || timer_pending(&other_query->timer))
                return;
 
-       __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
-                                 NULL);
+       /* we're about to select ourselves as querier */
+       if (!pmctx && querier->port_ifidx) {
+               struct br_ip zeroip = {};
+
+               br_multicast_update_querier(brmctx, querier, 0, &zeroip);
+       }
+
+       __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false,
+                                 0, NULL);
 
        time = jiffies;
-       time += own_query->startup_sent < br->multicast_startup_query_count ?
-               br->multicast_startup_query_interval :
-               br->multicast_query_interval;
+       time += own_query->startup_sent < brmctx->multicast_startup_query_count ?
+               brmctx->multicast_startup_query_interval :
+               brmctx->multicast_query_interval;
        mod_timer(&own_query->timer, time);
 }
 
 static void
-br_multicast_port_query_expired(struct net_bridge_port *port,
+br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx,
                                struct bridge_mcast_own_query *query)
 {
-       struct net_bridge *br = port->br;
+       struct net_bridge *br = pmctx->port->br;
+       struct net_bridge_mcast *brmctx;
 
        spin_lock(&br->multicast_lock);
-       if (port->state == BR_STATE_DISABLED ||
-           port->state == BR_STATE_BLOCKING)
+       if (br_multicast_port_ctx_state_stopped(pmctx))
                goto out;
 
-       if (query->startup_sent < br->multicast_startup_query_count)
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
+       if (query->startup_sent < brmctx->multicast_startup_query_count)
                query->startup_sent++;
 
-       br_multicast_send_query(port->br, port, query);
+       br_multicast_send_query(brmctx, pmctx, query);
 
 out:
        spin_unlock(&br->multicast_lock);
@@ -1601,17 +1757,19 @@ out:
 
 static void br_ip4_multicast_port_query_expired(struct timer_list *t)
 {
-       struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
+       struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+                                                        ip4_own_query.timer);
 
-       br_multicast_port_query_expired(port, &port->ip4_own_query);
+       br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_port_query_expired(struct timer_list *t)
 {
-       struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
+       struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+                                                        ip6_own_query.timer);
 
-       br_multicast_port_query_expired(port, &port->ip6_own_query);
+       br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query);
 }
 #endif
 
@@ -1620,19 +1778,27 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
        struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
        struct bridge_mcast_other_query *other_query = NULL;
        struct net_bridge *br = pg->key.port->br;
+       struct net_bridge_mcast_port *pmctx;
+       struct net_bridge_mcast *brmctx;
        bool need_rexmit = false;
 
        spin_lock(&br->multicast_lock);
        if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
-           !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
-           !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+           !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+               goto out;
+
+       pmctx = br_multicast_pg_to_port_ctx(pg);
+       if (!pmctx)
+               goto out;
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
+       if (!brmctx->multicast_querier)
                goto out;
 
        if (pg->key.addr.proto == htons(ETH_P_IP))
-               other_query = &br->ip4_other_query;
+               other_query = &brmctx->ip4_other_query;
 #if IS_ENABLED(CONFIG_IPV6)
        else
-               other_query = &br->ip6_other_query;
+               other_query = &brmctx->ip6_other_query;
 #endif
 
        if (!other_query || timer_pending(&other_query->timer))
@@ -1640,15 +1806,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
 
        if (pg->grp_query_rexmit_cnt) {
                pg->grp_query_rexmit_cnt--;
-               __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+               __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
                                          &pg->key.addr, false, 1, NULL);
        }
-       __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+       __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
                                  &pg->key.addr, true, 0, &need_rexmit);
 
        if (pg->grp_query_rexmit_cnt || need_rexmit)
                mod_timer(&pg->rexmit_timer, jiffies +
-                                            br->multicast_last_member_interval);
+                                            brmctx->multicast_last_member_interval);
 out:
        spin_unlock(&br->multicast_lock);
 }
@@ -1666,23 +1832,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value,
        return switchdev_port_attr_set(dev, &attr, extack);
 }
 
-int br_multicast_add_port(struct net_bridge_port *port)
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+                               struct net_bridge_vlan *vlan,
+                               struct net_bridge_mcast_port *pmctx)
 {
-       int err;
-
-       port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
-       port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
-
-       timer_setup(&port->ip4_mc_router_timer,
+       pmctx->port = port;
+       pmctx->vlan = vlan;
+       pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+       timer_setup(&pmctx->ip4_mc_router_timer,
                    br_ip4_multicast_router_expired, 0);
-       timer_setup(&port->ip4_own_query.timer,
+       timer_setup(&pmctx->ip4_own_query.timer,
                    br_ip4_multicast_port_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
-       timer_setup(&port->ip6_mc_router_timer,
+       timer_setup(&pmctx->ip6_mc_router_timer,
                    br_ip6_multicast_router_expired, 0);
-       timer_setup(&port->ip6_own_query.timer,
+       timer_setup(&pmctx->ip6_own_query.timer,
                    br_ip6_multicast_port_query_expired, 0);
 #endif
+}
+
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       del_timer_sync(&pmctx->ip6_mc_router_timer);
+#endif
+       del_timer_sync(&pmctx->ip4_mc_router_timer);
+}
+
+int br_multicast_add_port(struct net_bridge_port *port)
+{
+       int err;
+
+       port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
+       br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx);
+
        err = br_mc_disabled_update(port->dev,
                                    br_opt_get(port->br,
                                               BROPT_MULTICAST_ENABLED),
@@ -1711,10 +1894,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
        hlist_move_list(&br->mcast_gc_list, &deleted_head);
        spin_unlock_bh(&br->multicast_lock);
        br_multicast_gc(&deleted_head);
-       del_timer_sync(&port->ip4_mc_router_timer);
-#if IS_ENABLED(CONFIG_IPV6)
-       del_timer_sync(&port->ip6_mc_router_timer);
-#endif
+       br_multicast_port_ctx_deinit(&port->multicast_ctx);
        free_percpu(port->mcast_stats);
 }
 
@@ -1727,20 +1907,23 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
                mod_timer(&query->timer, jiffies);
 }
 
-static void __br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
 {
-       struct net_bridge *br = port->br;
+       struct net_bridge *br = pmctx->port->br;
+       struct net_bridge_mcast *brmctx;
 
-       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
+       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+           !netif_running(br->dev))
                return;
 
-       br_multicast_enable(&port->ip4_own_query);
+       br_multicast_enable(&pmctx->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
-       br_multicast_enable(&port->ip6_own_query);
+       br_multicast_enable(&pmctx->ip6_own_query);
 #endif
-       if (port->multicast_router == MDB_RTR_TYPE_PERM) {
-               br_ip4_multicast_add_router(br, port);
-               br_ip6_multicast_add_router(br, port);
+       if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) {
+               br_ip4_multicast_add_router(brmctx, pmctx);
+               br_ip6_multicast_add_router(brmctx, pmctx);
        }
 }
 
@@ -1748,33 +1931,39 @@ void br_multicast_enable_port(struct net_bridge_port *port)
 {
        struct net_bridge *br = port->br;
 
-       spin_lock(&br->multicast_lock);
-       __br_multicast_enable_port(port);
-       spin_unlock(&br->multicast_lock);
+       spin_lock_bh(&br->multicast_lock);
+       __br_multicast_enable_port_ctx(&port->multicast_ctx);
+       spin_unlock_bh(&br->multicast_lock);
 }
 
-void br_multicast_disable_port(struct net_bridge_port *port)
+static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
 {
-       struct net_bridge *br = port->br;
        struct net_bridge_port_group *pg;
        struct hlist_node *n;
        bool del = false;
 
-       spin_lock(&br->multicast_lock);
-       hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
-               if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
-                       br_multicast_find_del_pg(br, pg);
+       hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist)
+               if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) &&
+                   (!br_multicast_port_ctx_is_vlan(pmctx) ||
+                    pg->key.addr.vid == pmctx->vlan->vid))
+                       br_multicast_find_del_pg(pmctx->port->br, pg);
 
-       del |= br_ip4_multicast_rport_del(port);
-       del_timer(&port->ip4_mc_router_timer);
-       del_timer(&port->ip4_own_query.timer);
-       del |= br_ip6_multicast_rport_del(port);
+       del |= br_ip4_multicast_rport_del(pmctx);
+       del_timer(&pmctx->ip4_mc_router_timer);
+       del_timer(&pmctx->ip4_own_query.timer);
+       del |= br_ip6_multicast_rport_del(pmctx);
 #if IS_ENABLED(CONFIG_IPV6)
-       del_timer(&port->ip6_mc_router_timer);
-       del_timer(&port->ip6_own_query.timer);
+       del_timer(&pmctx->ip6_mc_router_timer);
+       del_timer(&pmctx->ip6_own_query.timer);
 #endif
-       br_multicast_rport_del_notify(port, del);
-       spin_unlock(&br->multicast_lock);
+       br_multicast_rport_del_notify(pmctx, del);
+}
+
+void br_multicast_disable_port(struct net_bridge_port *port)
+{
+       spin_lock_bh(&port->br->multicast_lock);
+       __br_multicast_disable_port_ctx(&port->multicast_ctx);
+       spin_unlock_bh(&port->br->multicast_lock);
 }
 
 static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
@@ -1799,31 +1988,32 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src,
        br_multicast_fwd_src_handle(src);
 }
 
-static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx,
+                                             struct net_bridge_mcast_port *pmctx,
+                                             struct net_bridge_port_group *pg)
 {
        struct bridge_mcast_other_query *other_query = NULL;
-       struct net_bridge *br = pg->key.port->br;
-       u32 lmqc = br->multicast_last_member_count;
+       u32 lmqc = brmctx->multicast_last_member_count;
        unsigned long lmqt, lmi, now = jiffies;
        struct net_bridge_group_src *ent;
 
-       if (!netif_running(br->dev) ||
-           !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+       if (!netif_running(brmctx->br->dev) ||
+           !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
                return;
 
        if (pg->key.addr.proto == htons(ETH_P_IP))
-               other_query = &br->ip4_other_query;
+               other_query = &brmctx->ip4_other_query;
 #if IS_ENABLED(CONFIG_IPV6)
        else
-               other_query = &br->ip6_other_query;
+               other_query = &brmctx->ip6_other_query;
 #endif
 
-       lmqt = now + br_multicast_lmqt(br);
+       lmqt = now + br_multicast_lmqt(brmctx);
        hlist_for_each_entry(ent, &pg->src_list, node) {
                if (ent->flags & BR_SGRP_F_SEND) {
                        ent->flags &= ~BR_SGRP_F_SEND;
                        if (ent->timer.expires > lmqt) {
-                               if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+                               if (brmctx->multicast_querier &&
                                    other_query &&
                                    !timer_pending(&other_query->timer))
                                        ent->src_query_rexmit_cnt = lmqc;
@@ -1832,41 +2022,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
                }
        }
 
-       if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) ||
+       if (!brmctx->multicast_querier ||
            !other_query || timer_pending(&other_query->timer))
                return;
 
-       __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+       __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
                                  &pg->key.addr, true, 1, NULL);
 
-       lmi = now + br->multicast_last_member_interval;
+       lmi = now + brmctx->multicast_last_member_interval;
        if (!timer_pending(&pg->rexmit_timer) ||
            time_after(pg->rexmit_timer.expires, lmi))
                mod_timer(&pg->rexmit_timer, lmi);
 }
 
-static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx,
+                                       struct net_bridge_port_group *pg)
 {
        struct bridge_mcast_other_query *other_query = NULL;
-       struct net_bridge *br = pg->key.port->br;
        unsigned long now = jiffies, lmi;
 
-       if (!netif_running(br->dev) ||
-           !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+       if (!netif_running(brmctx->br->dev) ||
+           !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
                return;
 
        if (pg->key.addr.proto == htons(ETH_P_IP))
-               other_query = &br->ip4_other_query;
+               other_query = &brmctx->ip4_other_query;
 #if IS_ENABLED(CONFIG_IPV6)
        else
-               other_query = &br->ip6_other_query;
+               other_query = &brmctx->ip6_other_query;
 #endif
 
-       if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+       if (brmctx->multicast_querier &&
            other_query && !timer_pending(&other_query->timer)) {
-               lmi = now + br->multicast_last_member_interval;
-               pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
-               __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+               lmi = now + brmctx->multicast_last_member_interval;
+               pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1;
+               __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
                                          &pg->key.addr, false, 0, NULL);
                if (!timer_pending(&pg->rexmit_timer) ||
                    time_after(pg->rexmit_timer.expires, lmi))
@@ -1875,8 +2066,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
 
        if (pg->filter_mode == MCAST_EXCLUDE &&
            (!timer_pending(&pg->timer) ||
-            time_after(pg->timer.expires, now + br_multicast_lmqt(br))))
-               mod_timer(&pg->timer, now + br_multicast_lmqt(br));
+            time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx))))
+               mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx));
 }
 
 /* State          Msg type      New state                Actions
@@ -1884,11 +2075,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
  * INCLUDE (A)    ALLOW (B)     INCLUDE (A+B)            (B)=GMI
  * EXCLUDE (X,Y)  ALLOW (A)     EXCLUDE (X+A,Y-A)        (A)=GMI
  */
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx,
+                                    struct net_bridge_port_group *pg, void *h_addr,
                                     void *srcs, u32 nsrcs, size_t addr_size,
                                     int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        struct net_bridge_group_src *ent;
        unsigned long now = jiffies;
        bool changed = false;
@@ -1907,10 +2098,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
                }
 
                if (ent)
-                       __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+                       __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        return changed;
@@ -1921,7 +2113,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
  *                                                       Delete (A-B)
  *                                                       Group Timer=GMI
  */
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx,
+                                struct net_bridge_port_group *pg, void *h_addr,
                                 void *srcs, u32 nsrcs, size_t addr_size,
                                 int grec_type)
 {
@@ -1945,7 +2138,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
                        br_multicast_fwd_src_handle(ent);
        }
 
-       br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+       br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                               grec_type);
 
        __grp_src_delete_marked(pg);
 }
@@ -1956,11 +2150,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
  *                                                       Delete (Y-A)
  *                                                       Group Timer=GMI
  */
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx,
+                                struct net_bridge_port_group *pg, void *h_addr,
                                 void *srcs, u32 nsrcs, size_t addr_size,
                                 int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        struct net_bridge_group_src *ent;
        unsigned long now = jiffies;
        bool changed = false;
@@ -1981,13 +2175,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
                        ent = br_multicast_new_group_src(pg, &src_ip);
                        if (ent) {
                                __grp_src_mod_timer(ent,
-                                                   now + br_multicast_gmi(br));
+                                                   now + br_multicast_gmi(brmctx));
                                changed = true;
                        }
                }
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (__grp_src_delete_marked(pg))
@@ -1996,28 +2191,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
        return changed;
 }
 
-static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx,
+                              struct net_bridge_port_group *pg, void *h_addr,
                               void *srcs, u32 nsrcs, size_t addr_size,
                               int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+               __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
                                     grec_type);
                br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
                changed = true;
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
-                                              grec_type);
+               changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs,
+                                              addr_size, grec_type);
                break;
        }
 
        pg->filter_mode = MCAST_EXCLUDE;
-       mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+       mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
 
        return changed;
 }
@@ -2026,11 +2221,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
  * INCLUDE (A)    TO_IN (B)     INCLUDE (A+B)            (B)=GMI
  *                                                       Send Q(G,A-B)
  */
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
+                               struct net_bridge_port_group *pg, void *h_addr,
                                void *srcs, u32 nsrcs, size_t addr_size,
                                int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        u32 src_idx, to_send = pg->src_ents;
        struct net_bridge_group_src *ent;
        unsigned long now = jiffies;
@@ -2054,14 +2250,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
                                changed = true;
                }
                if (ent)
-                       __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+                       __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 
        return changed;
 }
@@ -2071,11 +2268,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
  *                                                       Send Q(G,X-A)
  *                                                       Send Q(G)
  */
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
+                               struct net_bridge_port_group *pg, void *h_addr,
                                void *srcs, u32 nsrcs, size_t addr_size,
                                int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        u32 src_idx, to_send = pg->src_ents;
        struct net_bridge_group_src *ent;
        unsigned long now = jiffies;
@@ -2102,21 +2300,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
                                changed = true;
                }
                if (ent)
-                       __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+                       __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 
-       __grp_send_query_and_rexmit(pg);
+       __grp_send_query_and_rexmit(brmctx, pmctx, pg);
 
        return changed;
 }
 
-static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toin(struct net_bridge_mcast *brmctx,
+                             struct net_bridge_mcast_port *pmctx,
+                             struct net_bridge_port_group *pg, void *h_addr,
                              void *srcs, u32 nsrcs, size_t addr_size,
                              int grec_type)
 {
@@ -2124,12 +2325,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
-                                             grec_type);
+               changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs,
+                                             nsrcs, addr_size, grec_type);
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
-                                             grec_type);
+               changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs,
+                                             nsrcs, addr_size, grec_type);
                break;
        }
 
@@ -2151,7 +2352,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
  *                                                       Send Q(G,A*B)
  *                                                       Group Timer=GMI
  */
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
+                               struct net_bridge_port_group *pg, void *h_addr,
                                void *srcs, u32 nsrcs, size_t addr_size,
                                int grec_type)
 {
@@ -2178,11 +2381,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
                        br_multicast_fwd_src_handle(ent);
        }
 
-       br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+       br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                               grec_type);
 
        __grp_src_delete_marked(pg);
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 }
 
 /* State          Msg type      New state                Actions
@@ -2192,7 +2396,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
  *                                                       Send Q(G,A-Y)
  *                                                       Group Timer=GMI
  */
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
+                               struct net_bridge_port_group *pg, void *h_addr,
                                void *srcs, u32 nsrcs, size_t addr_size,
                                int grec_type)
 {
@@ -2224,39 +2430,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
                }
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (__grp_src_delete_marked(pg))
                changed = true;
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 
        return changed;
 }
 
-static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toex(struct net_bridge_mcast *brmctx,
+                             struct net_bridge_mcast_port *pmctx,
+                             struct net_bridge_port_group *pg, void *h_addr,
                              void *srcs, u32 nsrcs, size_t addr_size,
                              int grec_type)
 {
-       struct net_bridge *br = pg->key.port->br;
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
-                                   grec_type);
+               __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs,
+                                   addr_size, grec_type);
                br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
                changed = true;
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
-                                             grec_type);
+               changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs,
+                                             nsrcs, addr_size, grec_type);
                break;
        }
 
        pg->filter_mode = MCAST_EXCLUDE;
-       mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+       mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
 
        return changed;
 }
@@ -2264,7 +2472,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
 /* State          Msg type      New state                Actions
  * INCLUDE (A)    BLOCK (B)     INCLUDE (A)              Send Q(G,A*B)
  */
-static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx,
+                                struct net_bridge_mcast_port *pmctx,
+                                struct net_bridge_port_group *pg, void *h_addr,
                                 void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        struct net_bridge_group_src *ent;
@@ -2286,11 +2496,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
                }
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 
        return changed;
 }
@@ -2299,7 +2510,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
  * EXCLUDE (X,Y)  BLOCK (A)     EXCLUDE (X+(A-Y),Y)      (A-X-Y)=Group Timer
  *                                                       Send Q(G,A-Y)
  */
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx,
+                                struct net_bridge_mcast_port *pmctx,
+                                struct net_bridge_port_group *pg, void *h_addr,
                                 void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        struct net_bridge_group_src *ent;
@@ -2328,28 +2541,31 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
                }
        }
 
-       if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+       if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+                                   grec_type))
                changed = true;
 
        if (to_send)
-               __grp_src_query_marked_and_rexmit(pg);
+               __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
 
        return changed;
 }
 
-static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_block(struct net_bridge_mcast *brmctx,
+                              struct net_bridge_mcast_port *pmctx,
+                              struct net_bridge_port_group *pg, void *h_addr,
                               void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
 {
        bool changed = false;
 
        switch (pg->filter_mode) {
        case MCAST_INCLUDE:
-               changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
-                                              grec_type);
+               changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs,
+                                              nsrcs, addr_size, grec_type);
                break;
        case MCAST_EXCLUDE:
-               changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
-                                              grec_type);
+               changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs,
+                                              nsrcs, addr_size, grec_type);
                break;
        }
 
@@ -2384,12 +2600,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp,
        return NULL;
 }
 
-static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
-                                        struct net_bridge_port *port,
+static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx,
                                         struct sk_buff *skb,
                                         u16 vid)
 {
-       bool igmpv2 = br->multicast_igmp_version == 2;
+       bool igmpv2 = brmctx->multicast_igmp_version == 2;
        struct net_bridge_mdb_entry *mdst;
        struct net_bridge_port_group *pg;
        const unsigned char *src;
@@ -2436,25 +2652,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
                if (nsrcs == 0 &&
                    (type == IGMPV3_CHANGE_TO_INCLUDE ||
                     type == IGMPV3_MODE_IS_INCLUDE)) {
-                       if (!port || igmpv2) {
-                               br_ip4_multicast_leave_group(br, port, group, vid, src);
+                       if (!pmctx || igmpv2) {
+                               br_ip4_multicast_leave_group(brmctx, pmctx,
+                                                            group, vid, src);
                                continue;
                        }
                } else {
-                       err = br_ip4_multicast_add_group(br, port, group, vid,
-                                                        src, igmpv2);
+                       err = br_ip4_multicast_add_group(brmctx, pmctx, group,
+                                                        vid, src, igmpv2);
                        if (err)
                                break;
                }
 
-               if (!port || igmpv2)
+               if (!pmctx || igmpv2)
                        continue;
 
-               spin_lock_bh(&br->multicast_lock);
-               mdst = br_mdb_ip4_get(br, group, vid);
+               spin_lock_bh(&brmctx->br->multicast_lock);
+               if (!br_multicast_ctx_should_use(brmctx, pmctx))
+                       goto unlock_continue;
+
+               mdst = br_mdb_ip4_get(brmctx->br, group, vid);
                if (!mdst)
                        goto unlock_continue;
-               pg = br_multicast_find_port(mdst, port, src);
+               pg = br_multicast_find_port(mdst, pmctx->port, src);
                if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
                        goto unlock_continue;
                /* reload grec and host addr */
@@ -2462,51 +2682,57 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
                h_addr = &ip_hdr(skb)->saddr;
                switch (type) {
                case IGMPV3_ALLOW_NEW_SOURCES:
-                       changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+                                                          grec->grec_src,
                                                           nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_MODE_IS_INCLUDE:
-                       changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+                                                          grec->grec_src,
                                                           nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_MODE_IS_EXCLUDE:
-                       changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_isexc(brmctx, pg, h_addr,
+                                                    grec->grec_src,
                                                     nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_CHANGE_TO_INCLUDE:
-                       changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
+                                                   grec->grec_src,
                                                    nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_CHANGE_TO_EXCLUDE:
-                       changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
+                                                   grec->grec_src,
                                                    nsrcs, sizeof(__be32), type);
                        break;
                case IGMPV3_BLOCK_OLD_SOURCES:
-                       changed = br_multicast_block(pg, h_addr, grec->grec_src,
+                       changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
+                                                    grec->grec_src,
                                                     nsrcs, sizeof(__be32), type);
                        break;
                }
                if (changed)
-                       br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+                       br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
 unlock_continue:
-               spin_unlock_bh(&br->multicast_lock);
+               spin_unlock_bh(&brmctx->br->multicast_lock);
        }
 
        return err;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_mld2_report(struct net_bridge *br,
-                                       struct net_bridge_port *port,
+static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx,
                                        struct sk_buff *skb,
                                        u16 vid)
 {
-       bool mldv1 = br->multicast_mld_version == 1;
+       bool mldv1 = brmctx->multicast_mld_version == 1;
        struct net_bridge_mdb_entry *mdst;
        struct net_bridge_port_group *pg;
        unsigned int nsrcs_offset;
+       struct mld2_report *mld2r;
        const unsigned char *src;
-       struct icmp6hdr *icmp6h;
        struct in6_addr *h_addr;
        struct mld2_grec *grec;
        unsigned int grec_len;
@@ -2514,12 +2740,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
        int i, len, num;
        int err = 0;
 
-       if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
+       if (!ipv6_mc_may_pull(skb, sizeof(*mld2r)))
                return -EINVAL;
 
-       icmp6h = icmp6_hdr(skb);
-       num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
-       len = skb_transport_offset(skb) + sizeof(*icmp6h);
+       mld2r = (struct mld2_report *)icmp6_hdr(skb);
+       num = ntohs(mld2r->mld2r_ngrec);
+       len = skb_transport_offset(skb) + sizeof(*mld2r);
 
        for (i = 0; i < num; i++) {
                __be16 *_nsrcs, __nsrcs;
@@ -2562,137 +2788,243 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
                if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
                     grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
                    nsrcs == 0) {
-                       if (!port || mldv1) {
-                               br_ip6_multicast_leave_group(br, port,
+                       if (!pmctx || mldv1) {
+                               br_ip6_multicast_leave_group(brmctx, pmctx,
                                                             &grec->grec_mca,
                                                             vid, src);
                                continue;
                        }
                } else {
-                       err = br_ip6_multicast_add_group(br, port,
+                       err = br_ip6_multicast_add_group(brmctx, pmctx,
                                                         &grec->grec_mca, vid,
                                                         src, mldv1);
                        if (err)
                                break;
                }
 
-               if (!port || mldv1)
+               if (!pmctx || mldv1)
                        continue;
 
-               spin_lock_bh(&br->multicast_lock);
-               mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid);
+               spin_lock_bh(&brmctx->br->multicast_lock);
+               if (!br_multicast_ctx_should_use(brmctx, pmctx))
+                       goto unlock_continue;
+
+               mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid);
                if (!mdst)
                        goto unlock_continue;
-               pg = br_multicast_find_port(mdst, port, src);
+               pg = br_multicast_find_port(mdst, pmctx->port, src);
                if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
                        goto unlock_continue;
                h_addr = &ipv6_hdr(skb)->saddr;
                switch (grec->grec_type) {
                case MLD2_ALLOW_NEW_SOURCES:
-                       changed = br_multicast_isinc_allow(pg, h_addr,
+                       changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
                                                           grec->grec_src, nsrcs,
                                                           sizeof(struct in6_addr),
                                                           grec->grec_type);
                        break;
                case MLD2_MODE_IS_INCLUDE:
-                       changed = br_multicast_isinc_allow(pg, h_addr,
+                       changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
                                                           grec->grec_src, nsrcs,
                                                           sizeof(struct in6_addr),
                                                           grec->grec_type);
                        break;
                case MLD2_MODE_IS_EXCLUDE:
-                       changed = br_multicast_isexc(pg, h_addr,
+                       changed = br_multicast_isexc(brmctx, pg, h_addr,
                                                     grec->grec_src, nsrcs,
                                                     sizeof(struct in6_addr),
                                                     grec->grec_type);
                        break;
                case MLD2_CHANGE_TO_INCLUDE:
-                       changed = br_multicast_toin(pg, h_addr,
+                       changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
                                                    grec->grec_src, nsrcs,
                                                    sizeof(struct in6_addr),
                                                    grec->grec_type);
                        break;
                case MLD2_CHANGE_TO_EXCLUDE:
-                       changed = br_multicast_toex(pg, h_addr,
+                       changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
                                                    grec->grec_src, nsrcs,
                                                    sizeof(struct in6_addr),
                                                    grec->grec_type);
                        break;
                case MLD2_BLOCK_OLD_SOURCES:
-                       changed = br_multicast_block(pg, h_addr,
+                       changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
                                                     grec->grec_src, nsrcs,
                                                     sizeof(struct in6_addr),
                                                     grec->grec_type);
                        break;
                }
                if (changed)
-                       br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+                       br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
 unlock_continue:
-               spin_unlock_bh(&br->multicast_lock);
+               spin_unlock_bh(&brmctx->br->multicast_lock);
        }
 
        return err;
 }
 #endif
 
-static bool br_ip4_multicast_select_querier(struct net_bridge *br,
-                                           struct net_bridge_port *port,
-                                           __be32 saddr)
+static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx,
+                                       struct br_ip *saddr)
 {
-       if (!timer_pending(&br->ip4_own_query.timer) &&
-           !timer_pending(&br->ip4_other_query.timer))
-               goto update;
+       int port_ifidx = pmctx ? pmctx->port->dev->ifindex : 0;
+       struct timer_list *own_timer, *other_timer;
+       struct bridge_mcast_querier *querier;
 
-       if (!br->ip4_querier.addr.src.ip4)
-               goto update;
+       switch (saddr->proto) {
+       case htons(ETH_P_IP):
+               querier = &brmctx->ip4_querier;
+               own_timer = &brmctx->ip4_own_query.timer;
+               other_timer = &brmctx->ip4_other_query.timer;
+               if (!querier->addr.src.ip4 ||
+                   ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4))
+                       goto update;
+               break;
+#if IS_ENABLED(CONFIG_IPV6)
+       case htons(ETH_P_IPV6):
+               querier = &brmctx->ip6_querier;
+               own_timer = &brmctx->ip6_own_query.timer;
+               other_timer = &brmctx->ip6_other_query.timer;
+               if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0)
+                       goto update;
+               break;
+#endif
+       default:
+               return false;
+       }
 
-       if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
+       if (!timer_pending(own_timer) && !timer_pending(other_timer))
                goto update;
 
        return false;
 
 update:
-       br->ip4_querier.addr.src.ip4 = saddr;
-
-       /* update protected by general multicast_lock by caller */
-       rcu_assign_pointer(br->ip4_querier.port, port);
+       br_multicast_update_querier(brmctx, querier, port_ifidx, saddr);
 
        return true;
 }
 
+static struct net_bridge_port *
+__br_multicast_get_querier_port(struct net_bridge *br,
+                               const struct bridge_mcast_querier *querier)
+{
+       int port_ifidx = READ_ONCE(querier->port_ifidx);
+       struct net_bridge_port *p;
+       struct net_device *dev;
+
+       if (port_ifidx == 0)
+               return NULL;
+
+       dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx);
+       if (!dev)
+               return NULL;
+       p = br_port_get_rtnl_rcu(dev);
+       if (!p || p->br != br)
+               return NULL;
+
+       return p;
+}
+
+size_t br_multicast_querier_state_size(void)
+{
+       return nla_total_size(0) +              /* nest attribute */
+              nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */
+              nla_total_size(sizeof(int)) +    /* BRIDGE_QUERIER_IP_PORT */
+              nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */
 #if IS_ENABLED(CONFIG_IPV6)
-static bool br_ip6_multicast_select_querier(struct net_bridge *br,
-                                           struct net_bridge_port *port,
-                                           struct in6_addr *saddr)
+              nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */
+              nla_total_size(sizeof(int)) +             /* BRIDGE_QUERIER_IPV6_PORT */
+              nla_total_size_64bit(sizeof(u64)) +       /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */
+#endif
+              0;
+}
+
+/* protected by rtnl or rcu */
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+                                   const struct net_bridge_mcast *brmctx,
+                                   int nest_attr)
 {
-       if (!timer_pending(&br->ip6_own_query.timer) &&
-           !timer_pending(&br->ip6_other_query.timer))
-               goto update;
+       struct bridge_mcast_querier querier = {};
+       struct net_bridge_port *p;
+       struct nlattr *nest;
 
-       if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
-               goto update;
+       if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+           br_multicast_ctx_vlan_global_disabled(brmctx))
+               return 0;
 
-       return false;
+       nest = nla_nest_start(skb, nest_attr);
+       if (!nest)
+               return -EMSGSIZE;
 
-update:
-       br->ip6_querier.addr.src.ip6 = *saddr;
+       rcu_read_lock();
+       if (!brmctx->multicast_querier &&
+           !timer_pending(&brmctx->ip4_other_query.timer))
+               goto out_v6;
+
+       br_multicast_read_querier(&brmctx->ip4_querier, &querier);
+       if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS,
+                           querier.addr.src.ip4)) {
+               rcu_read_unlock();
+               goto out_err;
+       }
 
-       /* update protected by general multicast_lock by caller */
-       rcu_assign_pointer(br->ip6_querier.port, port);
+       p = __br_multicast_get_querier_port(brmctx->br, &querier);
+       if (timer_pending(&brmctx->ip4_other_query.timer) &&
+           (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER,
+                              br_timer_value(&brmctx->ip4_other_query.timer),
+                              BRIDGE_QUERIER_PAD) ||
+            (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) {
+               rcu_read_unlock();
+               goto out_err;
+       }
 
-       return true;
-}
+out_v6:
+#if IS_ENABLED(CONFIG_IPV6)
+       if (!brmctx->multicast_querier &&
+           !timer_pending(&brmctx->ip6_other_query.timer))
+               goto out;
+
+       br_multicast_read_querier(&brmctx->ip6_querier, &querier);
+       if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS,
+                            &querier.addr.src.ip6)) {
+               rcu_read_unlock();
+               goto out_err;
+       }
+
+       p = __br_multicast_get_querier_port(brmctx->br, &querier);
+       if (timer_pending(&brmctx->ip6_other_query.timer) &&
+           (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+                              br_timer_value(&brmctx->ip6_other_query.timer),
+                              BRIDGE_QUERIER_PAD) ||
+            (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT,
+                              p->dev->ifindex)))) {
+               rcu_read_unlock();
+               goto out_err;
+       }
+out:
 #endif
+       rcu_read_unlock();
+       nla_nest_end(skb, nest);
+       if (!nla_len(nest))
+               nla_nest_cancel(skb, nest);
+
+       return 0;
+
+out_err:
+       nla_nest_cancel(skb, nest);
+       return -EMSGSIZE;
+}
 
 static void
-br_multicast_update_query_timer(struct net_bridge *br,
+br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
                                struct bridge_mcast_other_query *query,
                                unsigned long max_delay)
 {
        if (!timer_pending(&query->timer))
                query->delay_time = jiffies + max_delay;
 
-       mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
+       mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
 }
 
 static void br_port_mc_router_state_change(struct net_bridge_port *p,
@@ -2709,19 +3041,26 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
 }
 
 static struct net_bridge_port *
-br_multicast_rport_from_node(struct net_bridge *br,
+br_multicast_rport_from_node(struct net_bridge_mcast *brmctx,
                             struct hlist_head *mc_router_list,
                             struct hlist_node *rlist)
 {
+       struct net_bridge_mcast_port *pmctx;
+
 #if IS_ENABLED(CONFIG_IPV6)
-       if (mc_router_list == &br->ip6_mc_router_list)
-               return hlist_entry(rlist, struct net_bridge_port, ip6_rlist);
+       if (mc_router_list == &brmctx->ip6_mc_router_list)
+               pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+                                   ip6_rlist);
+       else
 #endif
-       return hlist_entry(rlist, struct net_bridge_port, ip4_rlist);
+               pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+                                   ip4_rlist);
+
+       return pmctx->port;
 }
 
 static struct hlist_node *
-br_multicast_get_rport_slot(struct net_bridge *br,
+br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx,
                            struct net_bridge_port *port,
                            struct hlist_head *mc_router_list)
 
@@ -2731,7 +3070,7 @@ br_multicast_get_rport_slot(struct net_bridge *br,
        struct hlist_node *rlist;
 
        hlist_for_each(rlist, mc_router_list) {
-               p = br_multicast_rport_from_node(br, mc_router_list, rlist);
+               p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist);
 
                if ((unsigned long)port >= (unsigned long)p)
                        break;
@@ -2742,14 +3081,14 @@ br_multicast_get_rport_slot(struct net_bridge *br,
        return slot;
 }
 
-static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
+static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx,
                                           struct hlist_node *rnode)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       if (rnode != &port->ip6_rlist)
-               return hlist_unhashed(&port->ip6_rlist);
+       if (rnode != &pmctx->ip6_rlist)
+               return hlist_unhashed(&pmctx->ip6_rlist);
        else
-               return hlist_unhashed(&port->ip4_rlist);
+               return hlist_unhashed(&pmctx->ip4_rlist);
 #else
        return true;
 #endif
@@ -2759,8 +3098,8 @@ static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
  *  list is maintained ordered by pointer value
  *  and locked by br->multicast_lock and RCU
  */
-static void br_multicast_add_router(struct net_bridge *br,
-                                   struct net_bridge_port *port,
+static void br_multicast_add_router(struct net_bridge_mcast *brmctx,
+                                   struct net_bridge_mcast_port *pmctx,
                                    struct hlist_node *rlist,
                                    struct hlist_head *mc_router_list)
 {
@@ -2769,7 +3108,7 @@ static void br_multicast_add_router(struct net_bridge *br,
        if (!hlist_unhashed(rlist))
                return;
 
-       slot = br_multicast_get_rport_slot(brport, mc_router_list);
+       slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list);
 
        if (slot)
                hlist_add_behind_rcu(rlist, slot);
@@ -2780,9 +3119,9 @@ static void br_multicast_add_router(struct net_bridge *br,
         * switched from no IPv4/IPv6 multicast router to a new
         * IPv4 or IPv6 multicast router.
         */
-       if (br_multicast_no_router_otherpf(port, rlist)) {
-               br_rtr_notify(br->dev, port, RTM_NEWMDB);
-               br_port_mc_router_state_change(port, true);
+       if (br_multicast_no_router_otherpf(pmctx, rlist)) {
+               br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB);
+               br_port_mc_router_state_change(pmctx->port, true);
        }
 }
 
@@ -2790,116 +3129,119 @@ static void br_multicast_add_router(struct net_bridge *br,
  *  list is maintained ordered by pointer value
  *  and locked by br->multicast_lock and RCU
  */
-static void br_ip4_multicast_add_router(struct net_bridge *br,
-                                       struct net_bridge_port *port)
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx)
 {
-       br_multicast_add_router(br, port, &port->ip4_rlist,
-                               &br->ip4_mc_router_list);
+       br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist,
+                               &brmctx->ip4_mc_router_list);
 }
 
 /* Add port to router_list
  *  list is maintained ordered by pointer value
  *  and locked by br->multicast_lock and RCU
  */
-static void br_ip6_multicast_add_router(struct net_bridge *br,
-                                       struct net_bridge_port *port)
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+                                       struct net_bridge_mcast_port *pmctx)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       br_multicast_add_router(br, port, &port->ip6_rlist,
-                               &br->ip6_mc_router_list);
+       br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist,
+                               &brmctx->ip6_mc_router_list);
 #endif
 }
 
-static void br_multicast_mark_router(struct net_bridge *br,
-                                    struct net_bridge_port *port,
+static void br_multicast_mark_router(struct net_bridge_mcast *brmctx,
+                                    struct net_bridge_mcast_port *pmctx,
                                     struct timer_list *timer,
                                     struct hlist_node *rlist,
                                     struct hlist_head *mc_router_list)
 {
        unsigned long now = jiffies;
 
-       if (!port) {
-               if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
-                       if (!br_ip4_multicast_is_router(br) &&
-                           !br_ip6_multicast_is_router(br))
-                               br_mc_router_state_change(br, true);
-                       mod_timer(timer, now + br->multicast_querier_interval);
+       if (!br_multicast_ctx_should_use(brmctx, pmctx))
+               return;
+
+       if (!pmctx) {
+               if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+                       if (!br_ip4_multicast_is_router(brmctx) &&
+                           !br_ip6_multicast_is_router(brmctx))
+                               br_mc_router_state_change(brmctx->br, true);
+                       mod_timer(timer, now + brmctx->multicast_querier_interval);
                }
                return;
        }
 
-       if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
-           port->multicast_router == MDB_RTR_TYPE_PERM)
+       if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+           pmctx->multicast_router == MDB_RTR_TYPE_PERM)
                return;
 
-       br_multicast_add_router(br, port, rlist, mc_router_list);
-       mod_timer(timer, now + br->multicast_querier_interval);
+       br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list);
+       mod_timer(timer, now + brmctx->multicast_querier_interval);
 }
 
-static void br_ip4_multicast_mark_router(struct net_bridge *br,
-                                        struct net_bridge_port *port)
+static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx)
 {
-       struct timer_list *timer = &br->ip4_mc_router_timer;
+       struct timer_list *timer = &brmctx->ip4_mc_router_timer;
        struct hlist_node *rlist = NULL;
 
-       if (port) {
-               timer = &port->ip4_mc_router_timer;
-               rlist = &port->ip4_rlist;
+       if (pmctx) {
+               timer = &pmctx->ip4_mc_router_timer;
+               rlist = &pmctx->ip4_rlist;
        }
 
-       br_multicast_mark_router(br, port, timer, rlist,
-                                &br->ip4_mc_router_list);
+       br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+                                &brmctx->ip4_mc_router_list);
 }
 
-static void br_ip6_multicast_mark_router(struct net_bridge *br,
-                                        struct net_bridge_port *port)
+static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       struct timer_list *timer = &br->ip6_mc_router_timer;
+       struct timer_list *timer = &brmctx->ip6_mc_router_timer;
        struct hlist_node *rlist = NULL;
 
-       if (port) {
-               timer = &port->ip6_mc_router_timer;
-               rlist = &port->ip6_rlist;
+       if (pmctx) {
+               timer = &pmctx->ip6_mc_router_timer;
+               rlist = &pmctx->ip6_rlist;
        }
 
-       br_multicast_mark_router(br, port, timer, rlist,
-                                &br->ip6_mc_router_list);
+       br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+                                &brmctx->ip6_mc_router_list);
 #endif
 }
 
 static void
-br_ip4_multicast_query_received(struct net_bridge *br,
-                               struct net_bridge_port *port,
+br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
                                struct bridge_mcast_other_query *query,
                                struct br_ip *saddr,
                                unsigned long max_delay)
 {
-       if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4))
+       if (!br_multicast_select_querier(brmctx, pmctx, saddr))
                return;
 
-       br_multicast_update_query_timer(br, query, max_delay);
-       br_ip4_multicast_mark_router(br, port);
+       br_multicast_update_query_timer(brmctx, query, max_delay);
+       br_ip4_multicast_mark_router(brmctx, pmctx);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void
-br_ip6_multicast_query_received(struct net_bridge *br,
-                               struct net_bridge_port *port,
+br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx,
+                               struct net_bridge_mcast_port *pmctx,
                                struct bridge_mcast_other_query *query,
                                struct br_ip *saddr,
                                unsigned long max_delay)
 {
-       if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6))
+       if (!br_multicast_select_querier(brmctx, pmctx, saddr))
                return;
 
-       br_multicast_update_query_timer(br, query, max_delay);
-       br_ip6_multicast_mark_router(br, port);
+       br_multicast_update_query_timer(brmctx, query, max_delay);
+       br_ip6_multicast_mark_router(brmctx, pmctx);
 }
 #endif
 
-static void br_ip4_multicast_query(struct net_bridge *br,
-                                  struct net_bridge_port *port,
+static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
+                                  struct net_bridge_mcast_port *pmctx,
                                   struct sk_buff *skb,
                                   u16 vid)
 {
@@ -2910,14 +3252,13 @@ static void br_ip4_multicast_query(struct net_bridge *br,
        struct igmpv3_query *ih3;
        struct net_bridge_port_group *p;
        struct net_bridge_port_group __rcu **pp;
-       struct br_ip saddr;
+       struct br_ip saddr = {};
        unsigned long max_delay;
        unsigned long now = jiffies;
        __be32 group;
 
-       spin_lock(&br->multicast_lock);
-       if (!netif_running(br->dev) ||
-           (port && port->state == BR_STATE_DISABLED))
+       spin_lock(&brmctx->br->multicast_lock);
+       if (!br_multicast_ctx_should_use(brmctx, pmctx))
                goto out;
 
        group = ih->group;
@@ -2932,7 +3273,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
        } else if (transport_len >= sizeof(*ih3)) {
                ih3 = igmpv3_query_hdr(skb);
                if (ih3->nsrcs ||
-                   (br->multicast_igmp_version == 3 && group && ih3->suppress))
+                   (brmctx->multicast_igmp_version == 3 && group &&
+                    ih3->suppress))
                        goto out;
 
                max_delay = ih3->code ?
@@ -2945,16 +3287,17 @@ static void br_ip4_multicast_query(struct net_bridge *br,
                saddr.proto = htons(ETH_P_IP);
                saddr.src.ip4 = iph->saddr;
 
-               br_ip4_multicast_query_received(br, port, &br->ip4_other_query,
+               br_ip4_multicast_query_received(brmctx, pmctx,
+                                               &brmctx->ip4_other_query,
                                                &saddr, max_delay);
                goto out;
        }
 
-       mp = br_mdb_ip4_get(br, group, vid);
+       mp = br_mdb_ip4_get(brmctx->br, group, vid);
        if (!mp)
                goto out;
 
-       max_delay *= br->multicast_last_member_count;
+       max_delay *= brmctx->multicast_last_member_count;
 
        if (mp->host_joined &&
            (timer_pending(&mp->timer) ?
@@ -2963,23 +3306,23 @@ static void br_ip4_multicast_query(struct net_bridge *br,
                mod_timer(&mp->timer, now + max_delay);
 
        for (pp = &mp->ports;
-            (p = mlock_dereference(*pp, br)) != NULL;
+            (p = mlock_dereference(*pp, brmctx->br)) != NULL;
             pp = &p->next) {
                if (timer_pending(&p->timer) ?
                    time_after(p->timer.expires, now + max_delay) :
                    try_to_del_timer_sync(&p->timer) >= 0 &&
-                   (br->multicast_igmp_version == 2 ||
+                   (brmctx->multicast_igmp_version == 2 ||
                     p->filter_mode == MCAST_EXCLUDE))
                        mod_timer(&p->timer, now + max_delay);
        }
 
 out:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_query(struct net_bridge *br,
-                                 struct net_bridge_port *port,
+static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
+                                 struct net_bridge_mcast_port *pmctx,
                                  struct sk_buff *skb,
                                  u16 vid)
 {
@@ -2989,7 +3332,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
        struct mld2_query *mld2q;
        struct net_bridge_port_group *p;
        struct net_bridge_port_group __rcu **pp;
-       struct br_ip saddr;
+       struct br_ip saddr = {};
        unsigned long max_delay;
        unsigned long now = jiffies;
        unsigned int offset = skb_transport_offset(skb);
@@ -2997,9 +3340,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
        bool is_general_query;
        int err = 0;
 
-       spin_lock(&br->multicast_lock);
-       if (!netif_running(br->dev) ||
-           (port && port->state == BR_STATE_DISABLED))
+       spin_lock(&brmctx->br->multicast_lock);
+       if (!br_multicast_ctx_should_use(brmctx, pmctx))
                goto out;
 
        if (transport_len == sizeof(*mld)) {
@@ -3019,7 +3361,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
                mld2q = (struct mld2_query *)icmp6_hdr(skb);
                if (!mld2q->mld2q_nsrcs)
                        group = &mld2q->mld2q_mca;
-               if (br->multicast_mld_version == 2 &&
+               if (brmctx->multicast_mld_version == 2 &&
                    !ipv6_addr_any(&mld2q->mld2q_mca) &&
                    mld2q->mld2q_suppress)
                        goto out;
@@ -3033,18 +3375,19 @@ static int br_ip6_multicast_query(struct net_bridge *br,
                saddr.proto = htons(ETH_P_IPV6);
                saddr.src.ip6 = ipv6_hdr(skb)->saddr;
 
-               br_ip6_multicast_query_received(br, port, &br->ip6_other_query,
+               br_ip6_multicast_query_received(brmctx, pmctx,
+                                               &brmctx->ip6_other_query,
                                                &saddr, max_delay);
                goto out;
        } else if (!group) {
                goto out;
        }
 
-       mp = br_mdb_ip6_get(br, group, vid);
+       mp = br_mdb_ip6_get(brmctx->br, group, vid);
        if (!mp)
                goto out;
 
-       max_delay *= br->multicast_last_member_count;
+       max_delay *= brmctx->multicast_last_member_count;
        if (mp->host_joined &&
            (timer_pending(&mp->timer) ?
             time_after(mp->timer.expires, now + max_delay) :
@@ -3052,25 +3395,25 @@ static int br_ip6_multicast_query(struct net_bridge *br,
                mod_timer(&mp->timer, now + max_delay);
 
        for (pp = &mp->ports;
-            (p = mlock_dereference(*pp, br)) != NULL;
+            (p = mlock_dereference(*pp, brmctx->br)) != NULL;
             pp = &p->next) {
                if (timer_pending(&p->timer) ?
                    time_after(p->timer.expires, now + max_delay) :
                    try_to_del_timer_sync(&p->timer) >= 0 &&
-                   (br->multicast_mld_version == 1 ||
+                   (brmctx->multicast_mld_version == 1 ||
                     p->filter_mode == MCAST_EXCLUDE))
                        mod_timer(&p->timer, now + max_delay);
        }
 
 out:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
        return err;
 }
 #endif
 
 static void
-br_multicast_leave_group(struct net_bridge *br,
-                        struct net_bridge_port *port,
+br_multicast_leave_group(struct net_bridge_mcast *brmctx,
+                        struct net_bridge_mcast_port *pmctx,
                         struct br_ip *group,
                         struct bridge_mcast_other_query *other_query,
                         struct bridge_mcast_own_query *own_query,
@@ -3081,22 +3424,21 @@ br_multicast_leave_group(struct net_bridge *br,
        unsigned long now;
        unsigned long time;
 
-       spin_lock(&br->multicast_lock);
-       if (!netif_running(br->dev) ||
-           (port && port->state == BR_STATE_DISABLED))
+       spin_lock(&brmctx->br->multicast_lock);
+       if (!br_multicast_ctx_should_use(brmctx, pmctx))
                goto out;
 
-       mp = br_mdb_ip_get(br, group);
+       mp = br_mdb_ip_get(brmctx->br, group);
        if (!mp)
                goto out;
 
-       if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+       if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) {
                struct net_bridge_port_group __rcu **pp;
 
                for (pp = &mp->ports;
-                    (p = mlock_dereference(*pp, br)) != NULL;
+                    (p = mlock_dereference(*pp, brmctx->br)) != NULL;
                     pp = &p->next) {
-                       if (!br_port_group_equal(p, port, src))
+                       if (!br_port_group_equal(p, pmctx->port, src))
                                continue;
 
                        if (p->flags & MDB_PG_FLAGS_PERMANENT)
@@ -3111,19 +3453,19 @@ br_multicast_leave_group(struct net_bridge *br,
        if (timer_pending(&other_query->timer))
                goto out;
 
-       if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
-               __br_multicast_send_query(br, port, NULL, NULL, &mp->addr,
+       if (brmctx->multicast_querier) {
+               __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr,
                                          false, 0, NULL);
 
-               time = jiffies + br->multicast_last_member_count *
-                                br->multicast_last_member_interval;
+               time = jiffies + brmctx->multicast_last_member_count *
+                                brmctx->multicast_last_member_interval;
 
                mod_timer(&own_query->timer, time);
 
-               for (p = mlock_dereference(mp->ports, br);
-                    p != NULL;
-                    p = mlock_dereference(p->next, br)) {
-                       if (!br_port_group_equal(p, port, src))
+               for (p = mlock_dereference(mp->ports, brmctx->br);
+                    p != NULL && pmctx != NULL;
+                    p = mlock_dereference(p->next, brmctx->br)) {
+                       if (!br_port_group_equal(p, pmctx->port, src))
                                continue;
 
                        if (!hlist_unhashed(&p->mglist) &&
@@ -3138,10 +3480,10 @@ br_multicast_leave_group(struct net_bridge *br,
        }
 
        now = jiffies;
-       time = now + br->multicast_last_member_count *
-                    br->multicast_last_member_interval;
+       time = now + brmctx->multicast_last_member_count *
+                    brmctx->multicast_last_member_interval;
 
-       if (!port) {
+       if (!pmctx) {
                if (mp->host_joined &&
                    (timer_pending(&mp->timer) ?
                     time_after(mp->timer.expires, time) :
@@ -3152,10 +3494,10 @@ br_multicast_leave_group(struct net_bridge *br,
                goto out;
        }
 
-       for (p = mlock_dereference(mp->ports, br);
+       for (p = mlock_dereference(mp->ports, brmctx->br);
             p != NULL;
-            p = mlock_dereference(p->next, br)) {
-               if (p->key.port != port)
+            p = mlock_dereference(p->next, brmctx->br)) {
+               if (p->key.port != pmctx->port)
                        continue;
 
                if (!hlist_unhashed(&p->mglist) &&
@@ -3168,11 +3510,11 @@ br_multicast_leave_group(struct net_bridge *br,
                break;
        }
 out:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
-                                        struct net_bridge_port *port,
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx,
                                         __be32 group,
                                         __u16 vid,
                                         const unsigned char *src)
@@ -3183,20 +3525,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
        if (ipv4_is_local_multicast(group))
                return;
 
-       own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+       own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query;
 
        memset(&br_group, 0, sizeof(br_group));
        br_group.dst.ip4 = group;
        br_group.proto = htons(ETH_P_IP);
        br_group.vid = vid;
 
-       br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+       br_multicast_leave_group(brmctx, pmctx, &br_group,
+                                &brmctx->ip4_other_query,
                                 own_query, src);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
-                                        struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+                                        struct net_bridge_mcast_port *pmctx,
                                         const struct in6_addr *group,
                                         __u16 vid,
                                         const unsigned char *src)
@@ -3207,14 +3550,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
        if (ipv6_addr_is_ll_all_nodes(group))
                return;
 
-       own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+       own_query = pmctx ? &pmctx->ip6_own_query : &brmctx->ip6_own_query;
 
        memset(&br_group, 0, sizeof(br_group));
        br_group.dst.ip6 = *group;
        br_group.proto = htons(ETH_P_IPV6);
        br_group.vid = vid;
 
-       br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+       br_multicast_leave_group(brmctx, pmctx, &br_group,
+                                &brmctx->ip6_other_query,
                                 own_query, src);
 }
 #endif
@@ -3252,8 +3596,8 @@ static void br_multicast_err_count(const struct net_bridge *br,
        u64_stats_update_end(&pstats->syncp);
 }
 
-static void br_multicast_pim(struct net_bridge *br,
-                            struct net_bridge_port *port,
+static void br_multicast_pim(struct net_bridge_mcast *brmctx,
+                            struct net_bridge_mcast_port *pmctx,
                             const struct sk_buff *skb)
 {
        unsigned int offset = skb_transport_offset(skb);
@@ -3264,31 +3608,32 @@ static void br_multicast_pim(struct net_bridge *br,
            pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
                return;
 
-       spin_lock(&br->multicast_lock);
-       br_ip4_multicast_mark_router(br, port);
-       spin_unlock(&br->multicast_lock);
+       spin_lock(&brmctx->br->multicast_lock);
+       br_ip4_multicast_mark_router(brmctx, pmctx);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
-static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
-                                   struct net_bridge_port *port,
+static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+                                   struct net_bridge_mcast_port *pmctx,
                                    struct sk_buff *skb)
 {
        if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
            igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
                return -ENOMSG;
 
-       spin_lock(&br->multicast_lock);
-       br_ip4_multicast_mark_router(br, port);
-       spin_unlock(&br->multicast_lock);
+       spin_lock(&brmctx->br->multicast_lock);
+       br_ip4_multicast_mark_router(brmctx, pmctx);
+       spin_unlock(&brmctx->br->multicast_lock);
 
        return 0;
 }
 
-static int br_multicast_ipv4_rcv(struct net_bridge *br,
-                                struct net_bridge_port *port,
+static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx,
+                                struct net_bridge_mcast_port *pmctx,
                                 struct sk_buff *skb,
                                 u16 vid)
 {
+       struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
        const unsigned char *src;
        struct igmphdr *ih;
        int err;
@@ -3300,14 +3645,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
                        BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
                } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
                        if (ip_hdr(skb)->protocol == IPPROTO_PIM)
-                               br_multicast_pim(br, port, skb);
+                               br_multicast_pim(brmctx, pmctx, skb);
                } else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
-                       br_ip4_multicast_mrd_rcv(br, port, skb);
+                       br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb);
                }
 
                return 0;
        } else if (err < 0) {
-               br_multicast_err_count(br, port, skb->protocol);
+               br_multicast_err_count(brmctx->br, p, skb->protocol);
                return err;
        }
 
@@ -3319,44 +3664,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
        case IGMP_HOST_MEMBERSHIP_REPORT:
        case IGMPV2_HOST_MEMBERSHIP_REPORT:
                BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-               err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
-                                                true);
+               err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid,
+                                                src, true);
                break;
        case IGMPV3_HOST_MEMBERSHIP_REPORT:
-               err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
+               err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid);
                break;
        case IGMP_HOST_MEMBERSHIP_QUERY:
-               br_ip4_multicast_query(br, port, skb, vid);
+               br_ip4_multicast_query(brmctx, pmctx, skb, vid);
                break;
        case IGMP_HOST_LEAVE_MESSAGE:
-               br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
+               br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src);
                break;
        }
 
-       br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+       br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
                           BR_MCAST_DIR_RX);
 
        return err;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
-                                    struct net_bridge_port *port,
+static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+                                    struct net_bridge_mcast_port *pmctx,
                                     struct sk_buff *skb)
 {
        if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
                return;
 
-       spin_lock(&br->multicast_lock);
-       br_ip6_multicast_mark_router(br, port);
-       spin_unlock(&br->multicast_lock);
+       spin_lock(&brmctx->br->multicast_lock);
+       br_ip6_multicast_mark_router(brmctx, pmctx);
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
-static int br_multicast_ipv6_rcv(struct net_bridge *br,
-                                struct net_bridge_port *port,
+static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx,
+                                struct net_bridge_mcast_port *pmctx,
                                 struct sk_buff *skb,
                                 u16 vid)
 {
+       struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
        const unsigned char *src;
        struct mld_msg *mld;
        int err;
@@ -3368,11 +3714,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                        BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
                if (err == -ENODATA &&
                    ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr))
-                       br_ip6_multicast_mrd_rcv(br, port, skb);
+                       br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb);
 
                return 0;
        } else if (err < 0) {
-               br_multicast_err_count(br, port, skb->protocol);
+               br_multicast_err_count(brmctx->br, p, skb->protocol);
                return err;
        }
 
@@ -3383,29 +3729,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
        case ICMPV6_MGM_REPORT:
                src = eth_hdr(skb)->h_source;
                BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-               err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
-                                                src, true);
+               err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca,
+                                                vid, src, true);
                break;
        case ICMPV6_MLD2_REPORT:
-               err = br_ip6_multicast_mld2_report(br, port, skb, vid);
+               err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid);
                break;
        case ICMPV6_MGM_QUERY:
-               err = br_ip6_multicast_query(br, port, skb, vid);
+               err = br_ip6_multicast_query(brmctx, pmctx, skb, vid);
                break;
        case ICMPV6_MGM_REDUCTION:
                src = eth_hdr(skb)->h_source;
-               br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src);
+               br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid,
+                                            src);
                break;
        }
 
-       br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+       br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
                           BR_MCAST_DIR_RX);
 
        return err;
 }
 #endif
 
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+                    struct net_bridge_mcast_port **pmctx,
+                    struct net_bridge_vlan *vlan,
                     struct sk_buff *skb, u16 vid)
 {
        int ret = 0;
@@ -3413,16 +3762,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
        BR_INPUT_SKB_CB(skb)->igmp = 0;
        BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
 
-       if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+       if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED))
                return 0;
 
+       if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) {
+               const struct net_bridge_vlan *masterv;
+
+               /* the vlan has the master flag set only when transmitting
+                * through the bridge device
+                */
+               if (br_vlan_is_master(vlan)) {
+                       masterv = vlan;
+                       *brmctx = &vlan->br_mcast_ctx;
+                       *pmctx = NULL;
+               } else {
+                       masterv = vlan->brvlan;
+                       *brmctx = &vlan->brvlan->br_mcast_ctx;
+                       *pmctx = &vlan->port_mcast_ctx;
+               }
+
+               if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+                       return 0;
+       }
+
        switch (skb->protocol) {
        case htons(ETH_P_IP):
-               ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+               ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid);
                break;
 #if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6):
-               ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+               ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid);
                break;
 #endif
        }
@@ -3430,32 +3799,39 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
        return ret;
 }
 
-static void br_multicast_query_expired(struct net_bridge *br,
+static void br_multicast_query_expired(struct net_bridge_mcast *brmctx,
                                       struct bridge_mcast_own_query *query,
                                       struct bridge_mcast_querier *querier)
 {
-       spin_lock(&br->multicast_lock);
-       if (query->startup_sent < br->multicast_startup_query_count)
+       spin_lock(&brmctx->br->multicast_lock);
+       if (br_multicast_ctx_vlan_disabled(brmctx))
+               goto out;
+
+       if (query->startup_sent < brmctx->multicast_startup_query_count)
                query->startup_sent++;
 
-       RCU_INIT_POINTER(querier->port, NULL);
-       br_multicast_send_query(br, NULL, query);
-       spin_unlock(&br->multicast_lock);
+       br_multicast_send_query(brmctx, NULL, query);
+out:
+       spin_unlock(&brmctx->br->multicast_lock);
 }
 
 static void br_ip4_multicast_query_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip4_own_query.timer);
 
-       br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
+       br_multicast_query_expired(brmctx, &brmctx->ip4_own_query,
+                                  &brmctx->ip4_querier);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_query_expired(struct timer_list *t)
 {
-       struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
+       struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+                                                    ip6_own_query.timer);
 
-       br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
+       br_multicast_query_expired(brmctx, &brmctx->ip6_own_query,
+                                  &brmctx->ip6_querier);
 }
 #endif
 
@@ -3472,47 +3848,65 @@ static void br_multicast_gc_work(struct work_struct *work)
        br_multicast_gc(&deleted_head);
 }
 
-void br_multicast_init(struct net_bridge *br)
+void br_multicast_ctx_init(struct net_bridge *br,
+                          struct net_bridge_vlan *vlan,
+                          struct net_bridge_mcast *brmctx)
 {
-       br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+       brmctx->br = br;
+       brmctx->vlan = vlan;
+       brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+       brmctx->multicast_last_member_count = 2;
+       brmctx->multicast_startup_query_count = 2;
 
-       br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
-       br->multicast_last_member_count = 2;
-       br->multicast_startup_query_count = 2;
+       brmctx->multicast_last_member_interval = HZ;
+       brmctx->multicast_query_response_interval = 10 * HZ;
+       brmctx->multicast_startup_query_interval = 125 * HZ / 4;
+       brmctx->multicast_query_interval = 125 * HZ;
+       brmctx->multicast_querier_interval = 255 * HZ;
+       brmctx->multicast_membership_interval = 260 * HZ;
 
-       br->multicast_last_member_interval = HZ;
-       br->multicast_query_response_interval = 10 * HZ;
-       br->multicast_startup_query_interval = 125 * HZ / 4;
-       br->multicast_query_interval = 125 * HZ;
-       br->multicast_querier_interval = 255 * HZ;
-       br->multicast_membership_interval = 260 * HZ;
-
-       br->ip4_other_query.delay_time = 0;
-       br->ip4_querier.port = NULL;
-       br->multicast_igmp_version = 2;
+       brmctx->ip4_other_query.delay_time = 0;
+       brmctx->ip4_querier.port_ifidx = 0;
+       seqcount_init(&brmctx->ip4_querier.seq);
+       brmctx->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
-       br->multicast_mld_version = 1;
-       br->ip6_other_query.delay_time = 0;
-       br->ip6_querier.port = NULL;
+       brmctx->multicast_mld_version = 1;
+       brmctx->ip6_other_query.delay_time = 0;
+       brmctx->ip6_querier.port_ifidx = 0;
+       seqcount_init(&brmctx->ip6_querier.seq);
 #endif
-       br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
-       br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
 
-       spin_lock_init(&br->multicast_lock);
-       timer_setup(&br->ip4_mc_router_timer,
+       timer_setup(&brmctx->ip4_mc_router_timer,
                    br_ip4_multicast_local_router_expired, 0);
-       timer_setup(&br->ip4_other_query.timer,
+       timer_setup(&brmctx->ip4_other_query.timer,
                    br_ip4_multicast_querier_expired, 0);
-       timer_setup(&br->ip4_own_query.timer,
+       timer_setup(&brmctx->ip4_own_query.timer,
                    br_ip4_multicast_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
-       timer_setup(&br->ip6_mc_router_timer,
+       timer_setup(&brmctx->ip6_mc_router_timer,
                    br_ip6_multicast_local_router_expired, 0);
-       timer_setup(&br->ip6_other_query.timer,
+       timer_setup(&brmctx->ip6_other_query.timer,
                    br_ip6_multicast_querier_expired, 0);
-       timer_setup(&br->ip6_own_query.timer,
+       timer_setup(&brmctx->ip6_own_query.timer,
                    br_ip6_multicast_query_expired, 0);
 #endif
+}
+
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+       __br_multicast_stop(brmctx);
+}
+
+void br_multicast_init(struct net_bridge *br)
+{
+       br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+
+       br_multicast_ctx_init(br, NULL, &br->multicast_ctx);
+
+       br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+       br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+
+       spin_lock_init(&br->multicast_lock);
        INIT_HLIST_HEAD(&br->mdb_list);
        INIT_HLIST_HEAD(&br->mcast_gc_list);
        INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work);
@@ -3580,8 +3974,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br)
        br_ip6_multicast_leave_snoopers(br);
 }
 
-static void __br_multicast_open(struct net_bridge *br,
-                               struct bridge_mcast_own_query *query)
+static void __br_multicast_open_query(struct net_bridge *br,
+                                     struct bridge_mcast_own_query *query)
 {
        query->startup_sent = 0;
 
@@ -3591,26 +3985,194 @@ static void __br_multicast_open(struct net_bridge *br,
        mod_timer(&query->timer, jiffies);
 }
 
-void br_multicast_open(struct net_bridge *br)
+static void __br_multicast_open(struct net_bridge_mcast *brmctx)
 {
-       __br_multicast_open(br, &br->ip4_own_query);
+       __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
-       __br_multicast_open(br, &br->ip6_own_query);
+       __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query);
 #endif
 }
 
-void br_multicast_stop(struct net_bridge *br)
+void br_multicast_open(struct net_bridge *br)
 {
-       del_timer_sync(&br->ip4_mc_router_timer);
-       del_timer_sync(&br->ip4_other_query.timer);
-       del_timer_sync(&br->ip4_own_query.timer);
+       ASSERT_RTNL();
+
+       if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+               struct net_bridge_vlan_group *vg;
+               struct net_bridge_vlan *vlan;
+
+               vg = br_vlan_group(br);
+               if (vg) {
+                       list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+                               struct net_bridge_mcast *brmctx;
+
+                               brmctx = &vlan->br_mcast_ctx;
+                               if (br_vlan_is_brentry(vlan) &&
+                                   !br_multicast_ctx_vlan_disabled(brmctx))
+                                       __br_multicast_open(&vlan->br_mcast_ctx);
+                       }
+               }
+       } else {
+               __br_multicast_open(&br->multicast_ctx);
+       }
+}
+
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
+{
+       del_timer_sync(&brmctx->ip4_mc_router_timer);
+       del_timer_sync(&brmctx->ip4_other_query.timer);
+       del_timer_sync(&brmctx->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
-       del_timer_sync(&br->ip6_mc_router_timer);
-       del_timer_sync(&br->ip6_other_query.timer);
-       del_timer_sync(&br->ip6_own_query.timer);
+       del_timer_sync(&brmctx->ip6_mc_router_timer);
+       del_timer_sync(&brmctx->ip6_other_query.timer);
+       del_timer_sync(&brmctx->ip6_own_query.timer);
 #endif
 }
 
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+       struct net_bridge *br;
+
+       /* it's okay to check for the flag without the multicast lock because it
+        * can only change under RTNL -> multicast_lock, we need the latter to
+        * sync with timers and packets
+        */
+       if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED))
+               return;
+
+       if (br_vlan_is_master(vlan)) {
+               br = vlan->br;
+
+               if (!br_vlan_is_brentry(vlan) ||
+                   (on &&
+                    br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx)))
+                       return;
+
+               spin_lock_bh(&br->multicast_lock);
+               vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+               spin_unlock_bh(&br->multicast_lock);
+
+               if (on)
+                       __br_multicast_open(&vlan->br_mcast_ctx);
+               else
+                       __br_multicast_stop(&vlan->br_mcast_ctx);
+       } else {
+               struct net_bridge_mcast *brmctx;
+
+               brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx);
+               if (on && br_multicast_ctx_vlan_global_disabled(brmctx))
+                       return;
+
+               br = vlan->port->br;
+               spin_lock_bh(&br->multicast_lock);
+               vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+               if (on)
+                       __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx);
+               else
+                       __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx);
+               spin_unlock_bh(&br->multicast_lock);
+       }
+}
+
+static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+       struct net_bridge_port *p;
+
+       if (WARN_ON_ONCE(!br_vlan_is_master(vlan)))
+               return;
+
+       list_for_each_entry(p, &vlan->br->port_list, list) {
+               struct net_bridge_vlan *vport;
+
+               vport = br_vlan_find(nbp_vlan_group(p), vlan->vid);
+               if (!vport)
+                       continue;
+               br_multicast_toggle_one_vlan(vport, on);
+       }
+
+       if (br_vlan_is_brentry(vlan))
+               br_multicast_toggle_one_vlan(vlan, on);
+}
+
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+                                     struct netlink_ext_ack *extack)
+{
+       struct net_bridge_vlan_group *vg;
+       struct net_bridge_vlan *vlan;
+       struct net_bridge_port *p;
+
+       if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on)
+               return 0;
+
+       if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) {
+               NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled");
+               return -EINVAL;
+       }
+
+       vg = br_vlan_group(br);
+       if (!vg)
+               return 0;
+
+       br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on);
+
+       /* disable/enable non-vlan mcast contexts based on vlan snooping */
+       if (on)
+               __br_multicast_stop(&br->multicast_ctx);
+       else
+               __br_multicast_open(&br->multicast_ctx);
+       list_for_each_entry(p, &br->port_list, list) {
+               if (on)
+                       br_multicast_disable_port(p);
+               else
+                       br_multicast_enable_port(p);
+       }
+
+       list_for_each_entry(vlan, &vg->vlan_list, vlist)
+               br_multicast_toggle_vlan(vlan, on);
+
+       return 0;
+}
+
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+       ASSERT_RTNL();
+
+       /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and
+        * requires only RTNL to change
+        */
+       if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+               return false;
+
+       vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
+       br_multicast_toggle_vlan(vlan, on);
+
+       return true;
+}
+
+void br_multicast_stop(struct net_bridge *br)
+{
+       ASSERT_RTNL();
+
+       if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+               struct net_bridge_vlan_group *vg;
+               struct net_bridge_vlan *vlan;
+
+               vg = br_vlan_group(br);
+               if (vg) {
+                       list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+                               struct net_bridge_mcast *brmctx;
+
+                               brmctx = &vlan->br_mcast_ctx;
+                               if (br_vlan_is_brentry(vlan) &&
+                                   !br_multicast_ctx_vlan_disabled(brmctx))
+                                       __br_multicast_stop(&vlan->br_mcast_ctx);
+                       }
+               }
+       } else {
+               __br_multicast_stop(&br->multicast_ctx);
+       }
+}
+
 void br_multicast_dev_del(struct net_bridge *br)
 {
        struct net_bridge_mdb_entry *mp;
@@ -3623,44 +4185,45 @@ void br_multicast_dev_del(struct net_bridge *br)
        hlist_move_list(&br->mcast_gc_list, &deleted_head);
        spin_unlock_bh(&br->multicast_lock);
 
+       br_multicast_ctx_deinit(&br->multicast_ctx);
        br_multicast_gc(&deleted_head);
        cancel_work_sync(&br->mcast_gc_work);
 
        rcu_barrier();
 }
 
-int br_multicast_set_router(struct net_bridge *br, unsigned long val)
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val)
 {
        int err = -EINVAL;
 
-       spin_lock_bh(&br->multicast_lock);
+       spin_lock_bh(&brmctx->br->multicast_lock);
 
        switch (val) {
        case MDB_RTR_TYPE_DISABLED:
        case MDB_RTR_TYPE_PERM:
-               br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
-               del_timer(&br->ip4_mc_router_timer);
+               br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM);
+               del_timer(&brmctx->ip4_mc_router_timer);
 #if IS_ENABLED(CONFIG_IPV6)
-               del_timer(&br->ip6_mc_router_timer);
+               del_timer(&brmctx->ip6_mc_router_timer);
 #endif
-               br->multicast_router = val;
+               brmctx->multicast_router = val;
                err = 0;
                break;
        case MDB_RTR_TYPE_TEMP_QUERY:
-               if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
-                       br_mc_router_state_change(br, false);
-               br->multicast_router = val;
+               if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+                       br_mc_router_state_change(brmctx->br, false);
+               brmctx->multicast_router = val;
                err = 0;
                break;
        }
 
-       spin_unlock_bh(&br->multicast_lock);
+       spin_unlock_bh(&brmctx->br->multicast_lock);
 
        return err;
 }
 
 static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted)
 {
        if (!deleted)
                return;
@@ -3668,37 +4231,39 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
        /* For backwards compatibility for now, only notify if there is
         * no multicast router anymore for both IPv4 and IPv6.
         */
-       if (!hlist_unhashed(&p->ip4_rlist))
+       if (!hlist_unhashed(&pmctx->ip4_rlist))
                return;
 #if IS_ENABLED(CONFIG_IPV6)
-       if (!hlist_unhashed(&p->ip6_rlist))
+       if (!hlist_unhashed(&pmctx->ip6_rlist))
                return;
 #endif
 
-       br_rtr_notify(p->br->dev, p, RTM_DELMDB);
-       br_port_mc_router_state_change(p, false);
+       br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB);
+       br_port_mc_router_state_change(pmctx->port, false);
 
        /* don't allow timer refresh */
-       if (p->multicast_router == MDB_RTR_TYPE_TEMP)
-               p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+       if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP)
+               pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 }
 
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+                                unsigned long val)
 {
-       struct net_bridge *br = p->br;
+       struct net_bridge_mcast *brmctx;
        unsigned long now = jiffies;
        int err = -EINVAL;
        bool del = false;
 
-       spin_lock(&br->multicast_lock);
-       if (p->multicast_router == val) {
+       brmctx = br_multicast_port_ctx_get_global(pmctx);
+       spin_lock(&brmctx->br->multicast_lock);
+       if (pmctx->multicast_router == val) {
                /* Refresh the temp router port timer */
-               if (p->multicast_router == MDB_RTR_TYPE_TEMP) {
-                       mod_timer(&p->ip4_mc_router_timer,
-                                 now + br->multicast_querier_interval);
+               if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
+                       mod_timer(&pmctx->ip4_mc_router_timer,
+                                 now + brmctx->multicast_querier_interval);
 #if IS_ENABLED(CONFIG_IPV6)
-                       mod_timer(&p->ip6_mc_router_timer,
-                                 now + br->multicast_querier_interval);
+                       mod_timer(&pmctx->ip6_mc_router_timer,
+                                 now + brmctx->multicast_querier_interval);
 #endif
                }
                err = 0;
@@ -3706,63 +4271,103 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
        }
        switch (val) {
        case MDB_RTR_TYPE_DISABLED:
-               p->multicast_router = MDB_RTR_TYPE_DISABLED;
-               del |= br_ip4_multicast_rport_del(p);
-               del_timer(&p->ip4_mc_router_timer);
-               del |= br_ip6_multicast_rport_del(p);
+               pmctx->multicast_router = MDB_RTR_TYPE_DISABLED;
+               del |= br_ip4_multicast_rport_del(pmctx);
+               del_timer(&pmctx->ip4_mc_router_timer);
+               del |= br_ip6_multicast_rport_del(pmctx);
 #if IS_ENABLED(CONFIG_IPV6)
-               del_timer(&p->ip6_mc_router_timer);
+               del_timer(&pmctx->ip6_mc_router_timer);
 #endif
-               br_multicast_rport_del_notify(p, del);
+               br_multicast_rport_del_notify(pmctx, del);
                break;
        case MDB_RTR_TYPE_TEMP_QUERY:
-               p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
-               del |= br_ip4_multicast_rport_del(p);
-               del |= br_ip6_multicast_rport_del(p);
-               br_multicast_rport_del_notify(p, del);
+               pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+               del |= br_ip4_multicast_rport_del(pmctx);
+               del |= br_ip6_multicast_rport_del(pmctx);
+               br_multicast_rport_del_notify(pmctx, del);
                break;
        case MDB_RTR_TYPE_PERM:
-               p->multicast_router = MDB_RTR_TYPE_PERM;
-               del_timer(&p->ip4_mc_router_timer);
-               br_ip4_multicast_add_router(br, p);
+               pmctx->multicast_router = MDB_RTR_TYPE_PERM;
+               del_timer(&pmctx->ip4_mc_router_timer);
+               br_ip4_multicast_add_router(brmctx, pmctx);
 #if IS_ENABLED(CONFIG_IPV6)
-               del_timer(&p->ip6_mc_router_timer);
+               del_timer(&pmctx->ip6_mc_router_timer);
 #endif
-               br_ip6_multicast_add_router(br, p);
+               br_ip6_multicast_add_router(brmctx, pmctx);
                break;
        case MDB_RTR_TYPE_TEMP:
-               p->multicast_router = MDB_RTR_TYPE_TEMP;
-               br_ip4_multicast_mark_router(br, p);
-               br_ip6_multicast_mark_router(br, p);
+               pmctx->multicast_router = MDB_RTR_TYPE_TEMP;
+               br_ip4_multicast_mark_router(brmctx, pmctx);
+               br_ip6_multicast_mark_router(brmctx, pmctx);
                break;
        default:
                goto unlock;
        }
        err = 0;
 unlock:
-       spin_unlock(&br->multicast_lock);
+       spin_unlock(&brmctx->br->multicast_lock);
 
        return err;
 }
 
-static void br_multicast_start_querier(struct net_bridge *br,
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router)
+{
+       int err;
+
+       if (br_vlan_is_master(v))
+               err = br_multicast_set_router(&v->br_mcast_ctx, mcast_router);
+       else
+               err = br_multicast_set_port_router(&v->port_mcast_ctx,
+                                                  mcast_router);
+
+       return err;
+}
+
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
                                       struct bridge_mcast_own_query *query)
 {
        struct net_bridge_port *port;
 
-       __br_multicast_open(br, query);
+       if (!br_multicast_ctx_matches_vlan_snooping(brmctx))
+               return;
+
+       __br_multicast_open_query(brmctx->br, query);
 
        rcu_read_lock();
-       list_for_each_entry_rcu(port, &br->port_list, list) {
-               if (port->state == BR_STATE_DISABLED ||
-                   port->state == BR_STATE_BLOCKING)
+       list_for_each_entry_rcu(port, &brmctx->br->port_list, list) {
+               struct bridge_mcast_own_query *ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+               struct bridge_mcast_own_query *ip6_own_query;
+#endif
+
+               if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx))
                        continue;
 
-               if (query == &br->ip4_own_query)
-                       br_multicast_enable(&port->ip4_own_query);
+               if (br_multicast_ctx_is_vlan(brmctx)) {
+                       struct net_bridge_vlan *vlan;
+
+                       vlan = br_vlan_find(nbp_vlan_group_rcu(port),
+                                           brmctx->vlan->vid);
+                       if (!vlan ||
+                           br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx))
+                               continue;
+
+                       ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+                       ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query;
+#endif
+               } else {
+                       ip4_own_query = &port->multicast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+                       ip6_own_query = &port->multicast_ctx.ip6_own_query;
+#endif
+               }
+
+               if (query == &brmctx->ip4_own_query)
+                       br_multicast_enable(ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
                else
-                       br_multicast_enable(&port->ip6_own_query);
+                       br_multicast_enable(ip6_own_query);
 #endif
        }
        rcu_read_unlock();
@@ -3796,7 +4401,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
 
        br_multicast_open(br);
        list_for_each_entry(port, &br->port_list, list)
-               __br_multicast_enable_port(port);
+               __br_multicast_enable_port_ctx(&port->multicast_ctx);
 
        change_snoopers = true;
 
@@ -3839,47 +4444,48 @@ bool br_multicast_router(const struct net_device *dev)
        bool is_router;
 
        spin_lock_bh(&br->multicast_lock);
-       is_router = br_multicast_is_router(br, NULL);
+       is_router = br_multicast_is_router(&br->multicast_ctx, NULL);
        spin_unlock_bh(&br->multicast_lock);
        return is_router;
 }
 EXPORT_SYMBOL_GPL(br_multicast_router);
 
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
 {
        unsigned long max_delay;
 
        val = !!val;
 
-       spin_lock_bh(&br->multicast_lock);
-       if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
+       spin_lock_bh(&brmctx->br->multicast_lock);
+       if (brmctx->multicast_querier == val)
                goto unlock;
 
-       br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
+       WRITE_ONCE(brmctx->multicast_querier, val);
        if (!val)
                goto unlock;
 
-       max_delay = br->multicast_query_response_interval;
+       max_delay = brmctx->multicast_query_response_interval;
 
-       if (!timer_pending(&br->ip4_other_query.timer))
-               br->ip4_other_query.delay_time = jiffies + max_delay;
+       if (!timer_pending(&brmctx->ip4_other_query.timer))
+               brmctx->ip4_other_query.delay_time = jiffies + max_delay;
 
-       br_multicast_start_querier(br, &br->ip4_own_query);
+       br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
 
 #if IS_ENABLED(CONFIG_IPV6)
-       if (!timer_pending(&br->ip6_other_query.timer))
-               br->ip6_other_query.delay_time = jiffies + max_delay;
+       if (!timer_pending(&brmctx->ip6_other_query.timer))
+               brmctx->ip6_other_query.delay_time = jiffies + max_delay;
 
-       br_multicast_start_querier(br, &br->ip6_own_query);
+       br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
 #endif
 
 unlock:
-       spin_unlock_bh(&br->multicast_lock);
+       spin_unlock_bh(&brmctx->br->multicast_lock);
 
        return 0;
 }
 
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+                                 unsigned long val)
 {
        /* Currently we support only version 2 and 3 */
        switch (val) {
@@ -3890,15 +4496,16 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
                return -EINVAL;
        }
 
-       spin_lock_bh(&br->multicast_lock);
-       br->multicast_igmp_version = val;
-       spin_unlock_bh(&br->multicast_lock);
+       spin_lock_bh(&brmctx->br->multicast_lock);
+       brmctx->multicast_igmp_version = val;
+       spin_unlock_bh(&brmctx->br->multicast_lock);
 
        return 0;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+                                unsigned long val)
 {
        /* Currently we support version 1 and 2 */
        switch (val) {
@@ -3909,9 +4516,9 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
                return -EINVAL;
        }
 
-       spin_lock_bh(&br->multicast_lock);
-       br->multicast_mld_version = val;
-       spin_unlock_bh(&br->multicast_lock);
+       spin_lock_bh(&brmctx->br->multicast_lock);
+       brmctx->multicast_mld_version = val;
+       spin_unlock_bh(&brmctx->br->multicast_lock);
 
        return 0;
 }
@@ -4003,7 +4610,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
        memset(&eth, 0, sizeof(eth));
        eth.h_proto = htons(proto);
 
-       ret = br_multicast_querier_exists(br, &eth, NULL);
+       ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL);
 
 unlock:
        rcu_read_unlock();
@@ -4022,9 +4629,11 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
  */
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
 {
+       struct net_bridge_mcast *brmctx;
        struct net_bridge *br;
        struct net_bridge_port *port;
        bool ret = false;
+       int port_ifidx;
 
        rcu_read_lock();
        if (!netif_is_bridge_port(dev))
@@ -4035,17 +4644,20 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
                goto unlock;
 
        br = port->br;
+       brmctx = &br->multicast_ctx;
 
        switch (proto) {
        case ETH_P_IP:
-               if (!timer_pending(&br->ip4_other_query.timer) ||
-                   rcu_dereference(br->ip4_querier.port) == port)
+               port_ifidx = brmctx->ip4_querier.port_ifidx;
+               if (!timer_pending(&brmctx->ip4_other_query.timer) ||
+                   port_ifidx == port->dev->ifindex)
                        goto unlock;
                break;
 #if IS_ENABLED(CONFIG_IPV6)
        case ETH_P_IPV6:
-               if (!timer_pending(&br->ip6_other_query.timer) ||
-                   rcu_dereference(br->ip6_querier.port) == port)
+               port_ifidx = brmctx->ip6_querier.port_ifidx;
+               if (!timer_pending(&brmctx->ip6_other_query.timer) ||
+                   port_ifidx == port->dev->ifindex)
                        goto unlock;
                break;
 #endif
@@ -4071,7 +4683,9 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
  */
 bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
 {
-       struct net_bridge_port *port, *p;
+       struct net_bridge_mcast_port *pmctx;
+       struct net_bridge_mcast *brmctx;
+       struct net_bridge_port *port;
        bool ret = false;
 
        rcu_read_lock();
@@ -4079,11 +4693,12 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
        if (!port)
                goto unlock;
 
+       brmctx = &port->br->multicast_ctx;
        switch (proto) {
        case ETH_P_IP:
-               hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list,
+               hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
                                         ip4_rlist) {
-                       if (p == port)
+                       if (pmctx->port == port)
                                continue;
 
                        ret = true;
@@ -4092,9 +4707,9 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
                break;
 #if IS_ENABLED(CONFIG_IPV6)
        case ETH_P_IPV6:
-               hlist_for_each_entry_rcu(p, &port->br->ip6_mc_router_list,
+               hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
                                         ip6_rlist) {
-                       if (p == port)
+                       if (pmctx->port == port)
                                continue;
 
                        ret = true;
@@ -4186,7 +4801,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
        u64_stats_update_end(&pstats->syncp);
 }
 
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+                       const struct net_bridge_port *p,
                        const struct sk_buff *skb, u8 type, u8 dir)
 {
        struct bridge_mcast_stats __percpu *stats;
index 13290a7..f91c071 100644 (file)
@@ -33,7 +33,8 @@
 static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
                                           union net_bridge_eht_addr *src_addr,
                                           union net_bridge_eht_addr *h_addr);
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+                                             struct net_bridge_port_group *pg,
                                              union net_bridge_eht_addr *src_addr,
                                              union net_bridge_eht_addr *h_addr,
                                              int filter_mode,
@@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
        }
 }
 
-static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx,
+                                           struct net_bridge_port_group *pg,
                                            union net_bridge_eht_addr *h_addr,
                                            int filter_mode)
 {
@@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
                br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
                break;
        case MCAST_EXCLUDE:
-               br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
-                                                 MCAST_EXCLUDE,
+               br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr,
+                                                 h_addr, MCAST_EXCLUDE,
                                                  true);
                break;
        }
 }
 
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+                                             struct net_bridge_port_group *pg,
                                              union net_bridge_eht_addr *src_addr,
                                              union net_bridge_eht_addr *h_addr,
                                              int filter_mode,
@@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
        if (!set_h)
                goto fail_set_entry;
 
-       mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
-       mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+       mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx));
+       mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx));
 
        return;
 
@@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
 }
 
 /* create new set entries from reports */
-static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx,
+                                    struct net_bridge_port_group *pg,
                                     union net_bridge_eht_addr *h_addr,
                                     void *srcs,
                                     u32 nsrcs,
@@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg,
        memset(&eht_src_addr, 0, sizeof(eht_src_addr));
        for (src_idx = 0; src_idx < nsrcs; src_idx++) {
                memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-               br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-                                                 filter_mode,
+               br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr,
+                                                 h_addr, filter_mode,
                                                  false);
        }
 }
@@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
        return changed;
 }
 
-static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx,
+                                  struct net_bridge_port_group *pg,
                                   union net_bridge_eht_addr *h_addr,
                                   void *srcs,
                                   u32 nsrcs,
@@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
 
        switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
        case MCAST_INCLUDE:
-               __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
-                                        MCAST_INCLUDE);
+               __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs,
+                                        addr_size, MCAST_INCLUDE);
                break;
        case MCAST_EXCLUDE:
                changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
@@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
        return changed;
 }
 
-static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx,
+                                  struct net_bridge_port_group *pg,
                                   union net_bridge_eht_addr *h_addr,
                                   void *srcs,
                                   u32 nsrcs,
@@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
                                                addr_size);
                break;
        case MCAST_EXCLUDE:
-               __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+               __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
                                         MCAST_EXCLUDE);
                break;
        }
@@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
 }
 
 /* flush_entries is true when changing mode */
-static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx,
+                         struct net_bridge_port_group *pg,
                          union net_bridge_eht_addr *h_addr,
                          void *srcs,
                          u32 nsrcs,
@@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
        /* if we're changing mode del host and its entries */
        if (flush_entries)
                br_multicast_del_eht_host(pg, h_addr);
-       __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+       __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
                                 filter_mode);
        /* we can be missing sets only if we've deleted some entries */
        if (flush_entries) {
-               struct net_bridge *br = pg->key.port->br;
                struct net_bridge_group_eht_set *eht_set;
                struct net_bridge_group_src *src_ent;
                struct hlist_node *tmp;
@@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
                                                              &eht_src_addr);
                        if (!eht_set)
                                continue;
-                       mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+                       mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx));
                }
        }
 
        return changed;
 }
 
-static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx,
+                                struct net_bridge_port_group *pg,
                                 union net_bridge_eht_addr *h_addr,
                                 void *srcs,
                                 u32 nsrcs,
@@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
 {
        bool changed;
 
-       changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+       changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
                                MCAST_INCLUDE, to_report);
-       br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+       br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE);
 
        return changed;
 }
 
-static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx,
+                                struct net_bridge_port_group *pg,
                                 union net_bridge_eht_addr *h_addr,
                                 void *srcs,
                                 u32 nsrcs,
@@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
 {
        bool changed;
 
-       changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+       changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
                                MCAST_EXCLUDE, to_report);
-       br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+       br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE);
 
        return changed;
 }
 
-static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx,
+                            struct net_bridge_port_group *pg,
                             union net_bridge_eht_addr *h_addr,
                             void *srcs,
                             u32 nsrcs,
@@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
 
        switch (grec_type) {
        case IGMPV3_ALLOW_NEW_SOURCES:
-               br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+               br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
+                                      sizeof(__be32));
                break;
        case IGMPV3_BLOCK_OLD_SOURCES:
-               changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
                                                 sizeof(__be32));
                break;
        case IGMPV3_CHANGE_TO_INCLUDE:
                to_report = true;
                fallthrough;
        case IGMPV3_MODE_IS_INCLUDE:
-               changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
                                               sizeof(__be32), to_report);
                break;
        case IGMPV3_CHANGE_TO_EXCLUDE:
                to_report = true;
                fallthrough;
        case IGMPV3_MODE_IS_EXCLUDE:
-               changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
                                               sizeof(__be32), to_report);
                break;
        }
@@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx,
+                            struct net_bridge_port_group *pg,
                             union net_bridge_eht_addr *h_addr,
                             void *srcs,
                             u32 nsrcs,
@@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
 
        switch (grec_type) {
        case MLD2_ALLOW_NEW_SOURCES:
-               br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+               br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
                                       sizeof(struct in6_addr));
                break;
        case MLD2_BLOCK_OLD_SOURCES:
-               changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
                                                 sizeof(struct in6_addr));
                break;
        case MLD2_CHANGE_TO_INCLUDE:
                to_report = true;
                fallthrough;
        case MLD2_MODE_IS_INCLUDE:
-               changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
                                               sizeof(struct in6_addr),
                                               to_report);
                break;
@@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
                to_report = true;
                fallthrough;
        case MLD2_MODE_IS_EXCLUDE:
-               changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+               changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
                                               sizeof(struct in6_addr),
                                               to_report);
                break;
@@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
 #endif
 
 /* true means an entry was deleted */
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+                            struct net_bridge_port_group *pg,
                             void *h_addr,
                             void *srcs,
                             u32 nsrcs,
@@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
        memset(&eht_host_addr, 0, sizeof(eht_host_addr));
        memcpy(&eht_host_addr, h_addr, addr_size);
        if (addr_size == sizeof(__be32))
-               changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
-                                          grec_type);
+               changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs,
+                                          nsrcs, grec_type);
 #if IS_ENABLED(CONFIG_IPV6)
        else
-               changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
-                                          grec_type);
+               changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs,
+                                          nsrcs, grec_type);
 #endif
 
 out:
index 8642e56..6c58fc1 100644 (file)
@@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
        if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
-                      p->multicast_router) ||
+                      p->multicast_ctx.multicast_router) ||
            nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
                        p->multicast_eht_hosts_limit) ||
            nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
@@ -932,7 +932,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
        if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) {
                u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]);
 
-               err = br_multicast_set_port_router(p, mcast_router);
+               err = br_multicast_set_port_router(&p->multicast_ctx,
+                                                  mcast_router);
                if (err)
                        return err;
        }
@@ -1286,7 +1287,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
        if (data[IFLA_BR_MCAST_ROUTER]) {
                u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]);
 
-               err = br_multicast_set_router(br, multicast_router);
+               err = br_multicast_set_router(&br->multicast_ctx,
+                                             multicast_router);
                if (err)
                        return err;
        }
@@ -1309,7 +1311,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
        if (data[IFLA_BR_MCAST_QUERIER]) {
                u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]);
 
-               err = br_multicast_set_querier(br, mcast_querier);
+               err = br_multicast_set_querier(&br->multicast_ctx,
+                                              mcast_querier);
                if (err)
                        return err;
        }
@@ -1324,49 +1327,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
        if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
                u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
 
-               br->multicast_last_member_count = val;
+               br->multicast_ctx.multicast_last_member_count = val;
        }
 
        if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
                u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
 
-               br->multicast_startup_query_count = val;
+               br->multicast_ctx.multicast_startup_query_count = val;
        }
 
        if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
 
-               br->multicast_last_member_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
 
-               br->multicast_membership_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
 
-               br->multicast_querier_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
 
-               br->multicast_query_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
 
-               br->multicast_query_response_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
                u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
 
-               br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+               br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
        }
 
        if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
@@ -1380,7 +1383,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
                __u8 igmp_version;
 
                igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
-               err = br_multicast_set_igmp_version(br, igmp_version);
+               err = br_multicast_set_igmp_version(&br->multicast_ctx,
+                                                   igmp_version);
                if (err)
                        return err;
        }
@@ -1390,7 +1394,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
                __u8 mld_version;
 
                mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
-               err = br_multicast_set_mld_version(br, mld_version);
+               err = br_multicast_set_mld_version(&br->multicast_ctx,
+                                                  mld_version);
                if (err)
                        return err;
        }
@@ -1497,6 +1502,7 @@ static size_t br_get_size(const struct net_device *brdev)
               nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
               nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_IGMP_VERSION */
               nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_MLD_VERSION */
+              br_multicast_querier_state_size() + /* IFLA_BR_MCAST_QUERIER_STATE */
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
               nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1566,50 +1572,53 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
                return -EMSGSIZE;
 #endif
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-       if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+       if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER,
+                      br->multicast_ctx.multicast_router) ||
            nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
                       br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
            nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
                       br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) ||
            nla_put_u8(skb, IFLA_BR_MCAST_QUERIER,
-                      br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
+                      br->multicast_ctx.multicast_querier) ||
            nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
                       br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
            nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
            nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
            nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
-                       br->multicast_last_member_count) ||
+                       br->multicast_ctx.multicast_last_member_count) ||
            nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
-                       br->multicast_startup_query_count) ||
+                       br->multicast_ctx.multicast_startup_query_count) ||
            nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
-                      br->multicast_igmp_version))
+                      br->multicast_ctx.multicast_igmp_version) ||
+           br_multicast_dump_querier_state(skb, &br->multicast_ctx,
+                                           IFLA_BR_MCAST_QUERIER_STATE))
                return -EMSGSIZE;
 #if IS_ENABLED(CONFIG_IPV6)
        if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
-                      br->multicast_mld_version))
+                      br->multicast_ctx.multicast_mld_version))
                return -EMSGSIZE;
 #endif
-       clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
-       clockval = jiffies_to_clock_t(br->multicast_membership_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
-       clockval = jiffies_to_clock_t(br->multicast_querier_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
-       clockval = jiffies_to_clock_t(br->multicast_query_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
-       clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
-       clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
+       clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval,
                              IFLA_BR_PAD))
                return -EMSGSIZE;
index 2b48b20..b4cef3a 100644 (file)
@@ -29,6 +29,8 @@
 
 #define BR_MULTICAST_DEFAULT_HASH_MAX 4096
 
+#define BR_HWDOM_MAX BITS_PER_LONG
+
 #define BR_VERSION     "2.3"
 
 /* Control of forwarding link local multicast */
@@ -79,7 +81,8 @@ struct bridge_mcast_other_query {
 /* selected querier */
 struct bridge_mcast_querier {
        struct br_ip addr;
-       struct net_bridge_port __rcu    *port;
+       int port_ifidx;
+       seqcount_t seq;
 };
 
 /* IGMP/MLD statistics */
@@ -89,6 +92,60 @@ struct bridge_mcast_stats {
 };
 #endif
 
+/* net_bridge_mcast_port must be always defined due to forwarding stubs */
+struct net_bridge_mcast_port {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       struct net_bridge_port          *port;
+       struct net_bridge_vlan          *vlan;
+
+       struct bridge_mcast_own_query   ip4_own_query;
+       struct timer_list               ip4_mc_router_timer;
+       struct hlist_node               ip4_rlist;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct bridge_mcast_own_query   ip6_own_query;
+       struct timer_list               ip6_mc_router_timer;
+       struct hlist_node               ip6_rlist;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+       unsigned char                   multicast_router;
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
+/* net_bridge_mcast must be always defined due to forwarding stubs */
+struct net_bridge_mcast {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       struct net_bridge               *br;
+       struct net_bridge_vlan          *vlan;
+
+       u32                             multicast_last_member_count;
+       u32                             multicast_startup_query_count;
+
+       u8                              multicast_querier;
+       u8                              multicast_igmp_version;
+       u8                              multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+       u8                              multicast_mld_version;
+#endif
+       unsigned long                   multicast_last_member_interval;
+       unsigned long                   multicast_membership_interval;
+       unsigned long                   multicast_querier_interval;
+       unsigned long                   multicast_query_interval;
+       unsigned long                   multicast_query_response_interval;
+       unsigned long                   multicast_startup_query_interval;
+       struct hlist_head               ip4_mc_router_list;
+       struct timer_list               ip4_mc_router_timer;
+       struct bridge_mcast_other_query ip4_other_query;
+       struct bridge_mcast_own_query   ip4_own_query;
+       struct bridge_mcast_querier     ip4_querier;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct hlist_head               ip6_mc_router_list;
+       struct timer_list               ip6_mc_router_timer;
+       struct bridge_mcast_other_query ip6_other_query;
+       struct bridge_mcast_own_query   ip6_own_query;
+       struct bridge_mcast_querier     ip6_querier;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
 struct br_tunnel_info {
        __be64                          tunnel_id;
        struct metadata_dst __rcu       *tunnel_dst;
@@ -98,6 +155,8 @@ struct br_tunnel_info {
 enum {
        BR_VLFLAG_PER_PORT_STATS = BIT(0),
        BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
+       BR_VLFLAG_MCAST_ENABLED = BIT(2),
+       BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
 };
 
 /**
@@ -114,6 +173,9 @@ enum {
  * @refcnt: if MASTER flag set, this is bumped for each port referencing it
  * @brvlan: if MASTER flag unset, this points to the global per-VLAN context
  *          for this VLAN entry
+ * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
+ * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
+ *                  context
  * @vlist: sorted list of VLAN entries
  * @rcu: used for entry destruction
  *
@@ -141,6 +203,11 @@ struct net_bridge_vlan {
 
        struct br_tunnel_info           tinfo;
 
+       union {
+               struct net_bridge_mcast         br_mcast_ctx;
+               struct net_bridge_mcast_port    port_mcast_ctx;
+       };
+
        struct list_head                vlist;
 
        struct rcu_head                 rcu;
@@ -305,19 +372,13 @@ struct net_bridge_port {
        struct kobject                  kobj;
        struct rcu_head                 rcu;
 
+       struct net_bridge_mcast_port    multicast_ctx;
+
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-       struct bridge_mcast_own_query   ip4_own_query;
-       struct timer_list               ip4_mc_router_timer;
-       struct hlist_node               ip4_rlist;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct bridge_mcast_own_query   ip6_own_query;
-       struct timer_list               ip6_mc_router_timer;
-       struct hlist_node               ip6_rlist;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
+       struct bridge_mcast_stats       __percpu *mcast_stats;
+
        u32                             multicast_eht_hosts_limit;
        u32                             multicast_eht_hosts_cnt;
-       unsigned char                   multicast_router;
-       struct bridge_mcast_stats       __percpu *mcast_stats;
        struct hlist_head               mglist;
 #endif
 
@@ -329,7 +390,12 @@ struct net_bridge_port {
        struct netpoll                  *np;
 #endif
 #ifdef CONFIG_NET_SWITCHDEV
-       int                             offload_fwd_mark;
+       /* Identifier used to group ports that share the same switchdev
+        * hardware domain.
+        */
+       int                             hwdom;
+       int                             offload_count;
+       struct netdev_phys_item_id      ppid;
 #endif
        u16                             group_fwd_mask;
        u16                             backup_redirected_cnt;
@@ -367,7 +433,6 @@ enum net_bridge_opts {
        BROPT_NF_CALL_ARPTABLES,
        BROPT_GROUP_ADDR_SET,
        BROPT_MULTICAST_ENABLED,
-       BROPT_MULTICAST_QUERIER,
        BROPT_MULTICAST_QUERY_USE_IFADDR,
        BROPT_MULTICAST_STATS_ENABLED,
        BROPT_HAS_IPV6_ADDR,
@@ -376,6 +441,7 @@ enum net_bridge_opts {
        BROPT_VLAN_STATS_PER_PORT,
        BROPT_NO_LL_LEARN,
        BROPT_VLAN_BRIDGE_BINDING,
+       BROPT_MCAST_VLAN_SNOOPING_ENABLED,
 };
 
 struct net_bridge {
@@ -426,25 +492,14 @@ struct net_bridge {
                BR_USER_STP,            /* new RSTP in userspace */
        } stp_enabled;
 
+       struct net_bridge_mcast         multicast_ctx;
+
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       struct bridge_mcast_stats       __percpu *mcast_stats;
 
        u32                             hash_max;
 
-       u32                             multicast_last_member_count;
-       u32                             multicast_startup_query_count;
-
-       u8                              multicast_igmp_version;
-       u8                              multicast_router;
-#if IS_ENABLED(CONFIG_IPV6)
-       u8                              multicast_mld_version;
-#endif
        spinlock_t                      multicast_lock;
-       unsigned long                   multicast_last_member_interval;
-       unsigned long                   multicast_membership_interval;
-       unsigned long                   multicast_querier_interval;
-       unsigned long                   multicast_query_interval;
-       unsigned long                   multicast_query_response_interval;
-       unsigned long                   multicast_startup_query_interval;
 
        struct rhashtable               mdb_hash_tbl;
        struct rhashtable               sg_port_tbl;
@@ -452,19 +507,6 @@ struct net_bridge {
        struct hlist_head               mcast_gc_list;
        struct hlist_head               mdb_list;
 
-       struct hlist_head               ip4_mc_router_list;
-       struct timer_list               ip4_mc_router_timer;
-       struct bridge_mcast_other_query ip4_other_query;
-       struct bridge_mcast_own_query   ip4_own_query;
-       struct bridge_mcast_querier     ip4_querier;
-       struct bridge_mcast_stats       __percpu *mcast_stats;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct hlist_head               ip6_mc_router_list;
-       struct timer_list               ip6_mc_router_timer;
-       struct bridge_mcast_other_query ip6_other_query;
-       struct bridge_mcast_own_query   ip6_own_query;
-       struct bridge_mcast_querier     ip6_querier;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
        struct work_struct              mcast_gc_work;
 #endif
 
@@ -476,7 +518,12 @@ struct net_bridge {
        u32                             auto_cnt;
 
 #ifdef CONFIG_NET_SWITCHDEV
-       int offload_fwd_mark;
+       /* Counter used to make sure that hardware domains get unique
+        * identifiers in case a bridge spans multiple switchdev instances.
+        */
+       int                             last_hwdom;
+       /* Bit mask of hardware domain numbers in use */
+       unsigned long                   busy_hwdoms;
 #endif
        struct hlist_head               fdb_list;
 
@@ -506,7 +553,20 @@ struct br_input_skb_cb {
 #endif
 
 #ifdef CONFIG_NET_SWITCHDEV
-       int offload_fwd_mark;
+       /* Set if TX data plane offloading is used towards at least one
+        * hardware domain.
+        */
+       u8 tx_fwd_offload:1;
+       /* The switchdev hardware domain from which this packet was received.
+        * If skb->offload_fwd_mark was set, then this packet was already
+        * forwarded by hardware to the other ports in the source hardware
+        * domain, otherwise it wasn't.
+        */
+       int src_hwdom;
+       /* Bit mask of hardware domains towards this packet has already been
+        * transmitted using the TX data plane offload.
+        */
+       unsigned long fwd_hwdoms;
 #endif
 };
 
@@ -616,6 +676,20 @@ static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur,
        return true;
 }
 
+static inline u8 br_vlan_multicast_router(const struct net_bridge_vlan *v)
+{
+       u8 mcast_router = MDB_RTR_TYPE_DISABLED;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       if (!br_vlan_is_master(v))
+               mcast_router = v->port_mcast_ctx.multicast_router;
+       else
+               mcast_router = v->br_mcast_ctx.multicast_router;
+#endif
+
+       return mcast_router;
+}
+
 static inline int br_afspec_cmd_to_rtm(int cmd)
 {
        switch (cmd) {
@@ -718,6 +792,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
                          const unsigned char *addr, u16 vid, bool offloaded);
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+                 struct notifier_block *nb);
 
 /* br_forward.c */
 enum br_pkt_type {
@@ -790,15 +866,18 @@ br_port_get_check_rtnl(const struct net_device *dev)
 }
 
 /* br_ioctl.c */
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
-                            void __user *arg);
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+                         void __user *data, int cmd);
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+                 struct ifreq *ifr, void __user *uarg);
 
 /* br_multicast.c */
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+                    struct net_bridge_mcast_port **pmctx,
+                    struct net_bridge_vlan *vlan,
                     struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
                                        struct sk_buff *skb, u16 vid);
 int br_multicast_add_port(struct net_bridge_port *port);
 void br_multicast_del_port(struct net_bridge_port *port);
@@ -810,17 +889,22 @@ void br_multicast_leave_snoopers(struct net_bridge *br);
 void br_multicast_open(struct net_bridge *br);
 void br_multicast_stop(struct net_bridge *br);
 void br_multicast_dev_del(struct net_bridge *br);
-void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
-                       struct sk_buff *skb, bool local_rcv, bool local_orig);
-int br_multicast_set_router(struct net_bridge *br, unsigned long val);
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb,
+                       struct net_bridge_mcast *brmctx,
+                       bool local_rcv, bool local_orig);
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val);
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+                                unsigned long val);
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
 int br_multicast_toggle(struct net_bridge *br, unsigned long val,
                        struct netlink_ext_ack *extack);
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+                                 unsigned long val);
 #if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+                                unsigned long val);
 #endif
 struct net_bridge_mdb_entry *
 br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
@@ -835,12 +919,13 @@ int br_mdb_hash_init(struct net_bridge *br);
 void br_mdb_hash_fini(struct net_bridge *br);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
                   struct net_bridge_port_group *pg, int type);
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
                   int type);
 void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
                         struct net_bridge_port_group *pg,
                         struct net_bridge_port_group __rcu **pp);
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+                       const struct net_bridge_port *p,
                        const struct sk_buff *skb, u8 type, u8 dir);
 int br_multicast_init_stats(struct net_bridge *br);
 void br_multicast_uninit_stats(struct net_bridge *br);
@@ -849,7 +934,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
                            struct br_mcast_stats *dest);
 void br_mdb_init(void);
 void br_mdb_uninit(void);
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+                           struct net_bridge_mdb_entry *mp, bool notify);
 void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
 void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
                                     u8 filter_mode);
@@ -859,6 +945,29 @@ struct net_bridge_group_src *
 br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
 void br_multicast_del_group_src(struct net_bridge_group_src *src,
                                bool fastleave);
+void br_multicast_ctx_init(struct net_bridge *br,
+                          struct net_bridge_vlan *vlan,
+                          struct net_bridge_mcast *brmctx);
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx);
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+                               struct net_bridge_vlan *vlan,
+                               struct net_bridge_mcast_port *pmctx);
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx);
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on);
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+                                     struct netlink_ext_ack *extack);
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+                 const void *ctx, bool adding, struct notifier_block *nb,
+                 struct netlink_ext_ack *extack);
+int br_rports_fill_info(struct sk_buff *skb,
+                       const struct net_bridge_mcast *brmctx);
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+                                   const struct net_bridge_mcast *brmctx,
+                                   int nest_attr);
+size_t br_multicast_querier_state_size(void);
+size_t br_rports_size(const struct net_bridge_mcast *brmctx);
 
 static inline bool br_group_is_l2(const struct br_ip *group)
 {
@@ -869,52 +978,65 @@ static inline bool br_group_is_l2(const struct br_ip *group)
        rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
 static inline struct hlist_node *
-br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) {
+br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx,
+                                 struct sk_buff *skb)
+{
 #if IS_ENABLED(CONFIG_IPV6)
        if (skb->protocol == htons(ETH_P_IPV6))
-               return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list));
+               return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list));
 #endif
-       return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list));
+       return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list));
 }
 
 static inline struct net_bridge_port *
-br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
+br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb)
+{
+       struct net_bridge_mcast_port *mctx;
+
 #if IS_ENABLED(CONFIG_IPV6)
        if (skb->protocol == htons(ETH_P_IPV6))
-               return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist);
+               mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+                                       ip6_rlist);
+       else
 #endif
-       return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
+               mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+                                       ip4_rlist);
+
+       if (mctx)
+               return mctx->port;
+       else
+               return NULL;
 }
 
-static inline bool br_ip4_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx)
 {
-       return timer_pending(&br->ip4_mc_router_timer);
+       return timer_pending(&brmctx->ip4_mc_router_timer);
 }
 
-static inline bool br_ip6_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-       return timer_pending(&br->ip6_mc_router_timer);
+       return timer_pending(&brmctx->ip6_mc_router_timer);
 #else
        return false;
 #endif
 }
 
 static inline bool
-br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
+br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb)
 {
-       switch (br->multicast_router) {
+       switch (brmctx->multicast_router) {
        case MDB_RTR_TYPE_PERM:
                return true;
        case MDB_RTR_TYPE_TEMP_QUERY:
                if (skb) {
                        if (skb->protocol == htons(ETH_P_IP))
-                               return br_ip4_multicast_is_router(br);
+                               return br_ip4_multicast_is_router(brmctx);
                        else if (skb->protocol == htons(ETH_P_IPV6))
-                               return br_ip6_multicast_is_router(br);
+                               return br_ip6_multicast_is_router(brmctx);
                } else {
-                       return br_ip4_multicast_is_router(br) ||
-                              br_ip6_multicast_is_router(br);
+                       return br_ip4_multicast_is_router(brmctx) ||
+                              br_ip6_multicast_is_router(brmctx);
                }
                fallthrough;
        default:
@@ -923,14 +1045,14 @@ br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
 }
 
 static inline bool
-__br_multicast_querier_exists(struct net_bridge *br,
-                               struct bridge_mcast_other_query *querier,
-                               const bool is_ipv6)
+__br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
+                             struct bridge_mcast_other_query *querier,
+                             const bool is_ipv6)
 {
        bool own_querier_enabled;
 
-       if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
-               if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
+       if (brmctx->multicast_querier) {
+               if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR))
                        own_querier_enabled = false;
                else
                        own_querier_enabled = true;
@@ -942,18 +1064,18 @@ __br_multicast_querier_exists(struct net_bridge *br,
               (own_querier_enabled || timer_pending(&querier->timer));
 }
 
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
                                               struct ethhdr *eth,
                                               const struct net_bridge_mdb_entry *mdb)
 {
        switch (eth->h_proto) {
        case (htons(ETH_P_IP)):
-               return __br_multicast_querier_exists(br,
-                       &br->ip4_other_query, false);
+               return __br_multicast_querier_exists(brmctx,
+                       &brmctx->ip4_other_query, false);
 #if IS_ENABLED(CONFIG_IPV6)
        case (htons(ETH_P_IPV6)):
-               return __br_multicast_querier_exists(br,
-                       &br->ip6_other_query, true);
+               return __br_multicast_querier_exists(brmctx,
+                       &brmctx->ip6_other_query, true);
 #endif
        default:
                return !!mdb && br_group_is_l2(&mdb->addr);
@@ -974,15 +1096,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip)
        }
 }
 
-static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
-                                                  __be16 proto)
+static inline bool
+br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx,
+                               __be16 proto)
 {
        switch (proto) {
        case htons(ETH_P_IP):
-               return !!(br->multicast_igmp_version == 3);
+               return !!(brmctx->multicast_igmp_version == 3);
 #if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6):
-               return !!(br->multicast_mld_version == 2);
+               return !!(brmctx->multicast_mld_version == 2);
 #endif
        default:
                return false;
@@ -994,28 +1117,145 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
        return BR_INPUT_SKB_CB(skb)->igmp;
 }
 
-static inline unsigned long br_multicast_lmqt(const struct net_bridge *br)
+static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx)
 {
-       return br->multicast_last_member_interval *
-              br->multicast_last_member_count;
+       return brmctx->multicast_last_member_interval *
+              brmctx->multicast_last_member_count;
 }
 
-static inline unsigned long br_multicast_gmi(const struct net_bridge *br)
+static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
 {
        /* use the RFC default of 2 for QRV */
-       return 2 * br->multicast_query_interval +
-              br->multicast_query_response_interval;
+       return 2 * brmctx->multicast_query_interval +
+              brmctx->multicast_query_response_interval;
+}
+
+static inline bool
+br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx)
+{
+       return !!brmctx->vlan;
+}
+
+static inline bool
+br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx)
+{
+       return !!pmctx->vlan;
+}
+
+static inline struct net_bridge_mcast *
+br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+{
+       if (!br_multicast_port_ctx_is_vlan(pmctx))
+               return &pmctx->port->br->multicast_ctx;
+       else
+               return &pmctx->vlan->brvlan->br_mcast_ctx;
+}
+
+static inline bool
+br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+{
+       return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+              br_multicast_ctx_is_vlan(brmctx) &&
+              !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx)
+{
+       return br_multicast_ctx_is_vlan(brmctx) &&
+              !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+       return br_multicast_port_ctx_is_vlan(pmctx) &&
+              !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+       return pmctx->port->state == BR_STATE_DISABLED ||
+              (br_multicast_port_ctx_is_vlan(pmctx) &&
+               (br_multicast_port_ctx_vlan_disabled(pmctx) ||
+                pmctx->vlan->state == BR_STATE_DISABLED));
+}
+
+static inline bool
+br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx)
+{
+       return br_multicast_port_ctx_state_disabled(pmctx) ||
+              pmctx->port->state == BR_STATE_BLOCKING ||
+              (br_multicast_port_ctx_is_vlan(pmctx) &&
+               pmctx->vlan->state == BR_STATE_BLOCKING);
+}
+
+static inline bool
+br_rports_have_mc_router(const struct net_bridge_mcast *brmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       return !hlist_empty(&brmctx->ip4_mc_router_list) ||
+              !hlist_empty(&brmctx->ip6_mc_router_list);
+#else
+       return !hlist_empty(&brmctx->ip4_mc_router_list);
+#endif
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+                              const struct net_bridge_mcast *brmctx2)
+{
+       return brmctx1->multicast_igmp_version ==
+              brmctx2->multicast_igmp_version &&
+              brmctx1->multicast_last_member_count ==
+              brmctx2->multicast_last_member_count &&
+              brmctx1->multicast_startup_query_count ==
+              brmctx2->multicast_startup_query_count &&
+              brmctx1->multicast_last_member_interval ==
+              brmctx2->multicast_last_member_interval &&
+              brmctx1->multicast_membership_interval ==
+              brmctx2->multicast_membership_interval &&
+              brmctx1->multicast_querier_interval ==
+              brmctx2->multicast_querier_interval &&
+              brmctx1->multicast_query_interval ==
+              brmctx2->multicast_query_interval &&
+              brmctx1->multicast_query_response_interval ==
+              brmctx2->multicast_query_response_interval &&
+              brmctx1->multicast_startup_query_interval ==
+              brmctx2->multicast_startup_query_interval &&
+              brmctx1->multicast_querier == brmctx2->multicast_querier &&
+              brmctx1->multicast_router == brmctx2->multicast_router &&
+              !br_rports_have_mc_router(brmctx1) &&
+              !br_rports_have_mc_router(brmctx2) &&
+#if IS_ENABLED(CONFIG_IPV6)
+              brmctx1->multicast_mld_version ==
+              brmctx2->multicast_mld_version &&
+#endif
+              true;
+}
+
+static inline bool
+br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
+{
+       bool vlan_snooping_enabled;
+
+       vlan_snooping_enabled = !!br_opt_get(brmctx->br,
+                                            BROPT_MCAST_VLAN_SNOOPING_ENABLED);
+
+       return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx));
 }
 #else
-static inline int br_multicast_rcv(struct net_bridge *br,
-                                  struct net_bridge_port *port,
+static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+                                  struct net_bridge_mcast_port **pmctx,
+                                  struct net_bridge_vlan *vlan,
                                   struct sk_buff *skb,
                                   u16 vid)
 {
        return 0;
 }
 
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
                                                      struct sk_buff *skb, u16 vid)
 {
        return NULL;
@@ -1064,17 +1304,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br)
 
 static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
                                      struct sk_buff *skb,
+                                     struct net_bridge_mcast *brmctx,
                                      bool local_rcv, bool local_orig)
 {
 }
 
-static inline bool br_multicast_is_router(struct net_bridge *br,
+static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx,
                                          struct sk_buff *skb)
 {
        return false;
 }
 
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
                                               struct ethhdr *eth,
                                               const struct net_bridge_mdb_entry *mdb)
 {
@@ -1118,13 +1359,67 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
 {
        return 0;
 }
+
+static inline void br_multicast_ctx_init(struct net_bridge *br,
+                                        struct net_bridge_vlan *vlan,
+                                        struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_init(struct net_bridge_port *port,
+                                             struct net_bridge_vlan *vlan,
+                                             struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
+                                               bool on)
+{
+}
+
+static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br,
+                                                   bool on,
+                                                   struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
+                                                  bool on)
+{
+       return false;
+}
+
+static inline int br_mdb_replay(struct net_device *br_dev,
+                               struct net_device *dev, const void *ctx,
+                               bool adding, struct notifier_block *nb,
+                               struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+                              const struct net_bridge_mcast *brmctx2)
+{
+       return true;
+}
 #endif
 
 /* br_vlan.c */
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 bool br_allowed_ingress(const struct net_bridge *br,
                        struct net_bridge_vlan_group *vg, struct sk_buff *skb,
-                       u16 *vid, u8 *state);
+                       u16 *vid, u8 *state,
+                       struct net_bridge_vlan **vlan);
 bool br_allowed_egress(struct net_bridge_vlan_group *vg,
                       const struct sk_buff *skb);
 bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
@@ -1168,6 +1463,9 @@ void br_vlan_notify(const struct net_bridge *br,
                    const struct net_bridge_port *p,
                    u16 vid, u16 vid_range,
                    int cmd);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+                  const void *ctx, bool adding, struct notifier_block *nb,
+                  struct netlink_ext_ack *extack);
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
                             const struct net_bridge_vlan *range_end);
 
@@ -1236,8 +1534,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
 static inline bool br_allowed_ingress(const struct net_bridge *br,
                                      struct net_bridge_vlan_group *vg,
                                      struct sk_buff *skb,
-                                     u16 *vid, u8 *state)
+                                     u16 *vid, u8 *state,
+                                     struct net_bridge_vlan **vlan)
+
 {
+       *vlan = NULL;
        return true;
 }
 
@@ -1410,6 +1711,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 {
        return true;
 }
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+                                struct net_device *dev, const void *ctx,
+                                bool adding, struct notifier_block *nb,
+                                struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
 #endif
 
 /* br_vlan_options.c */
@@ -1424,6 +1733,14 @@ int br_vlan_process_options(const struct net_bridge *br,
                            struct net_bridge_vlan *range_end,
                            struct nlattr **tb,
                            struct netlink_ext_ack *extack);
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+                                      const struct nlattr *attr,
+                                      int cmd,
+                                      struct netlink_ext_ack *extack);
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+                                        const struct net_bridge_vlan *r_end);
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+                             const struct net_bridge_vlan *v_opts);
 
 /* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
 static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
@@ -1645,7 +1962,25 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 
 /* br_switchdev.c */
 #ifdef CONFIG_NET_SWITCHDEV
-int nbp_switchdev_mark_set(struct net_bridge_port *p);
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb);
+
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
+
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
+
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb);
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb);
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb);
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
@@ -1659,15 +1994,50 @@ void br_switchdev_fdb_notify(struct net_bridge *br,
 int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
                               struct netlink_ext_ack *extack);
 int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
+void br_switchdev_init(struct net_bridge *br);
 
 static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
 {
        skb->offload_fwd_mark = 0;
 }
 #else
-static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+                         struct net_device *dev, const void *ctx,
+                         struct notifier_block *atomic_nb,
+                         struct notifier_block *blocking_nb,
+                         bool tx_fwd_offload,
+                         struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                           struct notifier_block *atomic_nb,
+                           struct notifier_block *blocking_nb)
+{
+}
+
+static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+       return false;
+}
+
+static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                       struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                        struct sk_buff *skb)
 {
-       return 0;
 }
 
 static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
@@ -1710,6 +2080,11 @@ br_switchdev_fdb_notify(struct net_bridge *br,
 static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
 {
 }
+
+static inline void br_switchdev_init(struct net_bridge *br)
+{
+}
+
 #endif /* CONFIG_NET_SWITCHDEV */
 
 /* br_arp_nd_proxy.c */
index f89049f..adf82a0 100644 (file)
@@ -51,7 +51,8 @@ struct net_bridge_group_eht_set {
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+                            struct net_bridge_port_group *pg,
                             void *h_addr,
                             void *srcs,
                             u32 nsrcs,
index c54cc26..2b05328 100644 (file)
@@ -38,9 +38,9 @@ int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid,
 void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port);
 void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
                          struct net_bridge_vlan *vlan);
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
-                                 struct net_bridge_port *p,
-                                 struct net_bridge_vlan_group *vg);
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+                                  struct net_bridge_port *p,
+                                  struct net_bridge_vlan_group *vg);
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
                                 struct net_bridge_vlan *vlan);
 bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
index d3adee0..6bf518d 100644 (file)
@@ -8,50 +8,65 @@
 
 #include "br_private.h"
 
-static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
-{
-       struct net_bridge_port *p;
+static struct static_key_false br_switchdev_tx_fwd_offload;
 
-       /* dev is yet to be added to the port list. */
-       list_for_each_entry(p, &br->port_list, list) {
-               if (netdev_port_same_parent_id(dev, p->dev))
-                       return p->offload_fwd_mark;
-       }
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+                                            const struct sk_buff *skb)
+{
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
 
-       return ++br->offload_fwd_mark;
+       return (p->flags & BR_TX_FWD_OFFLOAD) &&
+              (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
 }
 
-int nbp_switchdev_mark_set(struct net_bridge_port *p)
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
 {
-       struct netdev_phys_item_id ppid = { };
-       int err;
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
 
-       ASSERT_RTNL();
+       return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
 
-       err = dev_get_port_parent_id(p->dev, &ppid, true);
-       if (err) {
-               if (err == -EOPNOTSUPP)
-                       return 0;
-               return err;
-       }
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+       skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+}
 
-       p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+/* Mark the frame for TX forwarding offload if this egress port supports it */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
 
-       return 0;
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has been already forwarded to, to avoid further cloning to
+ * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
+ * return false.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
 }
 
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb)
 {
-       if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
-               BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+       if (p->hwdom)
+               BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
 }
 
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
                                  const struct sk_buff *skb)
 {
-       return !skb->offload_fwd_mark ||
-              BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+       struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+       return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+               (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
 }
 
 /* Flags that can be offloaded to hardware */
@@ -112,7 +127,6 @@ br_switchdev_fdb_notify(struct net_bridge *br,
                        const struct net_bridge_fdb_entry *fdb, int type)
 {
        const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
-       struct net_device *dev = dst ? dst->dev : br->dev;
        struct switchdev_notifier_fdb_info info = {
                .addr = fdb->key.addr.addr,
                .vid = fdb->key.vlan_id,
@@ -120,6 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br,
                .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
                .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
        };
+       struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
 
        switch (type) {
        case RTM_DELNEIGH:
@@ -156,3 +171,182 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
 
        return switchdev_port_obj_del(dev, &v.obj);
 }
+
+static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
+{
+       struct net_bridge *br = joining->br;
+       struct net_bridge_port *p;
+       int hwdom;
+
+       /* joining is yet to be added to the port list. */
+       list_for_each_entry(p, &br->port_list, list) {
+               if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
+                       joining->hwdom = p->hwdom;
+                       return 0;
+               }
+       }
+
+       hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
+       if (hwdom >= BR_HWDOM_MAX)
+               return -EBUSY;
+
+       set_bit(hwdom, &br->busy_hwdoms);
+       joining->hwdom = hwdom;
+       return 0;
+}
+
+static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
+{
+       struct net_bridge *br = leaving->br;
+       struct net_bridge_port *p;
+
+       /* leaving is no longer in the port list. */
+       list_for_each_entry(p, &br->port_list, list) {
+               if (p->hwdom == leaving->hwdom)
+                       return;
+       }
+
+       clear_bit(leaving->hwdom, &br->busy_hwdoms);
+}
+
+static int nbp_switchdev_add(struct net_bridge_port *p,
+                            struct netdev_phys_item_id ppid,
+                            bool tx_fwd_offload,
+                            struct netlink_ext_ack *extack)
+{
+       int err;
+
+       if (p->offload_count) {
+               /* Prevent unsupported configurations such as a bridge port
+                * which is a bonding interface, and the member ports are from
+                * different hardware switches.
+                */
+               if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Same bridge port cannot be offloaded by two physical switches");
+                       return -EBUSY;
+               }
+
+               /* Tolerate drivers that call switchdev_bridge_port_offload()
+                * more than once for the same bridge port, such as when the
+                * bridge port is an offloaded bonding/team interface.
+                */
+               p->offload_count++;
+
+               return 0;
+       }
+
+       p->ppid = ppid;
+       p->offload_count = 1;
+
+       err = nbp_switchdev_hwdom_set(p);
+       if (err)
+               return err;
+
+       if (tx_fwd_offload) {
+               p->flags |= BR_TX_FWD_OFFLOAD;
+               static_branch_inc(&br_switchdev_tx_fwd_offload);
+       }
+
+       return 0;
+}
+
+static void nbp_switchdev_del(struct net_bridge_port *p)
+{
+       if (WARN_ON(!p->offload_count))
+               return;
+
+       p->offload_count--;
+
+       if (p->offload_count)
+               return;
+
+       if (p->hwdom)
+               nbp_switchdev_hwdom_put(p);
+
+       if (p->flags & BR_TX_FWD_OFFLOAD) {
+               p->flags &= ~BR_TX_FWD_OFFLOAD;
+               static_branch_dec(&br_switchdev_tx_fwd_offload);
+       }
+}
+
+static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
+                                  struct notifier_block *atomic_nb,
+                                  struct notifier_block *blocking_nb,
+                                  struct netlink_ext_ack *extack)
+{
+       struct net_device *br_dev = p->br->dev;
+       struct net_device *dev = p->dev;
+       int err;
+
+       err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+       if (err && err != -EOPNOTSUPP)
+               return err;
+
+       err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+       if (err && err != -EOPNOTSUPP)
+               return err;
+
+       err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+       if (err && err != -EOPNOTSUPP)
+               return err;
+
+       return 0;
+}
+
+static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
+                                     const void *ctx,
+                                     struct notifier_block *atomic_nb,
+                                     struct notifier_block *blocking_nb)
+{
+       struct net_device *br_dev = p->br->dev;
+       struct net_device *dev = p->dev;
+
+       br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+       br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+       br_fdb_replay(br_dev, ctx, false, atomic_nb);
+}
+
+/* Let the bridge know that this port is offloaded, so that it can assign a
+ * switchdev hardware domain to it.
+ */
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
+{
+       struct netdev_phys_item_id ppid;
+       int err;
+
+       err = dev_get_port_parent_id(dev, &ppid, false);
+       if (err)
+               return err;
+
+       err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
+       if (err)
+               return err;
+
+       err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+       if (err)
+               goto out_switchdev_del;
+
+       return 0;
+
+out_switchdev_del:
+       nbp_switchdev_del(p);
+
+       return err;
+}
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb)
+{
+       nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
+
+       nbp_switchdev_del(p);
+}
index 381467b..d9a89dd 100644 (file)
@@ -384,13 +384,13 @@ static ssize_t multicast_router_show(struct device *d,
                                     struct device_attribute *attr, char *buf)
 {
        struct net_bridge *br = to_bridge(d);
-       return sprintf(buf, "%d\n", br->multicast_router);
+       return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router);
 }
 
 static int set_multicast_router(struct net_bridge *br, unsigned long val,
                                struct netlink_ext_ack *extack)
 {
-       return br_multicast_set_router(br, val);
+       return br_multicast_set_router(&br->multicast_ctx, val);
 }
 
 static ssize_t multicast_router_store(struct device *d,
@@ -447,13 +447,13 @@ static ssize_t multicast_querier_show(struct device *d,
                                      char *buf)
 {
        struct net_bridge *br = to_bridge(d);
-       return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
+       return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier);
 }
 
 static int set_multicast_querier(struct net_bridge *br, unsigned long val,
                                 struct netlink_ext_ack *extack)
 {
-       return br_multicast_set_querier(br, val);
+       return br_multicast_set_querier(&br->multicast_ctx, val);
 }
 
 static ssize_t multicast_querier_store(struct device *d,
@@ -514,13 +514,13 @@ static ssize_t multicast_igmp_version_show(struct device *d,
 {
        struct net_bridge *br = to_bridge(d);
 
-       return sprintf(buf, "%u\n", br->multicast_igmp_version);
+       return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version);
 }
 
 static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val,
                                      struct netlink_ext_ack *extack)
 {
-       return br_multicast_set_igmp_version(br, val);
+       return br_multicast_set_igmp_version(&br->multicast_ctx, val);
 }
 
 static ssize_t multicast_igmp_version_store(struct device *d,
@@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d,
                                                char *buf)
 {
        struct net_bridge *br = to_bridge(d);
-       return sprintf(buf, "%u\n", br->multicast_last_member_count);
+       return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count);
 }
 
 static int set_last_member_count(struct net_bridge *br, unsigned long val,
                                 struct netlink_ext_ack *extack)
 {
-       br->multicast_last_member_count = val;
+       br->multicast_ctx.multicast_last_member_count = val;
        return 0;
 }
 
@@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show(
        struct device *d, struct device_attribute *attr, char *buf)
 {
        struct net_bridge *br = to_bridge(d);
-       return sprintf(buf, "%u\n", br->multicast_startup_query_count);
+       return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count);
 }
 
 static int set_startup_query_count(struct net_bridge *br, unsigned long val,
                                   struct netlink_ext_ack *extack)
 {
-       br->multicast_startup_query_count = val;
+       br->multicast_ctx.multicast_startup_query_count = val;
        return 0;
 }
 
@@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show(
 {
        struct net_bridge *br = to_bridge(d);
        return sprintf(buf, "%lu\n",
-                      jiffies_to_clock_t(br->multicast_last_member_interval));
+                      jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval));
 }
 
 static int set_last_member_interval(struct net_bridge *br, unsigned long val,
                                    struct netlink_ext_ack *extack)
 {
-       br->multicast_last_member_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show(
 {
        struct net_bridge *br = to_bridge(d);
        return sprintf(buf, "%lu\n",
-                      jiffies_to_clock_t(br->multicast_membership_interval));
+                      jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval));
 }
 
 static int set_membership_interval(struct net_bridge *br, unsigned long val,
                                   struct netlink_ext_ack *extack)
 {
-       br->multicast_membership_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d,
 {
        struct net_bridge *br = to_bridge(d);
        return sprintf(buf, "%lu\n",
-                      jiffies_to_clock_t(br->multicast_querier_interval));
+                      jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval));
 }
 
 static int set_querier_interval(struct net_bridge *br, unsigned long val,
                                struct netlink_ext_ack *extack)
 {
-       br->multicast_querier_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d,
 {
        struct net_bridge *br = to_bridge(d);
        return sprintf(buf, "%lu\n",
-                      jiffies_to_clock_t(br->multicast_query_interval));
+                      jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval));
 }
 
 static int set_query_interval(struct net_bridge *br, unsigned long val,
                              struct netlink_ext_ack *extack)
 {
-       br->multicast_query_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show(
        struct net_bridge *br = to_bridge(d);
        return sprintf(
                buf, "%lu\n",
-               jiffies_to_clock_t(br->multicast_query_response_interval));
+               jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval));
 }
 
 static int set_query_response_interval(struct net_bridge *br, unsigned long val,
                                       struct netlink_ext_ack *extack)
 {
-       br->multicast_query_response_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show(
        struct net_bridge *br = to_bridge(d);
        return sprintf(
                buf, "%lu\n",
-               jiffies_to_clock_t(br->multicast_startup_query_interval));
+               jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval));
 }
 
 static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
                                      struct netlink_ext_ack *extack)
 {
-       br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+       br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
        return 0;
 }
 
@@ -751,13 +751,13 @@ static ssize_t multicast_mld_version_show(struct device *d,
 {
        struct net_bridge *br = to_bridge(d);
 
-       return sprintf(buf, "%u\n", br->multicast_mld_version);
+       return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version);
 }
 
 static int set_multicast_mld_version(struct net_bridge *br, unsigned long val,
                                     struct netlink_ext_ack *extack)
 {
-       return br_multicast_set_mld_version(br, val);
+       return br_multicast_set_mld_version(&br->multicast_ctx, val);
 }
 
 static ssize_t multicast_mld_version_store(struct device *d,
index 72e9237..07fa760 100644 (file)
@@ -244,13 +244,13 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
 {
-       return sprintf(buf, "%d\n", p->multicast_router);
+       return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router);
 }
 
 static int store_multicast_router(struct net_bridge_port *p,
                                      unsigned long v)
 {
-       return br_multicast_set_port_router(p, v);
+       return br_multicast_set_port_router(&p->multicast_ctx, v);
 }
 static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
                   store_multicast_router);
index a08e9f1..19f65ab 100644 (file)
@@ -190,6 +190,8 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
                rhashtable_remove_fast(&vg->vlan_hash,
                                       &masterv->vnode, br_vlan_rht_params);
                __vlan_del_list(masterv);
+               br_multicast_toggle_one_vlan(masterv, false);
+               br_multicast_ctx_deinit(&masterv->br_mcast_ctx);
                call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
        }
 }
@@ -280,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
                } else {
                        v->stats = masterv->stats;
                }
+               br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
        } else {
                err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
                if (err && err != -EOPNOTSUPP)
                        goto out;
+               br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
+               v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
        }
 
        /* Add the dev mac and count the vlan only if it's usable */
@@ -306,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
 
        __vlan_add_list(v);
        __vlan_add_flags(v, flags);
+       br_multicast_toggle_one_vlan(v, true);
 
        if (p)
                nbp_vlan_set_vlan_dev_state(p, v->vid);
@@ -374,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v)
                                       br_vlan_rht_params);
                __vlan_del_list(v);
                nbp_vlan_set_vlan_dev_state(p, v->vid);
+               br_multicast_toggle_one_vlan(v, false);
+               br_multicast_port_ctx_deinit(&v->port_mcast_ctx);
                call_rcu(&v->rcu, nbp_vlan_rcu_free);
        }
 
@@ -457,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
                u64_stats_update_end(&stats->syncp);
        }
 
-       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
+       /* If the skb will be sent using forwarding offload, the assumption is
+        * that the switchdev will inject the packet into hardware together
+        * with the bridge VLAN, so that it can be forwarded according to that
+        * VLAN. The switchdev should deal with popping the VLAN header in
+        * hardware on each egress port as appropriate. So only strip the VLAN
+        * header if forwarding offload is not being used.
+        */
+       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
+           !br_switchdev_frame_uses_tx_fwd_offload(skb))
                __vlan_hwaccel_clear_tag(skb);
 
        if (p && (p->flags & BR_VLAN_TUNNEL) &&
@@ -473,7 +489,8 @@ out:
 static bool __allowed_ingress(const struct net_bridge *br,
                              struct net_bridge_vlan_group *vg,
                              struct sk_buff *skb, u16 *vid,
-                             u8 *state)
+                             u8 *state,
+                             struct net_bridge_vlan **vlan)
 {
        struct pcpu_sw_netstats *stats;
        struct net_bridge_vlan *v;
@@ -538,8 +555,9 @@ static bool __allowed_ingress(const struct net_bridge *br,
                         */
                        skb->vlan_tci |= pvid;
 
-               /* if stats are disabled we can avoid the lookup */
-               if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+               /* if snooping and stats are disabled we can avoid the lookup */
+               if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+                   !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
                        if (*state == BR_STATE_FORWARDING) {
                                *state = br_vlan_get_pvid_state(vg);
                                return br_vlan_state_allowed(*state, true);
@@ -566,6 +584,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
                u64_stats_update_end(&stats->syncp);
        }
 
+       *vlan = v;
+
        return true;
 
 drop:
@@ -575,17 +595,19 @@ drop:
 
 bool br_allowed_ingress(const struct net_bridge *br,
                        struct net_bridge_vlan_group *vg, struct sk_buff *skb,
-                       u16 *vid, u8 *state)
+                       u16 *vid, u8 *state,
+                       struct net_bridge_vlan **vlan)
 {
        /* If VLAN filtering is disabled on the bridge, all packets are
         * permitted.
         */
+       *vlan = NULL;
        if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
                BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
                return true;
        }
 
-       return __allowed_ingress(br, vg, skb, vid, state);
+       return __allowed_ingress(br, vg, skb, vid, state, vlan);
 }
 
 /* Called under RCU. */
@@ -672,6 +694,7 @@ static int br_vlan_add_existing(struct net_bridge *br,
                vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
                vg->num_vlans++;
                *changed = true;
+               br_multicast_toggle_one_vlan(vlan, true);
        }
 
        if (__vlan_add_flags(vlan, flags))
@@ -818,14 +841,21 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
        if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
                return 0;
 
+       br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
+
        err = switchdev_port_attr_set(br->dev, &attr, extack);
-       if (err && err != -EOPNOTSUPP)
+       if (err && err != -EOPNOTSUPP) {
+               br_opt_toggle(br, BROPT_VLAN_ENABLED, !val);
                return err;
+       }
 
-       br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
        br_manage_promisc(br);
        recalculate_group_addr(br);
        br_recalculate_fwd_mask(br);
+       if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+               br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n");
+               br_multicast_toggle_vlan_snooping(br, false, NULL);
+       }
 
        return 0;
 }
@@ -1420,6 +1450,33 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid,
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_info);
 
+int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+                        struct bridge_vlan_info *p_vinfo)
+{
+       struct net_bridge_vlan_group *vg;
+       struct net_bridge_vlan *v;
+       struct net_bridge_port *p;
+
+       p = br_port_get_check_rcu(dev);
+       if (p)
+               vg = nbp_vlan_group_rcu(p);
+       else if (netif_is_bridge_master(dev))
+               vg = br_vlan_group_rcu(netdev_priv(dev));
+       else
+               return -EINVAL;
+
+       v = br_vlan_find(vg, vid);
+       if (!v)
+               return -ENOENT;
+
+       p_vinfo->vid = vid;
+       p_vinfo->flags = v->flags;
+       if (vid == br_get_pvid(vg))
+               p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu);
+
 static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
 {
        return is_vlan_dev(dev) &&
@@ -1838,6 +1895,9 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 
        ASSERT_RTNL();
 
+       if (!nb)
+               return 0;
+
        if (!netif_is_bridge_master(br_dev))
                return -EINVAL;
 
@@ -1884,7 +1944,6 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 
        return err;
 }
-EXPORT_SYMBOL_GPL(br_vlan_replay);
 
 /* check if v_curr can enter a range ending in range_end */
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
@@ -1901,6 +1960,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
                            u32 dump_flags)
 {
        struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
+       bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL);
        bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS);
        struct net_bridge_vlan_group *vg;
        int idx = 0, s_idx = cb->args[1];
@@ -1919,6 +1979,10 @@ static int br_vlan_dump_dev(const struct net_device *dev,
                vg = br_vlan_group_rcu(br);
                p = NULL;
        } else {
+               /* global options are dumped only for bridge devices */
+               if (dump_global)
+                       return 0;
+
                p = br_port_get_rcu(dev);
                if (WARN_ON(!p))
                        return -EINVAL;
@@ -1941,7 +2005,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
 
        /* idx must stay at range's beginning until it is filled in */
        list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
-               if (!br_vlan_should_use(v))
+               if (!dump_global && !br_vlan_should_use(v))
                        continue;
                if (idx < s_idx) {
                        idx++;
@@ -1954,8 +2018,21 @@ static int br_vlan_dump_dev(const struct net_device *dev,
                        continue;
                }
 
-               if (dump_stats || v->vid == pvid ||
-                   !br_vlan_can_enter_range(v, range_end)) {
+               if (dump_global) {
+                       if (br_vlan_global_opts_can_enter_range(v, range_end))
+                               goto update_end;
+                       if (!br_vlan_global_opts_fill(skb, range_start->vid,
+                                                     range_end->vid,
+                                                     range_start)) {
+                               err = -EMSGSIZE;
+                               break;
+                       }
+                       /* advance number of filled vlans */
+                       idx += range_end->vid - range_start->vid + 1;
+
+                       range_start = v;
+               } else if (dump_stats || v->vid == pvid ||
+                          !br_vlan_can_enter_range(v, range_end)) {
                        u16 vlan_flags = br_vlan_flags(range_start, pvid);
 
                        if (!br_vlan_fill_vids(skb, range_start->vid,
@@ -1969,6 +2046,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
 
                        range_start = v;
                }
+update_end:
                range_end = v;
        }
 
@@ -1977,11 +2055,18 @@ static int br_vlan_dump_dev(const struct net_device *dev,
         * - last vlan (range_start == range_end, not in range)
         * - last vlan range (range_start != range_end, in range)
         */
-       if (!err && range_start &&
-           !br_vlan_fill_vids(skb, range_start->vid, range_end->vid,
-                              range_start, br_vlan_flags(range_start, pvid),
-                              dump_stats))
-               err = -EMSGSIZE;
+       if (!err && range_start) {
+               if (dump_global &&
+                   !br_vlan_global_opts_fill(skb, range_start->vid,
+                                             range_end->vid, range_start))
+                       err = -EMSGSIZE;
+               else if (!dump_global &&
+                        !br_vlan_fill_vids(skb, range_start->vid,
+                                           range_end->vid, range_start,
+                                           br_vlan_flags(range_start, pvid),
+                                           dump_stats))
+                       err = -EMSGSIZE;
+       }
 
        cb->args[1] = err ? idx : 0;
 
@@ -2051,6 +2136,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
        [BRIDGE_VLANDB_ENTRY_RANGE]     = { .type = NLA_U16 },
        [BRIDGE_VLANDB_ENTRY_STATE]     = { .type = NLA_U8 },
        [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
+       [BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]      = { .type = NLA_U8 },
 };
 
 static int br_vlan_rtm_process_one(struct net_device *dev,
@@ -2185,12 +2271,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
        }
 
        nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
-               if (nla_type(attr) != BRIDGE_VLANDB_ENTRY)
+               switch (nla_type(attr)) {
+               case BRIDGE_VLANDB_ENTRY:
+                       err = br_vlan_rtm_process_one(dev, attr,
+                                                     nlh->nlmsg_type,
+                                                     extack);
+                       break;
+               case BRIDGE_VLANDB_GLOBAL_OPTIONS:
+                       err = br_vlan_rtm_process_global_options(dev, attr,
+                                                                nlh->nlmsg_type,
+                                                                extack);
+                       break;
+               default:
                        continue;
+               }
 
                vlans++;
-               err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type,
-                                             extack);
                if (err)
                        break;
        }
index b4add9e..8ffd4ed 100644 (file)
@@ -40,22 +40,38 @@ static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr,
 bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
                           const struct net_bridge_vlan *range_end)
 {
+       u8 range_mc_rtr = br_vlan_multicast_router(range_end);
+       u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);
+
        return v_curr->state == range_end->state &&
-              __vlan_tun_can_enter_range(v_curr, range_end);
+              __vlan_tun_can_enter_range(v_curr, range_end) &&
+              curr_mc_rtr == range_mc_rtr;
 }
 
 bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
 {
-       return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE,
-                          br_vlan_get_state(v)) &&
-              __vlan_tun_put(skb, v);
+       if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
+           !__vlan_tun_put(skb, v))
+               return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
+                      br_vlan_multicast_router(v)))
+               return false;
+#endif
+
+       return true;
 }
 
 size_t br_vlan_opts_nl_size(void)
 {
        return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */
               + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */
-              + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */
+              + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+              + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+#endif
+              + 0;
 }
 
 static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
@@ -181,6 +197,18 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
                        return err;
        }
 
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       if (tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]) {
+               u8 val;
+
+               val = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]);
+               err = br_multicast_set_vlan_router(v, val);
+               if (err)
+                       return err;
+               *changed = true;
+       }
+#endif
+
        return 0;
 }
 
@@ -258,3 +286,392 @@ int br_vlan_process_options(const struct net_bridge *br,
 
        return err;
 }
+
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+                                        const struct net_bridge_vlan *r_end)
+{
+       return v_curr->vid - r_end->vid == 1 &&
+              ((v_curr->priv_flags ^ r_end->priv_flags) &
+               BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 &&
+               br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx,
+                                              &r_end->br_mcast_ctx);
+}
+
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+                             const struct net_bridge_vlan *v_opts)
+{
+       struct nlattr *nest2 __maybe_unused;
+       u64 clockval __maybe_unused;
+       struct nlattr *nest;
+
+       nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS);
+       if (!nest)
+               return false;
+
+       if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid))
+               goto out_err;
+
+       if (vid_range && vid < vid_range &&
+           nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range))
+               goto out_err;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+                      !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) ||
+           nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+                      v_opts->br_mcast_ctx.multicast_igmp_version) ||
+           nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+                       v_opts->br_mcast_ctx.multicast_last_member_count) ||
+           nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+                       v_opts->br_mcast_ctx.multicast_startup_query_count) ||
+           nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+                      v_opts->br_mcast_ctx.multicast_querier) ||
+           br_multicast_dump_querier_state(skb, &v_opts->br_mcast_ctx,
+                                           BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE))
+               goto out_err;
+
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_last_member_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_membership_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_querier_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_response_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+       clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_startup_query_interval);
+       if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+                             clockval, BRIDGE_VLANDB_GOPTS_PAD))
+               goto out_err;
+
+       if (br_rports_have_mc_router(&v_opts->br_mcast_ctx)) {
+               nest2 = nla_nest_start(skb,
+                                      BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS);
+               if (!nest2)
+                       goto out_err;
+
+               rcu_read_lock();
+               if (br_rports_fill_info(skb, &v_opts->br_mcast_ctx)) {
+                       rcu_read_unlock();
+                       nla_nest_cancel(skb, nest2);
+                       goto out_err;
+               }
+               rcu_read_unlock();
+
+               nla_nest_end(skb, nest2);
+       }
+
+#if IS_ENABLED(CONFIG_IPV6)
+       if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+                      v_opts->br_mcast_ctx.multicast_mld_version))
+               goto out_err;
+#endif
+#endif
+
+       nla_nest_end(skb, nest);
+
+       return true;
+
+out_err:
+       nla_nest_cancel(skb, nest);
+       return false;
+}
+
+static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v)
+{
+       return NLMSG_ALIGN(sizeof(struct br_vlan_msg))
+               + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */
+               + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+               + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */
+               + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION */
+               + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION */
+               + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT */
+               + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL */
+               + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL */
+               + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER */
+               + br_multicast_querier_state_size() /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE */
+               + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+               + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+#endif
+               + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
+}
+
+static void br_vlan_global_opts_notify(const struct net_bridge *br,
+                                      u16 vid, u16 vid_range)
+{
+       struct net_bridge_vlan *v;
+       struct br_vlan_msg *bvm;
+       struct nlmsghdr *nlh;
+       struct sk_buff *skb;
+       int err = -ENOBUFS;
+
+       /* right now notifications are done only with rtnl held */
+       ASSERT_RTNL();
+
+       /* need to find the vlan due to flags/options */
+       v = br_vlan_find(br_vlan_group(br), vid);
+       if (!v)
+               return;
+
+       skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(v), GFP_KERNEL);
+       if (!skb)
+               goto out_err;
+
+       err = -EMSGSIZE;
+       nlh = nlmsg_put(skb, 0, 0, RTM_NEWVLAN, sizeof(*bvm), 0);
+       if (!nlh)
+               goto out_err;
+       bvm = nlmsg_data(nlh);
+       memset(bvm, 0, sizeof(*bvm));
+       bvm->family = AF_BRIDGE;
+       bvm->ifindex = br->dev->ifindex;
+
+       if (!br_vlan_global_opts_fill(skb, vid, vid_range, v))
+               goto out_err;
+
+       nlmsg_end(skb, nlh);
+       rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
+       return;
+
+out_err:
+       rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err);
+       kfree_skb(skb);
+}
+
+static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+                                          struct net_bridge_vlan_group *vg,
+                                          struct net_bridge_vlan *v,
+                                          struct nlattr **tb,
+                                          bool *changed,
+                                          struct netlink_ext_ack *extack)
+{
+       int err __maybe_unused;
+
+       *changed = false;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) {
+               u8 mc_snooping;
+
+               mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]);
+               if (br_multicast_toggle_global_vlan(v, !!mc_snooping))
+                       *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]) {
+               u8 ver;
+
+               ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]);
+               err = br_multicast_set_igmp_version(&v->br_mcast_ctx, ver);
+               if (err)
+                       return err;
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]) {
+               u32 cnt;
+
+               cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]);
+               v->br_mcast_ctx.multicast_last_member_count = cnt;
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]) {
+               u32 cnt;
+
+               cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]);
+               v->br_mcast_ctx.multicast_startup_query_count = cnt;
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]);
+               v->br_mcast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]);
+               v->br_mcast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]);
+               v->br_mcast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
+               v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]);
+               v->br_mcast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]) {
+               u64 val;
+
+               val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
+               v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+               *changed = true;
+       }
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
+               u8 val;
+
+               val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]);
+               err = br_multicast_set_querier(&v->br_mcast_ctx, val);
+               if (err)
+                       return err;
+               *changed = true;
+       }
+#if IS_ENABLED(CONFIG_IPV6)
+       if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]) {
+               u8 ver;
+
+               ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]);
+               err = br_multicast_set_mld_version(&v->br_mcast_ctx, ver);
+               if (err)
+                       return err;
+               *changed = true;
+       }
+#endif
+#endif
+
+       return 0;
+}
+
+static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
+       [BRIDGE_VLANDB_GOPTS_ID]        = { .type = NLA_U16 },
+       [BRIDGE_VLANDB_GOPTS_RANGE]     = { .type = NLA_U16 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]    = { .type = NLA_U8 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION] = { .type = NLA_U8 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL] = { .type = NLA_U64 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]     = { .type = NLA_U8 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]        = { .type = NLA_U8 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]     = { .type = NLA_U32 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]   = { .type = NLA_U32 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]   = { .type = NLA_U64 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]    = { .type = NLA_U64 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]       = { .type = NLA_U64 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
+       [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+};
+
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+                                      const struct nlattr *attr,
+                                      int cmd,
+                                      struct netlink_ext_ack *extack)
+{
+       struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL;
+       struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1];
+       struct net_bridge_vlan_group *vg;
+       u16 vid, vid_range = 0;
+       struct net_bridge *br;
+       int err = 0;
+
+       if (cmd != RTM_NEWVLAN) {
+               NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation");
+               return -EINVAL;
+       }
+       if (!netif_is_bridge_master(dev)) {
+               NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device");
+               return -EINVAL;
+       }
+       br = netdev_priv(dev);
+       vg = br_vlan_group(br);
+       if (WARN_ON(!vg))
+               return -ENODEV;
+
+       err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr,
+                              br_vlan_db_gpol, extack);
+       if (err)
+               return err;
+
+       if (!tb[BRIDGE_VLANDB_GOPTS_ID]) {
+               NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id");
+               return -EINVAL;
+       }
+       vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]);
+       if (!br_vlan_valid_id(vid, extack))
+               return -EINVAL;
+
+       if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) {
+               vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]);
+               if (!br_vlan_valid_id(vid_range, extack))
+                       return -EINVAL;
+               if (vid >= vid_range) {
+                       NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id");
+                       return -EINVAL;
+               }
+       } else {
+               vid_range = vid;
+       }
+
+       for (; vid <= vid_range; vid++) {
+               bool changed = false;
+
+               v = br_vlan_find(vg, vid);
+               if (!v) {
+                       NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options");
+                       err = -ENOENT;
+                       break;
+               }
+
+               err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed,
+                                                     extack);
+               if (err)
+                       break;
+
+               if (changed) {
+                       /* vlan options changed, check for range */
+                       if (!curr_start) {
+                               curr_start = v;
+                               curr_end = v;
+                               continue;
+                       }
+
+                       if (!br_vlan_global_opts_can_enter_range(v, curr_end)) {
+                               br_vlan_global_opts_notify(br, curr_start->vid,
+                                                          curr_end->vid);
+                               curr_start = v;
+                       }
+                       curr_end = v;
+               } else {
+                       /* nothing changed and nothing to notify yet */
+                       if (!curr_start)
+                               continue;
+
+                       br_vlan_global_opts_notify(br, curr_start->vid,
+                                                  curr_end->vid);
+                       curr_start = NULL;
+                       curr_end = NULL;
+               }
+       }
+       if (curr_start)
+               br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid);
+
+       return err;
+}
index 0101744..6399a8a 100644 (file)
@@ -158,30 +158,28 @@ void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
        rhashtable_destroy(&vg->tunnel_hash);
 }
 
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
-                                 struct net_bridge_port *p,
-                                 struct net_bridge_vlan_group *vg)
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+                                  struct net_bridge_port *p,
+                                  struct net_bridge_vlan_group *vg)
 {
        struct ip_tunnel_info *tinfo = skb_tunnel_info(skb);
        struct net_bridge_vlan *vlan;
 
        if (!vg || !tinfo)
-               return 0;
+               return;
 
        /* if already tagged, ignore */
        if (skb_vlan_tagged(skb))
-               return 0;
+               return;
 
        /* lookup vid, given tunnel id */
        vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id);
        if (!vlan)
-               return 0;
+               return;
 
        skb_dst_drop(skb);
 
        __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
-
-       return 0;
 }
 
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
index 020b148..a7af4ea 100644 (file)
@@ -98,7 +98,7 @@ static const struct nf_hook_ops ebt_ops_broute = {
        .priority       = NF_BR_PRI_FIRST,
 };
 
-static int __net_init broute_net_init(struct net *net)
+static int broute_table_init(struct net *net)
 {
        return ebt_register_table(net, &broute_table, &ebt_ops_broute);
 }
@@ -114,19 +114,30 @@ static void __net_exit broute_net_exit(struct net *net)
 }
 
 static struct pernet_operations broute_net_ops = {
-       .init = broute_net_init,
        .exit = broute_net_exit,
        .pre_exit = broute_net_pre_exit,
 };
 
 static int __init ebtable_broute_init(void)
 {
-       return register_pernet_subsys(&broute_net_ops);
+       int ret = ebt_register_template(&broute_table, broute_table_init);
+
+       if (ret)
+               return ret;
+
+       ret = register_pernet_subsys(&broute_net_ops);
+       if (ret) {
+               ebt_unregister_template(&broute_table);
+               return ret;
+       }
+
+       return 0;
 }
 
 static void __exit ebtable_broute_fini(void)
 {
        unregister_pernet_subsys(&broute_net_ops);
+       ebt_unregister_template(&broute_table);
 }
 
 module_init(ebtable_broute_init);
index 8ec0b37..c0b121d 100644 (file)
@@ -86,7 +86,7 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
        },
 };
 
-static int __net_init frame_filter_net_init(struct net *net)
+static int frame_filter_table_init(struct net *net)
 {
        return ebt_register_table(net, &frame_filter, ebt_ops_filter);
 }
@@ -102,19 +102,30 @@ static void __net_exit frame_filter_net_exit(struct net *net)
 }
 
 static struct pernet_operations frame_filter_net_ops = {
-       .init = frame_filter_net_init,
        .exit = frame_filter_net_exit,
        .pre_exit = frame_filter_net_pre_exit,
 };
 
 static int __init ebtable_filter_init(void)
 {
-       return register_pernet_subsys(&frame_filter_net_ops);
+       int ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+
+       if (ret)
+               return ret;
+
+       ret = register_pernet_subsys(&frame_filter_net_ops);
+       if (ret) {
+               ebt_unregister_template(&frame_filter);
+               return ret;
+       }
+
+       return 0;
 }
 
 static void __exit ebtable_filter_fini(void)
 {
        unregister_pernet_subsys(&frame_filter_net_ops);
+       ebt_unregister_template(&frame_filter);
 }
 
 module_init(ebtable_filter_init);
index 7c8a106..4078151 100644 (file)
@@ -85,7 +85,7 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
        },
 };
 
-static int __net_init frame_nat_net_init(struct net *net)
+static int frame_nat_table_init(struct net *net)
 {
        return ebt_register_table(net, &frame_nat, ebt_ops_nat);
 }
@@ -101,19 +101,30 @@ static void __net_exit frame_nat_net_exit(struct net *net)
 }
 
 static struct pernet_operations frame_nat_net_ops = {
-       .init = frame_nat_net_init,
        .exit = frame_nat_net_exit,
        .pre_exit = frame_nat_net_pre_exit,
 };
 
 static int __init ebtable_nat_init(void)
 {
-       return register_pernet_subsys(&frame_nat_net_ops);
+       int ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+
+       if (ret)
+               return ret;
+
+       ret = register_pernet_subsys(&frame_nat_net_ops);
+       if (ret) {
+               ebt_unregister_template(&frame_nat);
+               return ret;
+       }
+
+       return ret;
 }
 
 static void __exit ebtable_nat_fini(void)
 {
        unregister_pernet_subsys(&frame_nat_net_ops);
+       ebt_unregister_template(&frame_nat);
 }
 
 module_init(ebtable_nat_init);
index f022deb..83d1798 100644 (file)
@@ -44,7 +44,16 @@ struct ebt_pernet {
        struct list_head tables;
 };
 
+struct ebt_template {
+       struct list_head list;
+       char name[EBT_TABLE_MAXNAMELEN];
+       struct module *owner;
+       /* called when table is needed in the given netns */
+       int (*table_init)(struct net *net);
+};
+
 static unsigned int ebt_pernet_id __read_mostly;
+static LIST_HEAD(template_tables);
 static DEFINE_MUTEX(ebt_mutex);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -309,30 +318,57 @@ letscontinue:
 
 /* If it succeeds, returns element and locks mutex */
 static inline void *
-find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
+find_inlist_lock_noload(struct net *net, const char *name, int *error,
                        struct mutex *mutex)
 {
-       struct {
-               struct list_head list;
-               char name[EBT_FUNCTION_MAXNAMELEN];
-       } *e;
+       struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+       struct ebt_template *tmpl;
+       struct ebt_table *table;
 
        mutex_lock(mutex);
-       list_for_each_entry(e, head, list) {
-               if (strcmp(e->name, name) == 0)
-                       return e;
+       list_for_each_entry(table, &ebt_net->tables, list) {
+               if (strcmp(table->name, name) == 0)
+                       return table;
        }
+
+       list_for_each_entry(tmpl, &template_tables, list) {
+               if (strcmp(name, tmpl->name) == 0) {
+                       struct module *owner = tmpl->owner;
+
+                       if (!try_module_get(owner))
+                               goto out;
+
+                       mutex_unlock(mutex);
+
+                       *error = tmpl->table_init(net);
+                       if (*error) {
+                               module_put(owner);
+                               return NULL;
+                       }
+
+                       mutex_lock(mutex);
+                       module_put(owner);
+                       break;
+               }
+       }
+
+       list_for_each_entry(table, &ebt_net->tables, list) {
+               if (strcmp(table->name, name) == 0)
+                       return table;
+       }
+
+out:
        *error = -ENOENT;
        mutex_unlock(mutex);
        return NULL;
 }
 
 static void *
-find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
+find_inlist_lock(struct net *net, const char *name, const char *prefix,
                 int *error, struct mutex *mutex)
 {
        return try_then_request_module(
-                       find_inlist_lock_noload(head, name, error, mutex),
+                       find_inlist_lock_noload(net, name, error, mutex),
                        "%s%s", prefix, name);
 }
 
@@ -340,10 +376,7 @@ static inline struct ebt_table *
 find_table_lock(struct net *net, const char *name, int *error,
                struct mutex *mutex)
 {
-       struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
-
-       return find_inlist_lock(&ebt_net->tables, name,
-                               "ebtable_", error, mutex);
+       return find_inlist_lock(net, name, "ebtable_", error, mutex);
 }
 
 static inline void ebt_free_table_info(struct ebt_table_info *info)
@@ -1258,6 +1291,54 @@ out:
        return ret;
 }
 
+int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct net *net))
+{
+       struct ebt_template *tmpl;
+
+       mutex_lock(&ebt_mutex);
+       list_for_each_entry(tmpl, &template_tables, list) {
+               if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) {
+                       mutex_unlock(&ebt_mutex);
+                       return -EEXIST;
+               }
+       }
+
+       tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+       if (!tmpl) {
+               mutex_unlock(&ebt_mutex);
+               return -ENOMEM;
+       }
+
+       tmpl->table_init = table_init;
+       strscpy(tmpl->name, t->name, sizeof(tmpl->name));
+       tmpl->owner = t->me;
+       list_add(&tmpl->list, &template_tables);
+
+       mutex_unlock(&ebt_mutex);
+       return 0;
+}
+EXPORT_SYMBOL(ebt_register_template);
+
+void ebt_unregister_template(const struct ebt_table *t)
+{
+       struct ebt_template *tmpl;
+
+       mutex_lock(&ebt_mutex);
+       list_for_each_entry(tmpl, &template_tables, list) {
+               if (strcmp(t->name, tmpl->name))
+                       continue;
+
+               list_del(&tmpl->list);
+               mutex_unlock(&ebt_mutex);
+               kfree(tmpl);
+               return;
+       }
+
+       mutex_unlock(&ebt_mutex);
+       WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL(ebt_unregister_template);
+
 static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
 {
        struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
index 12369b6..f6df208 100644 (file)
 
 struct j1939_session;
 enum j1939_sk_errqueue_type {
-       J1939_ERRQUEUE_ACK,
-       J1939_ERRQUEUE_SCHED,
-       J1939_ERRQUEUE_ABORT,
+       J1939_ERRQUEUE_TX_ACK,
+       J1939_ERRQUEUE_TX_SCHED,
+       J1939_ERRQUEUE_TX_ABORT,
+       J1939_ERRQUEUE_RX_RTS,
+       J1939_ERRQUEUE_RX_DPO,
+       J1939_ERRQUEUE_RX_ABORT,
 };
 
 /* j1939 devices */
@@ -87,6 +90,7 @@ struct j1939_priv {
        struct list_head j1939_socks;
 
        struct kref rx_kref;
+       u32 rx_tskey;
 };
 
 void j1939_ecu_put(struct j1939_ecu *ecu);
index 54f6d52..6dff451 100644 (file)
@@ -352,7 +352,7 @@ static void j1939_sk_sock_destruct(struct sock *sk)
 {
        struct j1939_sock *jsk = j1939_sk(sk);
 
-       /* This function will be call by the generic networking code, when then
+       /* This function will be called by the generic networking code, when
         * the socket is ultimately closed (sk->sk_destruct).
         *
         * The race between
@@ -905,20 +905,33 @@ failure:
        return NULL;
 }
 
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
 {
-       return
-               nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
-               0;
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_SRC_ADDR */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_DEST_ADDR */
+                       0;
+       default:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+                       0;
+       }
 }
 
 static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+                                   enum j1939_sk_errqueue_type type)
 {
        struct sk_buff *stats;
        u32 size;
 
-       stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+       stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
        if (!stats)
                return NULL;
 
@@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
                size = min(session->pkt.tx_acked * 7,
                           session->total_message_size);
 
-       nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+                           session->total_message_size);
+               nla_put_u32(stats, J1939_NLA_PGN,
+                           session->skcb.addr.pgn);
+               nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+                                 session->skcb.addr.src_name, J1939_NLA_PAD);
+               nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+                                 session->skcb.addr.dst_name, J1939_NLA_PAD);
+               nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+                          session->skcb.addr.sa);
+               nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+                          session->skcb.addr.da);
+               break;
+       default:
+               nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       }
 
        return stats;
 }
 
-void j1939_sk_errqueue(struct j1939_session *session,
-                      enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+                               enum j1939_sk_errqueue_type type)
 {
        struct j1939_priv *priv = session->priv;
-       struct sock *sk = session->sk;
        struct j1939_sock *jsk;
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        char *state = "UNK";
        int err;
 
-       /* currently we have no sk for the RX session */
-       if (!sk)
-               return;
-
        jsk = j1939_sk(sk);
 
        if (!(jsk->state & J1939_SOCK_ERRQUEUE))
                return;
 
-       skb = j1939_sk_get_timestamping_opt_stats(session);
+       switch (type) {
+       case J1939_ERRQUEUE_TX_ACK:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_SCHED:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_ABORT:
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_DPO:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_ABORT:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+                       return;
+               break;
+       default:
+               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+       }
+
+       skb = j1939_sk_get_timestamping_opt_stats(session, type);
        if (!skb)
                return;
 
@@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session,
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
        switch (type) {
-       case J1939_ERRQUEUE_ACK:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_ACK:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_ACK;
-               state = "ACK";
+               state = "TX ACK";
                break;
-       case J1939_ERRQUEUE_SCHED:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_SCHED:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_SCHED;
-               state = "SCH";
+               state = "TX SCH";
                break;
-       case J1939_ERRQUEUE_ABORT:
+       case J1939_ERRQUEUE_TX_ABORT:
                serr->ee.ee_errno = session->err;
                serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
                serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
-               state = "ABT";
+               state = "TX ABT";
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+               state = "RX RTS";
+               break;
+       case J1939_ERRQUEUE_RX_DPO:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+               state = "RX DPO";
+               break;
+       case J1939_ERRQUEUE_RX_ABORT:
+               serr->ee.ee_errno = session->err;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+               state = "RX ABT";
                break;
-       default:
-               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
        }
 
        serr->opt_stats = true;
@@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session,
                kfree_skb(skb);
 };
 
+void j1939_sk_errqueue(struct j1939_session *session,
+                      enum j1939_sk_errqueue_type type)
+{
+       struct j1939_priv *priv = session->priv;
+       struct j1939_sock *jsk;
+
+       if (session->sk) {
+               /* send TX notifications to the socket of origin  */
+               __j1939_sk_errqueue(session, session->sk, type);
+               return;
+       }
+
+       /* spread RX notifications to all sockets subscribed to this session */
+       spin_lock_bh(&priv->j1939_socks_lock);
+       list_for_each_entry(jsk, &priv->j1939_socks, list) {
+               if (j1939_sk_recv_match_one(jsk, &session->skcb))
+                       __j1939_sk_errqueue(session, &jsk->sk, type);
+       }
+       spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
 void j1939_sk_send_loop_abort(struct sock *sk, int err)
 {
        sk->sk_err = err;
index bdc95bd..bb5c4b8 100644 (file)
@@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session)
 
 static void j1939_session_destroy(struct j1939_session *session)
 {
-       if (session->err)
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
-       else
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+       if (session->transmission) {
+               if (session->err)
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+               else
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+       } else if (session->err) {
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+       }
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
 
@@ -776,7 +780,7 @@ static int j1939_session_tx_dpo(struct j1939_session *session)
 static int j1939_session_tx_dat(struct j1939_session *session)
 {
        struct j1939_priv *priv = session->priv;
-       struct j1939_sk_buff_cb *skcb;
+       struct j1939_sk_buff_cb *se_skcb;
        int offset, pkt_done, pkt_end;
        unsigned int len, pdelay;
        struct sk_buff *se_skb;
@@ -788,7 +792,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
        if (!se_skb)
                return -ENOBUFS;
 
-       skcb = j1939_skb_to_cb(se_skb);
+       se_skcb = j1939_skb_to_cb(se_skb);
        tpdat = se_skb->data;
        ret = 0;
        pkt_done = 0;
@@ -800,7 +804,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 
        while (session->pkt.tx < pkt_end) {
                dat[0] = session->pkt.tx - session->pkt.dpo + 1;
-               offset = (session->pkt.tx * 7) - skcb->offset;
+               offset = (session->pkt.tx * 7) - se_skcb->offset;
                len =  se_skb->len - offset;
                if (len > 7)
                        len = 7;
@@ -808,7 +812,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                if (offset + len > se_skb->len) {
                        netdev_err_once(priv->ndev,
                                        "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
-                                       __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+                                       __func__, session, se_skcb->offset,
+                                       se_skb->len , session->pkt.tx);
                        ret = -EOVERFLOW;
                        goto out_free;
                }
@@ -821,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                memcpy(&dat[1], &tpdat[offset], len);
                ret = j1939_tp_tx_dat(session, dat, len + 1);
                if (ret < 0) {
-                       /* ENOBUS == CAN interface TX queue is full */
+                       /* ENOBUFS == CAN interface TX queue is full */
                        if (ret != -ENOBUFS)
                                netdev_alert(priv->ndev,
                                             "%s: 0x%p: queue data error: %i\n",
@@ -1043,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session)
        if (ret)
                goto out_free;
 
-       j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
        j1939_sk_queue_activate_next(session);
 
  out_free:
@@ -1097,7 +1102,7 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
 }
 
 static void __j1939_session_cancel(struct j1939_session *session,
-                                enum j1939_xtp_abort err)
+                                  enum j1939_xtp_abort err)
 {
        struct j1939_priv *priv = session->priv;
 
@@ -1115,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
 
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
 }
 
 static void j1939_session_cancel(struct j1939_session *session,
@@ -1195,13 +1202,13 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
 
 static void j1939_session_completed(struct j1939_session *session)
 {
-       struct sk_buff *skb;
+       struct sk_buff *se_skb;
 
        if (!session->transmission) {
-               skb = j1939_session_skb_get(session);
+               se_skb = j1939_session_skb_get(session);
                /* distribute among j1939 receivers */
-               j1939_sk_recv(session->priv, skb);
-               consume_skb(skb);
+               j1939_sk_recv(session->priv, se_skb);
+               consume_skb(se_skb);
        }
 
        j1939_session_deactivate_activate_next(session);
@@ -1268,12 +1275,14 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
                break;
 
        case J1939_ETP_CMD_RTS:
-       case J1939_TP_CMD_RTS: /* fall through */
+               fallthrough;
+       case J1939_TP_CMD_RTS:
                abort = J1939_XTP_ABORT_BUSY;
                break;
 
        case J1939_ETP_CMD_CTS:
-       case J1939_TP_CMD_CTS: /* fall through */
+               fallthrough;
+       case J1939_TP_CMD_CTS:
                abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
                break;
 
@@ -1282,7 +1291,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
                break;
 
        case J1939_ETP_CMD_EOMA:
-       case J1939_TP_CMD_EOMA: /* fall through */
+               fallthrough;
+       case J1939_TP_CMD_EOMA:
                abort = J1939_XTP_ABORT_OTHER;
                break;
 
@@ -1326,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
        session->err = j1939_xtp_abort_to_errno(priv, abort);
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
        j1939_session_deactivate_activate_next(session);
 
 abort_put:
@@ -1434,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
                if (session->transmission) {
                        if (session->pkt.tx_acked)
                                j1939_sk_errqueue(session,
-                                                 J1939_ERRQUEUE_SCHED);
+                                                 J1939_ERRQUEUE_TX_SCHED);
                        j1939_session_txtimer_cancel(session);
                        j1939_tp_schedule_txtimer(session, 0);
                }
@@ -1626,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
        session->pkt.rx = 0;
        session->pkt.tx = 0;
 
+       session->tskey = priv->rx_tskey++;
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
        WARN_ON_ONCE(j1939_session_activate(session));
 
        return session;
@@ -1748,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
        session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
        session->last_cmd = dat[0];
        j1939_tp_set_rxtimeout(session, 750);
+
+       if (!session->transmission)
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
 }
 
 static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
@@ -1772,7 +1790,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                                 struct sk_buff *skb)
 {
        struct j1939_priv *priv = session->priv;
-       struct j1939_sk_buff_cb *skcb;
+       struct j1939_sk_buff_cb *skcb, *se_skcb;
        struct sk_buff *se_skb = NULL;
        const u8 *dat;
        u8 *tpdat;
@@ -1797,7 +1815,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                        break;
                fallthrough;
        case J1939_TP_CMD_BAM:
-       case J1939_TP_CMD_CTS: /* fall through */
+               fallthrough;
+       case J1939_TP_CMD_CTS:
                if (skcb->addr.type != J1939_ETP)
                        break;
                fallthrough;
@@ -1822,8 +1841,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                goto out_session_cancel;
        }
 
-       skcb = j1939_skb_to_cb(se_skb);
-       offset = packet * 7 - skcb->offset;
+       se_skcb = j1939_skb_to_cb(se_skb);
+       offset = packet * 7 - se_skcb->offset;
        nbytes = se_skb->len - offset;
        if (nbytes > 7)
                nbytes = 7;
@@ -1851,7 +1870,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
        if (packet == session->pkt.rx)
                session->pkt.rx++;
 
-       if (skcb->addr.type != J1939_ETP &&
+       if (se_skcb->addr.type != J1939_ETP &&
            j1939_cb_is_broadcast(&session->skcb)) {
                if (session->pkt.rx >= session->pkt.total)
                        final = true;
@@ -2000,7 +2019,8 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
                extd = J1939_ETP;
                fallthrough;
        case J1939_TP_CMD_BAM:
-       case J1939_TP_CMD_RTS: /* fall through */
+               fallthrough;
+       case J1939_TP_CMD_RTS:
                if (skcb->addr.type != extd)
                        return;
 
index cd5a493..7105fa4 100644 (file)
@@ -592,9 +592,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->count  = count;
 
  out_fil:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
                rtnl_unlock();
 
@@ -638,9 +636,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->err_mask = err_mask;
 
  out_err:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
                rtnl_unlock();
 
index f7f1665..35ced62 100644 (file)
@@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
-ifeq ($(CONFIG_INET),y)
 obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
-endif
 obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
index f564f82..68d2cbf 100644 (file)
@@ -416,7 +416,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
 BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
           void *, value, u64, flags)
 {
-       if (in_irq() || in_nmi())
+       if (in_hardirq() || in_nmi())
                return (unsigned long)NULL;
 
        return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
@@ -425,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
           struct sock *, sk)
 {
-       if (in_irq() || in_nmi())
+       if (in_hardirq() || in_nmi())
                return -EPERM;
 
        return ____bpf_sk_storage_delete(map, sk);
index 8f1a47a..74fd402 100644 (file)
@@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po)
 }
 EXPORT_SYMBOL(dev_remove_offload);
 
-/******************************************************************************
- *
- *                   Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- *     netdev_boot_setup_add   - add new setup entry
- *     @name: name of the device
- *     @map: configured settings for the device
- *
- *     Adds new setup entry to the dev_boot_setup list.  The function
- *     returns 0 on error and 1 on success.  This is a generic routine to
- *     all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
-       struct netdev_boot_setup *s;
-       int i;
-
-       s = dev_boot_setup;
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
-                       memset(s[i].name, 0, sizeof(s[i].name));
-                       strlcpy(s[i].name, name, IFNAMSIZ);
-                       memcpy(&s[i].map, map, sizeof(s[i].map));
-                       break;
-               }
-       }
-
-       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check     - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
-       struct netdev_boot_setup *s = dev_boot_setup;
-       int i;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
-                   !strcmp(dev->name, s[i].name)) {
-                       dev->irq = s[i].map.irq;
-                       dev->base_addr = s[i].map.base_addr;
-                       dev->mem_start = s[i].map.mem_start;
-                       dev->mem_end = s[i].map.mem_end;
-                       return 1;
-               }
-       }
-       return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base    - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
-       const struct netdev_boot_setup *s = dev_boot_setup;
-       char name[IFNAMSIZ];
-       int i;
-
-       sprintf(name, "%s%d", prefix, unit);
-
-       /*
-        * If device already registered then return base of 1
-        * to indicate not to probe for this interface
-        */
-       if (__dev_get_by_name(&init_net, name))
-               return 1;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
-               if (!strcmp(name, s[i].name))
-                       return s[i].map.base_addr;
-       return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
-       int ints[5];
-       struct ifmap map;
-
-       str = get_options(str, ARRAY_SIZE(ints), ints);
-       if (!str || !*str)
-               return 0;
-
-       /* Save settings */
-       memset(&map, 0, sizeof(map));
-       if (ints[0] > 0)
-               map.irq = ints[1];
-       if (ints[0] > 1)
-               map.base_addr = ints[2];
-       if (ints[0] > 2)
-               map.mem_start = ints[3];
-       if (ints[0] > 3)
-               map.mem_end = ints[4];
-
-       /* Add new entry to the list */
-       return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
 /*******************************************************************************
  *
  *                         Device Interface Subroutines
@@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 
        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, name);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 
        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, ifindex);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -3098,6 +2971,50 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
 #endif
 
+/**
+ *     netif_set_real_num_queues - set actual number of RX and TX queues used
+ *     @dev: Network device
+ *     @txq: Actual number of TX queues
+ *     @rxq: Actual number of RX queues
+ *
+ *     Set the real number of both TX and RX queues.
+ *     Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq)
+{
+       unsigned int old_rxq = dev->real_num_rx_queues;
+       int err;
+
+       if (txq < 1 || txq > dev->num_tx_queues ||
+           rxq < 1 || rxq > dev->num_rx_queues)
+               return -EINVAL;
+
+       /* Start from increases, so the error path only does decreases -
+        * decreases can't fail.
+        */
+       if (rxq > dev->real_num_rx_queues) {
+               err = netif_set_real_num_rx_queues(dev, rxq);
+               if (err)
+                       return err;
+       }
+       if (txq > dev->real_num_tx_queues) {
+               err = netif_set_real_num_tx_queues(dev, txq);
+               if (err)
+                       goto undo_rx;
+       }
+       if (rxq < dev->real_num_rx_queues)
+               WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+       if (txq < dev->real_num_tx_queues)
+               WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+       return 0;
+undo_rx:
+       WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+       return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
 /**
  * netif_get_num_default_rss_queues - default number of RSS queues
  *
@@ -3190,7 +3107,7 @@ EXPORT_SYMBOL(__dev_kfree_skb_irq);
 
 void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
 {
-       if (in_irq() || irqs_disabled())
+       if (in_hardirq() || irqs_disabled())
                __dev_kfree_skb_irq(skb, reason);
        else
                dev_kfree_skb(skb);
@@ -4012,7 +3929,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
        qdisc_skb_cb(skb)->post_ct = false;
        mini_qdisc_bstats_cpu_update(miniq, skb);
 
-       switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
        case TC_ACT_OK:
        case TC_ACT_RECLASSIFY:
                skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -4756,45 +4673,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
        return rxqueue;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-                                    struct xdp_buff *xdp,
-                                    struct bpf_prog *xdp_prog)
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+                            struct bpf_prog *xdp_prog)
 {
        void *orig_data, *orig_data_end, *hard_start;
        struct netdev_rx_queue *rxqueue;
-       u32 metalen, act = XDP_DROP;
        bool orig_bcast, orig_host;
        u32 mac_len, frame_sz;
        __be16 orig_eth_type;
        struct ethhdr *eth;
+       u32 metalen, act;
        int off;
 
-       /* Reinjected packets coming from act_mirred or similar should
-        * not get XDP generic processing.
-        */
-       if (skb_is_redirected(skb))
-               return XDP_PASS;
-
-       /* XDP packets must be linear and must have sufficient headroom
-        * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
-        * native XDP provides, thus we need to do it here as well.
-        */
-       if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
-           skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-               int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
-               int troom = skb->tail + skb->data_len - skb->end;
-
-               /* In case we have to go down the path and also linearize,
-                * then lets do the pskb_expand_head() work just once here.
-                */
-               if (pskb_expand_head(skb,
-                                    hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
-                                    troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
-                       goto do_drop;
-               if (skb_linearize(skb))
-                       goto do_drop;
-       }
-
        /* The XDP program wants to see the packet starting at the MAC
         * header.
         */
@@ -4849,6 +4739,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                skb->protocol = eth_type_trans(skb, skb->dev);
        }
 
+       /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
+        * before calling us again on redirect path. We do not call do_redirect
+        * as we leave that up to the caller.
+        *
+        * Caller is responsible for managing lifetime of skb (i.e. calling
+        * kfree_skb in response to actions it cannot handle/XDP_DROP).
+        */
        switch (act) {
        case XDP_REDIRECT:
        case XDP_TX:
@@ -4859,6 +4756,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                if (metalen)
                        skb_metadata_set(skb, metalen);
                break;
+       }
+
+       return act;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+                                    struct xdp_buff *xdp,
+                                    struct bpf_prog *xdp_prog)
+{
+       u32 act = XDP_DROP;
+
+       /* Reinjected packets coming from act_mirred or similar should
+        * not get XDP generic processing.
+        */
+       if (skb_is_redirected(skb))
+               return XDP_PASS;
+
+       /* XDP packets must be linear and must have sufficient headroom
+        * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+        * native XDP provides, thus we need to do it here as well.
+        */
+       if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
+           skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+               int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+               int troom = skb->tail + skb->data_len - skb->end;
+
+               /* In case we have to go down the path and also linearize,
+                * then lets do the pskb_expand_head() work just once here.
+                */
+               if (pskb_expand_head(skb,
+                                    hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+                                    troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+                       goto do_drop;
+               if (skb_linearize(skb))
+                       goto do_drop;
+       }
+
+       act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+       switch (act) {
+       case XDP_REDIRECT:
+       case XDP_TX:
+       case XDP_PASS:
+               break;
        default:
                bpf_warn_invalid_xdp_action(act);
                fallthrough;
@@ -5141,8 +5081,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
        skb->tc_at_ingress = 1;
        mini_qdisc_bstats_cpu_update(miniq, skb);
 
-       switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
-                                    &cl_res, false)) {
+       switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
        case TC_ACT_OK:
        case TC_ACT_RECLASSIFY:
                skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -5324,7 +5263,6 @@ another_round:
                        ret = NET_RX_DROP;
                        goto out;
                }
-               skb_reset_mac_len(skb);
        }
 
        if (eth_type_vlan(skb->protocol)) {
@@ -5650,25 +5588,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
        struct bpf_prog *new = xdp->prog;
        int ret = 0;
 
-       if (new) {
-               u32 i;
-
-               mutex_lock(&new->aux->used_maps_mutex);
-
-               /* generic XDP does not work with DEVMAPs that can
-                * have a bpf_prog installed on an entry
-                */
-               for (i = 0; i < new->aux->used_map_cnt; i++) {
-                       if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
-                           cpu_map_prog_allowed(new->aux->used_maps[i])) {
-                               mutex_unlock(&new->aux->used_maps_mutex);
-                               return -EINVAL;
-                       }
-               }
-
-               mutex_unlock(&new->aux->used_maps_mutex);
-       }
-
        switch (xdp->command) {
        case XDP_SETUP_PROG:
                rcu_assign_pointer(dev->xdp_prog, new);
@@ -5876,7 +5795,7 @@ static void flush_all_backlogs(void)
         */
        ASSERT_RTNL();
 
-       get_online_cpus();
+       cpus_read_lock();
 
        cpumask_clear(&flush_cpus);
        for_each_online_cpu(cpu) {
@@ -5894,7 +5813,7 @@ static void flush_all_backlogs(void)
        for_each_cpu(cpu, &flush_cpus)
                flush_work(per_cpu_ptr(&flush_works, cpu));
 
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@ -6011,7 +5930,6 @@ static void gro_list_prepare(const struct list_head *head,
                diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
                if (skb_vlan_tag_present(p))
                        diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
-               diffs |= skb_metadata_dst_cmp(p, skb);
                diffs |= skb_metadata_differs(p, skb);
                if (maclen == ETH_HLEN)
                        diffs |= compare_ether_header(skb_mac_header(p),
@@ -6021,17 +5939,30 @@ static void gro_list_prepare(const struct list_head *head,
                                       skb_mac_header(skb),
                                       maclen);
 
-               diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+               /* in most common scenarions 'slow_gro' is 0
+                * otherwise we are already on some slower paths
+                * either skip all the infrequent tests altogether or
+                * avoid trying too hard to skip each of them individually
+                */
+               if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
 #if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-               if (!diffs) {
-                       struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
-                       struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+                       struct tc_skb_ext *skb_ext;
+                       struct tc_skb_ext *p_ext;
+#endif
+
+                       diffs |= p->sk != skb->sk;
+                       diffs |= skb_metadata_dst_cmp(p, skb);
+                       diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+                       skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+                       p_ext = skb_ext_find(p, TC_SKB_EXT);
 
                        diffs |= (!!p_ext) ^ (!!skb_ext);
                        if (!diffs && unlikely(skb_ext))
                                diffs |= p_ext->chain ^ skb_ext->chain;
-               }
 #endif
+               }
 
                NAPI_GRO_CB(p)->same_flow = !diffs;
        }
@@ -6296,8 +6227,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
        skb->encapsulation = 0;
        skb_shinfo(skb)->gso_type = 0;
        skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-       skb_ext_reset(skb);
-       nf_reset_ct(skb);
+       if (unlikely(skb->slow_gro)) {
+               skb_orphan(skb);
+               skb_ext_reset(skb);
+               nf_reset_ct(skb);
+               skb->slow_gro = 0;
+       }
 
        napi->skb = skb;
 }
@@ -7597,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 {
        struct netdev_adjacent *lower;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
+       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 
        lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
@@ -9362,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
        return dev->xdp_state[mode].prog;
 }
 
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
 {
        u8 count = 0;
        int i;
@@ -9372,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
                        count++;
        return count;
 }
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -9465,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 {
        unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
        struct bpf_prog *cur_prog;
+       struct net_device *upper;
+       struct list_head *iter;
        enum bpf_xdp_mode mode;
        bpf_op_t bpf_op;
        int err;
@@ -9503,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
                return -EBUSY;
        }
 
+       /* don't allow if an upper device already has a program */
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               if (dev_xdp_prog_count(upper) > 0) {
+                       NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+                       return -EEXIST;
+               }
+       }
+
        cur_prog = dev_xdp_prog(dev, mode);
        /* can't replace attached prog with link */
        if (link && cur_prog) {
@@ -10134,7 +10080,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
        BUG_ON(count < 1);
 
-       rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+       rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
        if (!rx)
                return -ENOMEM;
 
@@ -10201,7 +10147,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
        if (count < 1 || count > 0xffff)
                return -EINVAL;
 
-       tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+       tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
        if (!tx)
                return -ENOMEM;
 
@@ -10841,7 +10787,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        /* ensure 32-byte alignment of whole construct */
        alloc_size += NETDEV_ALIGN - 1;
 
-       p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+       p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
        if (!p)
                return NULL;
 
index 45ae6ee..8c39283 100644 (file)
  * General list handling functions
  */
 
-static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
-                              const unsigned char *addr, int addr_len,
-                              unsigned char addr_type, bool global,
-                              bool sync)
+static struct netdev_hw_addr*
+__hw_addr_create(const unsigned char *addr, int addr_len,
+                unsigned char addr_type, bool global, bool sync)
 {
        struct netdev_hw_addr *ha;
        int alloc_size;
@@ -29,32 +28,44 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
                alloc_size = L1_CACHE_BYTES;
        ha = kmalloc(alloc_size, GFP_ATOMIC);
        if (!ha)
-               return -ENOMEM;
+               return NULL;
        memcpy(ha->addr, addr, addr_len);
        ha->type = addr_type;
        ha->refcount = 1;
        ha->global_use = global;
        ha->synced = sync ? 1 : 0;
        ha->sync_cnt = 0;
-       list_add_tail_rcu(&ha->list, &list->list);
-       list->count++;
 
-       return 0;
+       return ha;
 }
 
 static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
                            const unsigned char *addr, int addr_len,
                            unsigned char addr_type, bool global, bool sync,
-                           int sync_count)
+                           int sync_count, bool exclusive)
 {
+       struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL;
        struct netdev_hw_addr *ha;
 
        if (addr_len > MAX_ADDR_LEN)
                return -EINVAL;
 
-       list_for_each_entry(ha, &list->list, list) {
-               if (ha->type == addr_type &&
-                   !memcmp(ha->addr, addr, addr_len)) {
+       while (*ins_point) {
+               int diff;
+
+               ha = rb_entry(*ins_point, struct netdev_hw_addr, node);
+               diff = memcmp(addr, ha->addr, addr_len);
+               if (diff == 0)
+                       diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+               parent = *ins_point;
+               if (diff < 0) {
+                       ins_point = &parent->rb_left;
+               } else if (diff > 0) {
+                       ins_point = &parent->rb_right;
+               } else {
+                       if (exclusive)
+                               return -EEXIST;
                        if (global) {
                                /* check if addr is already used as global */
                                if (ha->global_use)
@@ -73,8 +84,25 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
                }
        }
 
-       return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
-                                  sync);
+       ha = __hw_addr_create(addr, addr_len, addr_type, global, sync);
+       if (!ha)
+               return -ENOMEM;
+
+       /* The first address in dev->dev_addrs is pointed to by dev->dev_addr
+        * and mutated freely by device drivers and netdev ops, so if we insert
+        * it into the tree we'll end up with an invalid rbtree.
+        */
+       if (list->count > 0) {
+               rb_link_node(&ha->node, parent, ins_point);
+               rb_insert_color(&ha->node, &list->tree);
+       } else {
+               RB_CLEAR_NODE(&ha->node);
+       }
+
+       list_add_tail_rcu(&ha->list, &list->list);
+       list->count++;
+
+       return 0;
 }
 
 static int __hw_addr_add(struct netdev_hw_addr_list *list,
@@ -82,7 +110,7 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list,
                         unsigned char addr_type)
 {
        return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false,
-                               0);
+                               0, false);
 }
 
 static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
@@ -103,24 +131,61 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
 
        if (--ha->refcount)
                return 0;
+
+       if (!RB_EMPTY_NODE(&ha->node))
+               rb_erase(&ha->node, &list->tree);
+
        list_del_rcu(&ha->list);
        kfree_rcu(ha, rcu_head);
        list->count--;
        return 0;
 }
 
+static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list,
+                                              const unsigned char *addr, int addr_len,
+                                              unsigned char addr_type)
+{
+       struct netdev_hw_addr *ha;
+       struct rb_node *node;
+
+       /* The first address isn't inserted into the tree because in the dev->dev_addrs
+        * list it's the address pointed to by dev->dev_addr which is freely mutated
+        * in place, so we need to check it separately.
+        */
+       ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+       if (ha && !memcmp(addr, ha->addr, addr_len) &&
+           (!addr_type || addr_type == ha->type))
+               return ha;
+
+       node = list->tree.rb_node;
+
+       while (node) {
+               struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node);
+               int diff = memcmp(addr, ha->addr, addr_len);
+
+               if (diff == 0 && addr_type)
+                       diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+               if (diff < 0)
+                       node = node->rb_left;
+               else if (diff > 0)
+                       node = node->rb_right;
+               else
+                       return ha;
+       }
+
+       return NULL;
+}
+
 static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
                            const unsigned char *addr, int addr_len,
                            unsigned char addr_type, bool global, bool sync)
 {
-       struct netdev_hw_addr *ha;
+       struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type);
 
-       list_for_each_entry(ha, &list->list, list) {
-               if (!memcmp(ha->addr, addr, addr_len) &&
-                   (ha->type == addr_type || !addr_type))
-                       return __hw_addr_del_entry(list, ha, global, sync);
-       }
-       return -ENOENT;
+       if (!ha)
+               return -ENOENT;
+       return __hw_addr_del_entry(list, ha, global, sync);
 }
 
 static int __hw_addr_del(struct netdev_hw_addr_list *list,
@@ -137,7 +202,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
        int err;
 
        err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
-                              false, true, ha->sync_cnt);
+                              false, true, ha->sync_cnt, false);
        if (err && err != -EEXIST)
                return err;
 
@@ -407,6 +472,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
        struct netdev_hw_addr *ha, *tmp;
 
+       list->tree = RB_ROOT;
        list_for_each_entry_safe(ha, tmp, &list->list, list) {
                list_del_rcu(&ha->list);
                kfree_rcu(ha, rcu_head);
@@ -418,6 +484,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
 {
        INIT_LIST_HEAD(&list->list);
        list->count = 0;
+       list->tree = RB_ROOT;
 }
 EXPORT_SYMBOL(__hw_addr_init);
 
@@ -552,22 +619,14 @@ EXPORT_SYMBOL(dev_addr_del);
  */
 int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
-       struct netdev_hw_addr *ha;
        int err;
 
        netif_addr_lock_bh(dev);
-       list_for_each_entry(ha, &dev->uc.list, list) {
-               if (!memcmp(ha->addr, addr, dev->addr_len) &&
-                   ha->type == NETDEV_HW_ADDR_T_UNICAST) {
-                       err = -EEXIST;
-                       goto out;
-               }
-       }
-       err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
-                                 NETDEV_HW_ADDR_T_UNICAST, true, false);
+       err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len,
+                              NETDEV_HW_ADDR_T_UNICAST, true, false,
+                              0, true);
        if (!err)
                __dev_set_rx_mode(dev);
-out:
        netif_addr_unlock_bh(dev);
        return err;
 }
@@ -745,22 +804,14 @@ EXPORT_SYMBOL(dev_uc_init);
  */
 int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
-       struct netdev_hw_addr *ha;
        int err;
 
        netif_addr_lock_bh(dev);
-       list_for_each_entry(ha, &dev->mc.list, list) {
-               if (!memcmp(ha->addr, addr, dev->addr_len) &&
-                   ha->type == NETDEV_HW_ADDR_T_MULTICAST) {
-                       err = -EEXIST;
-                       goto out;
-               }
-       }
-       err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
-                                 NETDEV_HW_ADDR_T_MULTICAST, true, false);
+       err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+                              NETDEV_HW_ADDR_T_MULTICAST, true, false,
+                              0, true);
        if (!err)
                __dev_set_rx_mode(dev);
-out:
        netif_addr_unlock_bh(dev);
        return err;
 }
@@ -773,7 +824,8 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
 
        netif_addr_lock_bh(dev);
        err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
-                              NETDEV_HW_ADDR_T_MULTICAST, global, false, 0);
+                              NETDEV_HW_ADDR_T_MULTICAST, global, false,
+                              0, false);
        if (!err)
                __dev_set_rx_mode(dev);
        netif_addr_unlock_bh(dev);
index 478d032..0e87237 100644 (file)
@@ -1,10 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/kmod.h>
 #include <linux/netdevice.h>
+#include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/net_tstamp.h>
 #include <linux/wireless.h>
+#include <linux/if_bridge.h>
 #include <net/dsa.h>
 #include <net/wext.h>
 
@@ -25,79 +27,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr)
        return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
 }
 
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- *     register_gifconf        -       register a SIOCGIF handler
- *     @family: Address family
- *     @gifconf: Function handler
- *
- *     Register protocol dependent address dumping routines. The handler
- *     that is passed must not be freed or reused until it has been replaced
- *     by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
-       if (family >= NPROTO)
-               return -EINVAL;
-       gifconf_list[family] = gifconf;
-       return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
 /*
  *     Perform a SIOCGIFCONF call. This structure will change
  *     size eventually, and there is nothing I can do about it.
  *     Thus we will need a 'compatibility mode'.
  */
-
-int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
+int dev_ifconf(struct net *net, struct ifconf __user *uifc)
 {
        struct net_device *dev;
-       char __user *pos;
-       int len;
-       int total;
-       int i;
+       void __user *pos;
+       size_t size;
+       int len, total = 0, done;
 
-       /*
-        *      Fetch the caller's info block.
-        */
+       /* both the ifconf and the ifreq structures are slightly different */
+       if (in_compat_syscall()) {
+               struct compat_ifconf ifc32;
 
-       pos = ifc->ifc_buf;
-       len = ifc->ifc_len;
+               if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
+                       return -EFAULT;
 
-       /*
-        *      Loop over the interfaces, and write an info block for each.
-        */
+               pos = compat_ptr(ifc32.ifcbuf);
+               len = ifc32.ifc_len;
+               size = sizeof(struct compat_ifreq);
+       } else {
+               struct ifconf ifc;
+
+               if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
+                       return -EFAULT;
 
-       total = 0;
+               pos = ifc.ifc_buf;
+               len = ifc.ifc_len;
+               size = sizeof(struct ifreq);
+       }
+
+       /* Loop over the interfaces, and write an info block for each. */
+       rtnl_lock();
        for_each_netdev(net, dev) {
-               for (i = 0; i < NPROTO; i++) {
-                       if (gifconf_list[i]) {
-                               int done;
-                               if (!pos)
-                                       done = gifconf_list[i](dev, NULL, 0, size);
-                               else
-                                       done = gifconf_list[i](dev, pos + total,
-                                                              len - total, size);
-                               if (done < 0)
-                                       return -EFAULT;
-                               total += done;
-                       }
+               if (!pos)
+                       done = inet_gifconf(dev, NULL, 0, size);
+               else
+                       done = inet_gifconf(dev, pos + total,
+                                           len - total, size);
+               if (done < 0) {
+                       rtnl_unlock();
+                       return -EFAULT;
                }
+               total += done;
        }
+       rtnl_unlock();
 
-       /*
-        *      All done.  Write the updated control block back to the caller.
-        */
-       ifc->ifc_len = total;
+       return put_user(total, &uifc->ifc_len);
+}
+
+static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
+{
+       struct ifmap *ifmap = &ifr->ifr_map;
+
+       if (in_compat_syscall()) {
+               struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;
+
+               cifmap->mem_start = dev->mem_start;
+               cifmap->mem_end   = dev->mem_end;
+               cifmap->base_addr = dev->base_addr;
+               cifmap->irq       = dev->irq;
+               cifmap->dma       = dev->dma;
+               cifmap->port      = dev->if_port;
+
+               return 0;
+       }
+
+       ifmap->mem_start  = dev->mem_start;
+       ifmap->mem_end    = dev->mem_end;
+       ifmap->base_addr  = dev->base_addr;
+       ifmap->irq        = dev->irq;
+       ifmap->dma        = dev->dma;
+       ifmap->port       = dev->if_port;
 
-       /*
-        *      Both BSD and Solaris return 0 here, so we do too.
-        */
        return 0;
 }
 
+static int dev_setifmap(struct net_device *dev, struct ifreq *ifr)
+{
+       struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;
+
+       if (!dev->netdev_ops->ndo_set_config)
+               return -EOPNOTSUPP;
+
+       if (in_compat_syscall()) {
+               struct ifmap ifmap = {
+                       .mem_start  = cifmap->mem_start,
+                       .mem_end    = cifmap->mem_end,
+                       .base_addr  = cifmap->base_addr,
+                       .irq        = cifmap->irq,
+                       .dma        = cifmap->dma,
+                       .port       = cifmap->port,
+               };
+
+               return dev->netdev_ops->ndo_set_config(dev, &ifmap);
+       }
+
+       return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
+}
+
 /*
  *     Perform the SIOCxIFxxx calls, inside rcu_read_lock()
  */
@@ -128,13 +159,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
                break;
 
        case SIOCGIFMAP:
-               ifr->ifr_map.mem_start = dev->mem_start;
-               ifr->ifr_map.mem_end   = dev->mem_end;
-               ifr->ifr_map.base_addr = dev->base_addr;
-               ifr->ifr_map.irq       = dev->irq;
-               ifr->ifr_map.dma       = dev->dma;
-               ifr->ifr_map.port      = dev->if_port;
-               return 0;
+               return dev_getifmap(dev, ifr);
 
        case SIOCGIFINDEX:
                ifr->ifr_ifindex = dev->ifindex;
@@ -215,19 +240,19 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
        return 0;
 }
 
-static int dev_do_ioctl(struct net_device *dev,
-                       struct ifreq *ifr, unsigned int cmd)
+static int dev_eth_ioctl(struct net_device *dev,
+                        struct ifreq *ifr, unsigned int cmd)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
-       err = dsa_ndo_do_ioctl(dev, ifr, cmd);
+       err = dsa_ndo_eth_ioctl(dev, ifr, cmd);
        if (err == 0 || err != -EOPNOTSUPP)
                return err;
 
-       if (ops->ndo_do_ioctl) {
+       if (ops->ndo_eth_ioctl) {
                if (netif_device_present(dev))
-                       err = ops->ndo_do_ioctl(dev, ifr, cmd);
+                       err = ops->ndo_eth_ioctl(dev, ifr, cmd);
                else
                        err = -ENODEV;
        }
@@ -235,10 +260,55 @@ static int dev_do_ioctl(struct net_device *dev,
        return err;
 }
 
+static int dev_siocbond(struct net_device *dev,
+                       struct ifreq *ifr, unsigned int cmd)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_siocbond) {
+               if (netif_device_present(dev))
+                       return ops->ndo_siocbond(dev, ifr, cmd);
+               else
+                       return -ENODEV;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                             void __user *data, unsigned int cmd)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_siocdevprivate) {
+               if (netif_device_present(dev))
+                       return ops->ndo_siocdevprivate(dev, ifr, data, cmd);
+               else
+                       return -ENODEV;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_siocwandev) {
+               if (netif_device_present(dev))
+                       return ops->ndo_siocwandev(dev, ifs);
+               else
+                       return -ENODEV;
+       }
+
+       return -EOPNOTSUPP;
+}
+
 /*
  *     Perform the SIOCxIFxxx calls, inside rtnl_lock()
  */
-static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
+                     unsigned int cmd)
 {
        int err;
        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -275,12 +345,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                return 0;
 
        case SIOCSIFMAP:
-               if (ops->ndo_set_config) {
-                       if (!netif_device_present(dev))
-                               return -ENODEV;
-                       return ops->ndo_set_config(dev, &ifr->ifr_map);
-               }
-               return -EOPNOTSUPP;
+               return dev_setifmap(dev, ifr);
 
        case SIOCADDMULTI:
                if (!ops->ndo_set_rx_mode ||
@@ -307,6 +372,22 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                ifr->ifr_newname[IFNAMSIZ-1] = '\0';
                return dev_change_name(dev, ifr->ifr_newname);
 
+       case SIOCWANDEV:
+               return dev_siocwandev(dev, &ifr->ifr_settings);
+
+       case SIOCBRADDIF:
+       case SIOCBRDELIF:
+               if (!netif_device_present(dev))
+                       return -ENODEV;
+               if (!netif_is_bridge_master(dev))
+                       return -EOPNOTSUPP;
+               dev_hold(dev);
+               rtnl_unlock();
+               err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+               dev_put(dev);
+               rtnl_lock();
+               return err;
+
        case SIOCSHWTSTAMP:
                err = net_hwtstamp_validate(ifr);
                if (err)
@@ -317,23 +398,23 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
         *      Unknown or private ioctl
         */
        default:
-               if ((cmd >= SIOCDEVPRIVATE &&
-                   cmd <= SIOCDEVPRIVATE + 15) ||
-                   cmd == SIOCBONDENSLAVE ||
+               if (cmd >= SIOCDEVPRIVATE &&
+                   cmd <= SIOCDEVPRIVATE + 15)
+                       return dev_siocdevprivate(dev, ifr, data, cmd);
+
+               if (cmd == SIOCGMIIPHY ||
+                   cmd == SIOCGMIIREG ||
+                   cmd == SIOCSMIIREG ||
+                   cmd == SIOCSHWTSTAMP ||
+                   cmd == SIOCGHWTSTAMP) {
+                       err = dev_eth_ioctl(dev, ifr, cmd);
+               } else if (cmd == SIOCBONDENSLAVE ||
                    cmd == SIOCBONDRELEASE ||
                    cmd == SIOCBONDSETHWADDR ||
                    cmd == SIOCBONDSLAVEINFOQUERY ||
                    cmd == SIOCBONDINFOQUERY ||
-                   cmd == SIOCBONDCHANGEACTIVE ||
-                   cmd == SIOCGMIIPHY ||
-                   cmd == SIOCGMIIREG ||
-                   cmd == SIOCSMIIREG ||
-                   cmd == SIOCBRADDIF ||
-                   cmd == SIOCBRDELIF ||
-                   cmd == SIOCSHWTSTAMP ||
-                   cmd == SIOCGHWTSTAMP ||
-                   cmd == SIOCWANDEV) {
-                       err = dev_do_ioctl(dev, ifr, cmd);
+                   cmd == SIOCBONDCHANGEACTIVE) {
+                       err = dev_siocbond(dev, ifr, cmd);
                } else
                        err = -EINVAL;
 
@@ -386,7 +467,8 @@ EXPORT_SYMBOL(dev_load);
  *     positive or a negative errno code on error.
  */
 
-int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+             void __user *data, bool *need_copyout)
 {
        int ret;
        char *colon;
@@ -437,7 +519,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
        case SIOCETHTOOL:
                dev_load(net, ifr->ifr_name);
                rtnl_lock();
-               ret = dev_ethtool(net, ifr);
+               ret = dev_ethtool(net, ifr, data);
                rtnl_unlock();
                if (colon)
                        *colon = ':';
@@ -456,7 +538,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
                rtnl_lock();
-               ret = dev_ifsioc(net, ifr, cmd);
+               ret = dev_ifsioc(net, ifr, data, cmd);
                rtnl_unlock();
                if (colon)
                        *colon = ':';
@@ -502,7 +584,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
        case SIOCBONDINFOQUERY:
                dev_load(net, ifr->ifr_name);
                rtnl_lock();
-               ret = dev_ifsioc(net, ifr, cmd);
+               ret = dev_ifsioc(net, ifr, data, cmd);
                rtnl_unlock();
                if (need_copyout)
                        *need_copyout = false;
@@ -527,7 +609,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
                     cmd <= SIOCDEVPRIVATE + 15)) {
                        dev_load(net, ifr->ifr_name);
                        rtnl_lock();
-                       ret = dev_ifsioc(net, ifr, cmd);
+                       ret = dev_ifsioc(net, ifr, data, cmd);
                        rtnl_unlock();
                        return ret;
                }
index 8503262..a856ae4 100644 (file)
@@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
                                 DEVLINK_PORT_FN_STATE_ACTIVE),
 };
 
-static LIST_HEAD(devlink_list);
+static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+#define DEVLINK_REGISTERED XA_MARK_1
 
 /* devlink_mutex
  *
@@ -108,23 +109,23 @@ struct net *devlink_net(const struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_net);
 
-static void __devlink_net_set(struct devlink *devlink, struct net *net)
+static void devlink_put(struct devlink *devlink)
 {
-       write_pnet(&devlink->_net, net);
+       if (refcount_dec_and_test(&devlink->refcount))
+               complete(&devlink->comp);
 }
 
-void devlink_net_set(struct devlink *devlink, struct net *net)
+static bool __must_check devlink_try_get(struct devlink *devlink)
 {
-       if (WARN_ON(devlink->registered))
-               return;
-       __devlink_net_set(devlink, net);
+       return refcount_inc_not_zero(&devlink->refcount);
 }
-EXPORT_SYMBOL_GPL(devlink_net_set);
 
 static struct devlink *devlink_get_from_attrs(struct net *net,
                                              struct nlattr **attrs)
 {
        struct devlink *devlink;
+       unsigned long index;
+       bool found = false;
        char *busname;
        char *devname;
 
@@ -136,19 +137,19 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
 
        lockdep_assert_held(&devlink_mutex);
 
-       list_for_each_entry(devlink, &devlink_list, list) {
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
                if (strcmp(devlink->dev->bus->name, busname) == 0 &&
                    strcmp(dev_name(devlink->dev), devname) == 0 &&
-                   net_eq(devlink_net(devlink), net))
-                       return devlink;
+                   net_eq(devlink_net(devlink), net)) {
+                       found = true;
+                       break;
+               }
        }
 
-       return ERR_PTR(-ENODEV);
-}
+       if (!found || !devlink_try_get(devlink))
+               devlink = ERR_PTR(-ENODEV);
 
-static struct devlink *devlink_get_from_info(struct genl_info *info)
-{
-       return devlink_get_from_attrs(genl_info_net(info), info->attrs);
+       return devlink;
 }
 
 static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
@@ -499,7 +500,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
        int err;
 
        mutex_lock(&devlink_mutex);
-       devlink = devlink_get_from_info(info);
+       devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
        if (IS_ERR(devlink)) {
                mutex_unlock(&devlink_mutex);
                return PTR_ERR(devlink);
@@ -542,6 +543,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
 unlock:
        if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
                mutex_unlock(&devlink->lock);
+       devlink_put(devlink);
        mutex_unlock(&devlink_mutex);
        return err;
 }
@@ -554,6 +556,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
        devlink = info->user_ptr[0];
        if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
                mutex_unlock(&devlink->lock);
+       devlink_put(devlink);
        mutex_unlock(&devlink_mutex);
 }
 
@@ -817,10 +820,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
        return 0;
 }
 
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
-                            struct devlink_port *port, struct sk_buff *msg,
-                            struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+                                       struct devlink_port *port,
+                                       struct sk_buff *msg,
+                                       struct netlink_ext_ack *extack,
+                                       bool *msg_updated)
 {
        u8 hw_addr[MAX_ADDR_LEN];
        int hw_addr_len;
@@ -829,7 +833,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
        if (!ops->port_function_hw_addr_get)
                return 0;
 
-       err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+       err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+                                            extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -843,12 +848,11 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
 }
 
 static int devlink_nl_rate_fill(struct sk_buff *msg,
-                               struct devlink *devlink,
                                struct devlink_rate *devlink_rate,
-                               enum devlink_command cmd, u32 portid,
-                               u32 seq, int flags,
-                               struct netlink_ext_ack *extack)
+                               enum devlink_command cmd, u32 portid, u32 seq,
+                               int flags, struct netlink_ext_ack *extack)
 {
+       struct devlink *devlink = devlink_rate->devlink;
        void *hdr;
 
        hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -906,12 +910,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
               opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
 }
 
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
-                          const struct devlink_ops *ops,
-                          struct devlink_port *port, struct sk_buff *msg,
-                          struct netlink_ext_ack *extack,
-                          bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+                                     struct devlink_port *port,
+                                     struct sk_buff *msg,
+                                     struct netlink_ext_ack *extack,
+                                     bool *msg_updated)
 {
        enum devlink_port_fn_opstate opstate;
        enum devlink_port_fn_state state;
@@ -920,7 +923,7 @@ devlink_port_fn_state_fill(struct devlink *devlink,
        if (!ops->port_fn_state_get)
                return 0;
 
-       err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+       err = ops->port_fn_state_get(port, &state, &opstate, extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -948,7 +951,6 @@ static int
 devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
                                   struct netlink_ext_ack *extack)
 {
-       struct devlink *devlink = port->devlink;
        const struct devlink_ops *ops;
        struct nlattr *function_attr;
        bool msg_updated = false;
@@ -958,13 +960,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
        if (!function_attr)
                return -EMSGSIZE;
 
-       ops = devlink->ops;
-       err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
-                                          extack, &msg_updated);
+       ops = port->devlink->ops;
+       err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+                                          &msg_updated);
        if (err)
                goto out;
-       err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
-                                        &msg_updated);
+       err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
 out:
        if (err || !msg_updated)
                nla_nest_cancel(msg, function_attr);
@@ -973,12 +974,12 @@ out:
        return err;
 }
 
-static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
+static int devlink_nl_port_fill(struct sk_buff *msg,
                                struct devlink_port *devlink_port,
-                               enum devlink_command cmd, u32 portid,
-                               u32 seq, int flags,
-                               struct netlink_ext_ack *extack)
+                               enum devlink_command cmd, u32 portid, u32 seq,
+                               int flags, struct netlink_ext_ack *extack)
 {
+       struct devlink *devlink = devlink_port->devlink;
        void *hdr;
 
        hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -1039,53 +1040,47 @@ nla_put_failure:
 static void devlink_port_notify(struct devlink_port *devlink_port,
                                enum devlink_command cmd)
 {
-       struct devlink *devlink = devlink_port->devlink;
        struct sk_buff *msg;
        int err;
 
-       if (!devlink_port->registered)
-               return;
-
        WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return;
 
-       err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
-                                  NULL);
+       err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
        if (err) {
                nlmsg_free(msg);
                return;
        }
 
-       genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-                               msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+       genlmsg_multicast_netns(&devlink_nl_family,
+                               devlink_net(devlink_port->devlink), msg, 0,
+                               DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
 static void devlink_rate_notify(struct devlink_rate *devlink_rate,
                                enum devlink_command cmd)
 {
-       struct devlink *devlink = devlink_rate->devlink;
        struct sk_buff *msg;
        int err;
 
-       WARN_ON(cmd != DEVLINK_CMD_RATE_NEW &&
-               cmd != DEVLINK_CMD_RATE_DEL);
+       WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return;
 
-       err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
-                                  cmd, 0, 0, 0, NULL);
+       err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
        if (err) {
                nlmsg_free(msg);
                return;
        }
 
-       genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-                               msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+       genlmsg_multicast_netns(&devlink_nl_family,
+                               devlink_net(devlink_rate->devlink), msg, 0,
+                               DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
 static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
@@ -1094,13 +1089,18 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
        struct devlink_rate *devlink_rate;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
                        enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
@@ -1110,18 +1110,19 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
                                idx++;
                                continue;
                        }
-                       err = devlink_nl_rate_fill(msg, devlink,
-                                                  devlink_rate,
-                                                  cmd, id,
+                       err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
                                                   cb->nlh->nlmsg_seq,
                                                   NLM_F_MULTI, NULL);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -1136,7 +1137,6 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
        struct devlink_rate *devlink_rate = info->user_ptr[1];
-       struct devlink *devlink = devlink_rate->devlink;
        struct sk_buff *msg;
        int err;
 
@@ -1144,8 +1144,7 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
        if (!msg)
                return -ENOMEM;
 
-       err = devlink_nl_rate_fill(msg, devlink, devlink_rate,
-                                  DEVLINK_CMD_RATE_NEW,
+       err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
                                   info->snd_portid, info->snd_seq, 0,
                                   info->extack);
        if (err) {
@@ -1193,20 +1192,30 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 {
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
+                       devlink_put(devlink);
+                       continue;
+               }
+
                if (idx < start) {
                        idx++;
+                       devlink_put(devlink);
                        continue;
                }
+
                err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
                                      NETLINK_CB(cb->skb).portid,
                                      cb->nlh->nlmsg_seq, NLM_F_MULTI);
+               devlink_put(devlink);
                if (err)
                        goto out;
                idx++;
@@ -1222,7 +1231,6 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
        struct devlink_port *devlink_port = info->user_ptr[1];
-       struct devlink *devlink = devlink_port->devlink;
        struct sk_buff *msg;
        int err;
 
@@ -1230,8 +1238,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
        if (!msg)
                return -ENOMEM;
 
-       err = devlink_nl_port_fill(msg, devlink, devlink_port,
-                                  DEVLINK_CMD_PORT_NEW,
+       err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
                                   info->snd_portid, info->snd_seq, 0,
                                   info->extack);
        if (err) {
@@ -1248,32 +1255,39 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        struct devlink_port *devlink_port;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_port, &devlink->port_list, list) {
                        if (idx < start) {
                                idx++;
                                continue;
                        }
-                       err = devlink_nl_port_fill(msg, devlink, devlink_port,
+                       err = devlink_nl_port_fill(msg, devlink_port,
                                                   DEVLINK_CMD_NEW,
                                                   NETLINK_CB(cb->skb).portid,
                                                   cb->nlh->nlmsg_seq,
-                                                  NLM_F_MULTI,
-                                                  cb->extack);
+                                                  NLM_F_MULTI, cb->extack);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -1282,31 +1296,33 @@ out:
        return msg->len;
 }
 
-static int devlink_port_type_set(struct devlink *devlink,
-                                struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
                                 enum devlink_port_type port_type)
 
 {
        int err;
 
-       if (devlink->ops->port_type_set) {
-               if (port_type == devlink_port->type)
-                       return 0;
-               err = devlink->ops->port_type_set(devlink_port, port_type);
-               if (err)
-                       return err;
-               devlink_port->desired_type = port_type;
-               devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       if (!devlink_port->devlink->ops->port_type_set)
+               return -EOPNOTSUPP;
+
+       if (port_type == devlink_port->type)
                return 0;
-       }
-       return -EOPNOTSUPP;
+
+       err = devlink_port->devlink->ops->port_type_set(devlink_port,
+                                                       port_type);
+       if (err)
+               return err;
+
+       devlink_port->desired_type = port_type;
+       devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       return 0;
 }
 
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
-                                 const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+                                            const struct nlattr *attr,
+                                            struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops;
+       const struct devlink_ops *ops = port->devlink->ops;
        const u8 *hw_addr;
        int hw_addr_len;
 
@@ -1327,17 +1343,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
                }
        }
 
-       ops = devlink->ops;
        if (!ops->port_function_hw_addr_set) {
                NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
                return -EOPNOTSUPP;
        }
 
-       return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+       return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+                                             extack);
 }
 
-static int devlink_port_fn_state_set(struct devlink *devlink,
-                                    struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
                                     const struct nlattr *attr,
                                     struct netlink_ext_ack *extack)
 {
@@ -1345,18 +1360,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink,
        const struct devlink_ops *ops;
 
        state = nla_get_u8(attr);
-       ops = devlink->ops;
+       ops = port->devlink->ops;
        if (!ops->port_fn_state_set) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Function does not support state setting");
                return -EOPNOTSUPP;
        }
-       return ops->port_fn_state_set(devlink, port, state, extack);
+       return ops->port_fn_state_set(port, state, extack);
 }
 
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
-                         const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+                                    const struct nlattr *attr,
+                                    struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
        int err;
@@ -1370,7 +1385,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
 
        attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
        if (attr) {
-               err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+               err = devlink_port_function_hw_addr_set(port, attr, extack);
                if (err)
                        return err;
        }
@@ -1380,7 +1395,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
         */
        attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
        if (attr)
-               err = devlink_port_fn_state_set(devlink, port, attr, extack);
+               err = devlink_port_fn_state_set(port, attr, extack);
 
        if (!err)
                devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
@@ -1391,14 +1406,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
        struct devlink_port *devlink_port = info->user_ptr[1];
-       struct devlink *devlink = devlink_port->devlink;
        int err;
 
        if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
                enum devlink_port_type port_type;
 
                port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
-               err = devlink_port_type_set(devlink, devlink_port, port_type);
+               err = devlink_port_type_set(devlink_port, port_type);
                if (err)
                        return err;
        }
@@ -1407,7 +1421,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
                struct netlink_ext_ack *extack = info->extack;
 
-               err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+               err = devlink_port_function_set(devlink_port, attr, extack);
                if (err)
                        return err;
        }
@@ -1502,9 +1516,8 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
                goto out;
        }
 
-       err = devlink_nl_port_fill(msg, devlink, devlink_port,
-                                  DEVLINK_CMD_NEW, info->snd_portid,
-                                  info->snd_seq, 0, NULL);
+       err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+                                  info->snd_portid, info->snd_seq, 0, NULL);
        if (err)
                goto out;
 
@@ -1908,13 +1921,18 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
                        if (idx < start) {
@@ -1928,11 +1946,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
                                                 NLM_F_MULTI);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -2052,14 +2073,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
+                       continue;
+
                if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
                    !devlink->ops->sb_pool_get)
-                       continue;
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
                        err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
@@ -2070,10 +2096,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
                                err = 0;
                        } else if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -2265,14 +2294,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
+                       continue;
+
                if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
                    !devlink->ops->sb_port_pool_get)
-                       continue;
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
                        err = __sb_port_pool_get_dumpit(msg, start, &idx,
@@ -2283,10 +2317,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
                                err = 0;
                        } else if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -2506,14 +2543,18 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
        struct devlink *devlink;
        struct devlink_sb *devlink_sb;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
+                       continue;
+
                if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
                    !devlink->ops->sb_tc_pool_bind_get)
-                       continue;
+                       goto retry;
 
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
@@ -2526,10 +2567,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
                                err = 0;
                        } else if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -3801,10 +3845,12 @@ static void devlink_param_notify(struct devlink *devlink,
                                 struct devlink_param_item *param_item,
                                 enum devlink_command cmd);
 
-static void devlink_reload_netns_change(struct devlink *devlink,
-                                       struct net *dest_net)
+static void devlink_ns_change_notify(struct devlink *devlink,
+                                    struct net *dest_net, struct net *curr_net,
+                                    bool new)
 {
        struct devlink_param_item *param_item;
+       enum devlink_command cmd;
 
        /* Userspace needs to be notified about devlink objects
         * removed from original and entering new network namespace.
@@ -3812,17 +3858,18 @@ static void devlink_reload_netns_change(struct devlink *devlink,
         * reload process so the notifications are generated separatelly.
         */
 
-       list_for_each_entry(param_item, &devlink->param_list, list)
-               devlink_param_notify(devlink, 0, param_item,
-                                    DEVLINK_CMD_PARAM_DEL);
-       devlink_notify(devlink, DEVLINK_CMD_DEL);
+       if (!dest_net || net_eq(dest_net, curr_net))
+               return;
 
-       __devlink_net_set(devlink, dest_net);
+       if (new)
+               devlink_notify(devlink, DEVLINK_CMD_NEW);
 
-       devlink_notify(devlink, DEVLINK_CMD_NEW);
+       cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
        list_for_each_entry(param_item, &devlink->param_list, list)
-               devlink_param_notify(devlink, 0, param_item,
-                                    DEVLINK_CMD_PARAM_NEW);
+               devlink_param_notify(devlink, 0, param_item, cmd);
+
+       if (!new)
+               devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
 static bool devlink_reload_supported(const struct devlink_ops *ops)
@@ -3902,6 +3949,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
                          u32 *actions_performed, struct netlink_ext_ack *extack)
 {
        u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+       struct net *curr_net;
        int err;
 
        if (!devlink->reload_enabled)
@@ -3909,18 +3957,22 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
 
        memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
               sizeof(remote_reload_stats));
+
+       curr_net = devlink_net(devlink);
+       devlink_ns_change_notify(devlink, dest_net, curr_net, false);
        err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
        if (err)
                return err;
 
-       if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
-               devlink_reload_netns_change(devlink, dest_net);
+       if (dest_net && !net_eq(dest_net, curr_net))
+               write_pnet(&devlink->_net, dest_net);
 
        err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
        devlink_reload_failed_set(devlink, !!err);
        if (err)
                return err;
 
+       devlink_ns_change_notify(devlink, dest_net, curr_net, true);
        WARN_ON(!(*actions_performed & BIT(action)));
        /* Catch driver on updating the remote action within devlink reload */
        WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
@@ -4117,7 +4169,7 @@ out_free_msg:
 
 static void devlink_flash_update_begin_notify(struct devlink *devlink)
 {
-       struct devlink_flash_notify params = { 0 };
+       struct devlink_flash_notify params = {};
 
        __devlink_flash_update_notify(devlink,
                                      DEVLINK_CMD_FLASH_UPDATE,
@@ -4126,7 +4178,7 @@ static void devlink_flash_update_begin_notify(struct devlink *devlink)
 
 static void devlink_flash_update_end_notify(struct devlink *devlink)
 {
-       struct devlink_flash_notify params = { 0 };
+       struct devlink_flash_notify params = {};
 
        __devlink_flash_update_notify(devlink,
                                      DEVLINK_CMD_FLASH_UPDATE_END,
@@ -4283,6 +4335,21 @@ static const struct devlink_param devlink_param_generic[] = {
                .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
                .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
        },
+       {
+               .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+               .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+               .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+       },
+       {
+               .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+               .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+               .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+       },
+       {
+               .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+               .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+               .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+       },
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -4553,13 +4620,18 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
        struct devlink_param_item *param_item;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(param_item, &devlink->param_list, list) {
                        if (idx < start) {
@@ -4575,11 +4647,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
                                err = 0;
                        } else if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -4821,13 +4896,18 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
        struct devlink_port *devlink_port;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(devlink_port, &devlink->port_list, list) {
                        list_for_each_entry(param_item,
@@ -4847,12 +4927,15 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
                                        err = 0;
                                } else if (err) {
                                        mutex_unlock(&devlink->lock);
+                                       devlink_put(devlink);
                                        goto out;
                                }
                                idx++;
                        }
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -5062,7 +5145,6 @@ static void devlink_nl_region_notify(struct devlink_region *region,
                                     struct devlink_snapshot *snapshot,
                                     enum devlink_command cmd)
 {
-       struct devlink *devlink = region->devlink;
        struct sk_buff *msg;
 
        WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
@@ -5071,8 +5153,9 @@ static void devlink_nl_region_notify(struct devlink_region *region,
        if (IS_ERR(msg))
                return;
 
-       genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-                               msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+       genlmsg_multicast_netns(&devlink_nl_family,
+                               devlink_net(region->devlink), msg, 0,
+                               DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
 /**
@@ -5390,15 +5473,22 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
 {
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
-       int err;
+       int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
                                                               &idx, start);
+retry:
+               devlink_put(devlink);
                if (err)
                        goto out;
        }
@@ -5761,6 +5851,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
        nla_nest_end(skb, chunks_attr);
        genlmsg_end(skb, hdr);
        mutex_unlock(&devlink->lock);
+       devlink_put(devlink);
        mutex_unlock(&devlink_mutex);
 
        return skb->len;
@@ -5769,6 +5860,7 @@ nla_put_failure:
        genlmsg_cancel(skb, hdr);
 out_unlock:
        mutex_unlock(&devlink->lock);
+       devlink_put(devlink);
 out_dev:
        mutex_unlock(&devlink_mutex);
        return err;
@@ -5915,22 +6007,20 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 {
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err = 0;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
-               if (idx < start) {
-                       idx++;
-                       continue;
-               }
 
-               if (!devlink->ops->info_get) {
-                       idx++;
-                       continue;
-               }
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
+               if (idx < start || !devlink->ops->info_get)
+                       goto inc;
 
                mutex_lock(&devlink->lock);
                err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
@@ -5940,9 +6030,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
                mutex_unlock(&devlink->lock);
                if (err == -EOPNOTSUPP)
                        err = 0;
-               else if (err)
+               else if (err) {
+                       devlink_put(devlink);
                        break;
+               }
+inc:
                idx++;
+retry:
+               devlink_put(devlink);
        }
        mutex_unlock(&devlink_mutex);
 
@@ -6756,11 +6851,11 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
 
 static int
 devlink_nl_health_reporter_fill(struct sk_buff *msg,
-                               struct devlink *devlink,
                                struct devlink_health_reporter *reporter,
                                enum devlink_command cmd, u32 portid,
                                u32 seq, int flags)
 {
+       struct devlink *devlink = reporter->devlink;
        struct nlattr *reporter_attr;
        void *hdr;
 
@@ -6837,8 +6932,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
        if (!msg)
                return;
 
-       err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
-                                             reporter, cmd, 0, 0, 0);
+       err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
        if (err) {
                nlmsg_free(msg);
                return;
@@ -7028,6 +7122,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
                goto unlock;
 
        reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+       devlink_put(devlink);
        mutex_unlock(&devlink_mutex);
        return reporter;
 unlock:
@@ -7071,7 +7166,7 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
                goto out;
        }
 
-       err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+       err = devlink_nl_health_reporter_fill(msg, reporter,
                                              DEVLINK_CMD_HEALTH_REPORTER_GET,
                                              info->snd_portid, info->snd_seq,
                                              0);
@@ -7094,13 +7189,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
        struct devlink_port *port;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry_rep;
+
                mutex_lock(&devlink->reporters_lock);
                list_for_each_entry(reporter, &devlink->reporter_list,
                                    list) {
@@ -7108,24 +7208,29 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
                                idx++;
                                continue;
                        }
-                       err = devlink_nl_health_reporter_fill(msg, devlink,
-                                                             reporter,
-                                                             DEVLINK_CMD_HEALTH_REPORTER_GET,
-                                                             NETLINK_CB(cb->skb).portid,
-                                                             cb->nlh->nlmsg_seq,
-                                                             NLM_F_MULTI);
+                       err = devlink_nl_health_reporter_fill(
+                               msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET,
+                               NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+                               NLM_F_MULTI);
                        if (err) {
                                mutex_unlock(&devlink->reporters_lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->reporters_lock);
+retry_rep:
+               devlink_put(devlink);
        }
 
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry_port;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(port, &devlink->port_list, list) {
                        mutex_lock(&port->reporters_lock);
@@ -7134,14 +7239,15 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
                                        idx++;
                                        continue;
                                }
-                               err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
-                                                                     DEVLINK_CMD_HEALTH_REPORTER_GET,
-                                                                     NETLINK_CB(cb->skb).portid,
-                                                                     cb->nlh->nlmsg_seq,
-                                                                     NLM_F_MULTI);
+                               err = devlink_nl_health_reporter_fill(
+                                       msg, reporter,
+                                       DEVLINK_CMD_HEALTH_REPORTER_GET,
+                                       NETLINK_CB(cb->skb).portid,
+                                       cb->nlh->nlmsg_seq, NLM_F_MULTI);
                                if (err) {
                                        mutex_unlock(&port->reporters_lock);
                                        mutex_unlock(&devlink->lock);
+                                       devlink_put(devlink);
                                        goto out;
                                }
                                idx++;
@@ -7149,6 +7255,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
                        mutex_unlock(&port->reporters_lock);
                }
                mutex_unlock(&devlink->lock);
+retry_port:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -7677,13 +7785,18 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
        struct devlink_trap_item *trap_item;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(trap_item, &devlink->trap_list, list) {
                        if (idx < start) {
@@ -7697,11 +7810,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
                                                   NLM_F_MULTI);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -7896,13 +8012,18 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
        u32 portid = NETLINK_CB(cb->skb).portid;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(group_item, &devlink->trap_group_list,
                                    list) {
@@ -7917,11 +8038,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
                                                         NLM_F_MULTI);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -8202,13 +8326,18 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
        u32 portid = NETLINK_CB(cb->skb).portid;
        struct devlink *devlink;
        int start = cb->args[0];
+       unsigned long index;
        int idx = 0;
        int err;
 
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
                        continue;
+
+               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+                       goto retry;
+
                mutex_lock(&devlink->lock);
                list_for_each_entry(policer_item, &devlink->trap_policer_list,
                                    list) {
@@ -8223,11 +8352,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
                                                           NLM_F_MULTI);
                        if (err) {
                                mutex_unlock(&devlink->lock);
+                               devlink_put(devlink);
                                goto out;
                        }
                        idx++;
                }
                mutex_unlock(&devlink->lock);
+retry:
+               devlink_put(devlink);
        }
 out:
        mutex_unlock(&devlink_mutex);
@@ -8768,30 +8900,44 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
 }
 
 /**
- *     devlink_alloc - Allocate new devlink instance resources
+ *     devlink_alloc_ns - Allocate new devlink instance resources
+ *     in specific namespace
  *
  *     @ops: ops
  *     @priv_size: size of user private data
+ *     @net: net namespace
+ *     @dev: parent device
  *
  *     Allocate new devlink instance resources, including devlink index
  *     and name.
  */
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+                                size_t priv_size, struct net *net,
+                                struct device *dev)
 {
        struct devlink *devlink;
+       static u32 last_id;
+       int ret;
 
-       if (WARN_ON(!ops))
-               return NULL;
-
+       WARN_ON(!ops || !dev);
        if (!devlink_reload_actions_valid(ops))
                return NULL;
 
        devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
        if (!devlink)
                return NULL;
+
+       ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+                             &last_id, GFP_KERNEL);
+       if (ret < 0) {
+               kfree(devlink);
+               return NULL;
+       }
+
+       devlink->dev = dev;
        devlink->ops = ops;
        xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
-       __devlink_net_set(devlink, &init_net);
+       write_pnet(&devlink->_net, net);
        INIT_LIST_HEAD(&devlink->port_list);
        INIT_LIST_HEAD(&devlink->rate_list);
        INIT_LIST_HEAD(&devlink->sb_list);
@@ -8805,22 +8951,22 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        INIT_LIST_HEAD(&devlink->trap_policer_list);
        mutex_init(&devlink->lock);
        mutex_init(&devlink->reporters_lock);
+       refcount_set(&devlink->refcount, 1);
+       init_completion(&devlink->comp);
+
        return devlink;
 }
-EXPORT_SYMBOL_GPL(devlink_alloc);
+EXPORT_SYMBOL_GPL(devlink_alloc_ns);
 
 /**
  *     devlink_register - Register devlink instance
  *
  *     @devlink: devlink
- *     @dev: parent device
  */
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
 {
-       devlink->dev = dev;
-       devlink->registered = true;
        mutex_lock(&devlink_mutex);
-       list_add_tail(&devlink->list, &devlink_list);
+       xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
        devlink_notify(devlink, DEVLINK_CMD_NEW);
        mutex_unlock(&devlink_mutex);
        return 0;
@@ -8834,11 +8980,14 @@ EXPORT_SYMBOL_GPL(devlink_register);
  */
 void devlink_unregister(struct devlink *devlink)
 {
+       devlink_put(devlink);
+       wait_for_completion(&devlink->comp);
+
        mutex_lock(&devlink_mutex);
        WARN_ON(devlink_reload_supported(devlink->ops) &&
                devlink->reload_enabled);
        devlink_notify(devlink, DEVLINK_CMD_DEL);
-       list_del(&devlink->list);
+       xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
        mutex_unlock(&devlink_mutex);
 }
 EXPORT_SYMBOL_GPL(devlink_unregister);
@@ -8900,6 +9049,7 @@ void devlink_free(struct devlink *devlink)
        WARN_ON(!list_empty(&devlink->port_list));
 
        xa_destroy(&devlink->snapshot_ids);
+       xa_erase(&devlinks, devlink->index);
 
        kfree(devlink);
 }
@@ -8960,9 +9110,10 @@ int devlink_port_register(struct devlink *devlink,
                mutex_unlock(&devlink->lock);
                return -EEXIST;
        }
+
+       WARN_ON(devlink_port->devlink);
        devlink_port->devlink = devlink;
        devlink_port->index = port_index;
-       devlink_port->registered = true;
        spin_lock_init(&devlink_port->type_lock);
        INIT_LIST_HEAD(&devlink_port->reporter_list);
        mutex_init(&devlink_port->reporters_lock);
@@ -9001,7 +9152,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
                                    enum devlink_port_type type,
                                    void *type_dev)
 {
-       if (WARN_ON(!devlink_port->registered))
+       if (WARN_ON(!devlink_port->devlink))
                return;
        devlink_port_type_warn_cancel(devlink_port);
        spin_lock_bh(&devlink_port->type_lock);
@@ -9121,7 +9272,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
 {
        int ret;
 
-       if (WARN_ON(devlink_port->registered))
+       if (WARN_ON(devlink_port->devlink))
                return;
        devlink_port->attrs = *attrs;
        ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
@@ -9145,7 +9296,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
        int ret;
 
-       if (WARN_ON(devlink_port->registered))
+       if (WARN_ON(devlink_port->devlink))
                return;
        ret = __devlink_port_attrs_set(devlink_port,
                                       DEVLINK_PORT_FLAVOUR_PCI_PF);
@@ -9172,7 +9323,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
        int ret;
 
-       if (WARN_ON(devlink_port->registered))
+       if (WARN_ON(devlink_port->devlink))
                return;
        ret = __devlink_port_attrs_set(devlink_port,
                                       DEVLINK_PORT_FLAVOUR_PCI_VF);
@@ -9200,7 +9351,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
        struct devlink_port_attrs *attrs = &devlink_port->attrs;
        int ret;
 
-       if (WARN_ON(devlink_port->registered))
+       if (WARN_ON(devlink_port->devlink))
                return;
        ret = __devlink_port_attrs_set(devlink_port,
                                       DEVLINK_PORT_FLAVOUR_PCI_SF);
@@ -9788,6 +9939,22 @@ static int devlink_param_verify(const struct devlink_param *param)
                return devlink_param_driver_verify(param);
 }
 
+static int __devlink_param_register_one(struct devlink *devlink,
+                                       unsigned int port_index,
+                                       struct list_head *param_list,
+                                       const struct devlink_param *param,
+                                       enum devlink_command reg_cmd)
+{
+       int err;
+
+       err = devlink_param_verify(param);
+       if (err)
+               return err;
+
+       return devlink_param_register_one(devlink, port_index,
+                                         param_list, param, reg_cmd);
+}
+
 static int __devlink_params_register(struct devlink *devlink,
                                     unsigned int port_index,
                                     struct list_head *param_list,
@@ -9802,12 +9969,8 @@ static int __devlink_params_register(struct devlink *devlink,
 
        mutex_lock(&devlink->lock);
        for (i = 0; i < params_count; i++, param++) {
-               err = devlink_param_verify(param);
-               if (err)
-                       goto rollback;
-
-               err = devlink_param_register_one(devlink, port_index,
-                                                param_list, param, reg_cmd);
+               err = __devlink_param_register_one(devlink, port_index,
+                                                  param_list, param, reg_cmd);
                if (err)
                        goto rollback;
        }
@@ -9879,6 +10042,43 @@ void devlink_params_unregister(struct devlink *devlink,
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
+/**
+ * devlink_param_register - register one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Register the configuration parameter supported by the driver.
+ * Return: returns 0 on successful registration or error code otherwise.
+ */
+int devlink_param_register(struct devlink *devlink,
+                          const struct devlink_param *param)
+{
+       int err;
+
+       mutex_lock(&devlink->lock);
+       err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
+                                          param, DEVLINK_CMD_PARAM_NEW);
+       mutex_unlock(&devlink->lock);
+       return err;
+}
+EXPORT_SYMBOL_GPL(devlink_param_register);
+
+/**
+ * devlink_param_unregister - unregister one configuration parameter
+ * @devlink: devlink
+ * @param: configuration parameter to unregister
+ */
+void devlink_param_unregister(struct devlink *devlink,
+                             const struct devlink_param *param)
+{
+       mutex_lock(&devlink->lock);
+       devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
+                                    DEVLINK_CMD_PARAM_DEL);
+       mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_param_unregister);
+
 /**
  *     devlink_params_publish - publish configuration parameters
  *
@@ -9921,6 +10121,54 @@ void devlink_params_unpublish(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_params_unpublish);
 
+/**
+ * devlink_param_publish - publish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Publish previously registered configuration parameter.
+ */
+void devlink_param_publish(struct devlink *devlink,
+                          const struct devlink_param *param)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, &devlink->param_list, list) {
+               if (param_item->param != param || param_item->published)
+                       continue;
+               param_item->published = true;
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_NEW);
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(devlink_param_publish);
+
+/**
+ * devlink_param_unpublish - unpublish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Unpublish previously registered configuration parameter.
+ */
+void devlink_param_unpublish(struct devlink *devlink,
+                            const struct devlink_param *param)
+{
+       struct devlink_param_item *param_item;
+
+       list_for_each_entry(param_item, &devlink->param_list, list) {
+               if (param_item->param != param || !param_item->published)
+                       continue;
+               param_item->published = false;
+               devlink_param_notify(devlink, 0, param_item,
+                                    DEVLINK_CMD_PARAM_DEL);
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(devlink_param_unpublish);
+
 /**
  *     devlink_port_params_register - register port configuration parameters
  *
@@ -11276,23 +11524,29 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
 {
        struct devlink *devlink;
        u32 actions_performed;
+       unsigned long index;
        int err;
 
        /* In case network namespace is getting destroyed, reload
         * all devlink instances from this namespace into init_net.
         */
        mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (net_eq(devlink_net(devlink), net)) {
-                       if (WARN_ON(!devlink_reload_supported(devlink->ops)))
-                               continue;
-                       err = devlink_reload(devlink, &init_net,
-                                            DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
-                                            DEVLINK_RELOAD_LIMIT_UNSPEC,
-                                            &actions_performed, NULL);
-                       if (err && err != -EOPNOTSUPP)
-                               pr_warn("Failed to reload devlink instance into init_net\n");
-               }
+       xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+               if (!devlink_try_get(devlink))
+                       continue;
+
+               if (!net_eq(devlink_net(devlink), net))
+                       goto retry;
+
+               WARN_ON(!devlink_reload_supported(devlink->ops));
+               err = devlink_reload(devlink, &init_net,
+                                    DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+                                    DEVLINK_RELOAD_LIMIT_UNSPEC,
+                                    &actions_performed, NULL);
+               if (err && err != -EOPNOTSUPP)
+                       pr_warn("Failed to reload devlink instance into init_net\n");
+retry:
+               devlink_put(devlink);
        }
        mutex_unlock(&devlink_mutex);
 }
index ead2a8a..49442ca 100644 (file)
@@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
        }
 
        hw_metadata->input_dev = metadata->input_dev;
-       if (hw_metadata->input_dev)
-               dev_hold(hw_metadata->input_dev);
+       dev_hold(hw_metadata->input_dev);
 
        return hw_metadata;
 
@@ -867,8 +866,7 @@ free_hw_metadata:
 static void
 net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
 {
-       if (hw_metadata->input_dev)
-               dev_put(hw_metadata->input_dev);
+       dev_put(hw_metadata->input_dev);
        kfree(hw_metadata->fa_cookie);
        kfree(hw_metadata->trap_name);
        kfree(hw_metadata->trap_group_name);
index fb3bcba..497ef9b 100644 (file)
@@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
              unsigned short flags)
 {
        dst->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        dst->ops = ops;
        dst_init_metrics(dst, dst_default_metrics.metrics, true);
        dst->expires = 0UL;
@@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 
        if (dst->ops->destroy)
                dst->ops->destroy(dst);
-       if (dst->dev)
-               dev_put(dst->dev);
+       dev_put(dst->dev);
 
        lwtstate_put(dst->lwtstate);
 
index a9f9379..79df7cd 100644 (file)
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 {
        struct fib_rule *r;
 
-       r = kzalloc(ops->rule_size, GFP_KERNEL);
+       r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
        if (r == NULL)
                return -ENOMEM;
 
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
                        goto errout;
        }
 
-       nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+       nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
        if (!nlrule) {
                err = -ENOMEM;
                goto errout;
index d70187c..2e32cee 100644 (file)
@@ -77,6 +77,7 @@
 #include <net/transp_v6.h>
 #include <linux/btf_ids.h>
 #include <net/tls.h>
+#include <net/xdp.h>
 
 static const struct bpf_func_proto *
 bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -113,7 +114,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
  * Run the eBPF program and then cut skb->data to correct size returned by
  * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
+ * wrapper to bpf_prog_run. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -2179,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -2213,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        }
        rcu_read_unlock_bh();
        if (dst)
-               IP6_INC_STATS(dev_net(dst->dev),
-                             ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+               IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
        kfree_skb(skb);
        return -ENETDOWN;
@@ -2286,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -3880,8 +3864,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
        if (unlikely(meta < xdp_frame_end ||
                     meta > xdp->data))
                return -EINVAL;
-       if (unlikely((metalen & (sizeof(__u32) - 1)) ||
-                    (metalen > 32)))
+       if (unlikely(xdp_metalen_invalid(metalen)))
                return -EACCES;
 
        xdp->data_meta = meta;
@@ -3950,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
        }
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+       struct net_device *master, *slave;
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+       master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+       slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+       if (slave && slave != xdp->rxq->dev) {
+               /* The target device is different from the receiving device, so
+                * redirect it to the new device.
+                * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+                * drivers to unmap the packet from their rx ring.
+                */
+               ri->tgt_index = slave->ifindex;
+               ri->map_id = INT_MAX;
+               ri->map_type = BPF_MAP_TYPE_UNSPEC;
+               return XDP_REDIRECT;
+       }
+       return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                    struct bpf_prog *xdp_prog)
 {
@@ -4040,8 +4048,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
                        goto err;
                consume_skb(skb);
                break;
+       case BPF_MAP_TYPE_CPUMAP:
+               err = cpu_map_generic_redirect(fwd, skb);
+               if (unlikely(err))
+                       goto err;
+               break;
        default:
-               /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
                err = -EBADRQC;
                goto err;
        }
@@ -4664,6 +4676,30 @@ static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
        .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
 };
 
+BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+       return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
+       .func           = bpf_get_netns_cookie_sock_ops,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
+};
+
+BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
+{
+       return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
+       .func           = bpf_get_netns_cookie_sk_msg,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
+};
+
 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
 {
        struct sock *sk = sk_to_full_sk(skb->sk);
@@ -5012,6 +5048,46 @@ err_clear:
        return -EINVAL;
 }
 
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       if (level == SOL_TCP && optname == TCP_CONGESTION) {
+               if (optlen >= sizeof("cdg") - 1 &&
+                   !strncmp("cdg", optval, optlen))
+                       return -ENOTSUPP;
+       }
+
+       return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+       .func           = bpf_sk_setsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+       .func           = bpf_sk_getsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
           int, level, int, optname, char *, optval, int, optlen)
 {
@@ -7445,6 +7521,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
                return &bpf_sk_storage_delete_proto;
+       case BPF_FUNC_get_netns_cookie:
+               return &bpf_get_netns_cookie_sock_ops_proto;
 #ifdef CONFIG_INET
        case BPF_FUNC_load_hdr_opt:
                return &bpf_sock_ops_load_hdr_opt_proto;
@@ -7491,6 +7569,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
                return &bpf_sk_storage_delete_proto;
+       case BPF_FUNC_get_netns_cookie:
+               return &bpf_get_netns_cookie_sk_msg_proto;
 #ifdef CONFIG_CGROUPS
        case BPF_FUNC_get_current_cgroup_id:
                return &bpf_get_current_cgroup_id_proto;
@@ -10069,7 +10149,7 @@ struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
        enum sk_action action;
 
        bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
-       action = BPF_PROG_RUN(prog, &reuse_kern);
+       action = bpf_prog_run(prog, &reuse_kern);
 
        if (action == SK_PASS)
                return reuse_kern.selected_sk;
index 4b2415d..bac0184 100644 (file)
@@ -1056,8 +1056,10 @@ proto_again:
                                                              FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                                              target_container);
 
-                       memcpy(&key_addrs->v4addrs, &iph->saddr,
-                              sizeof(key_addrs->v4addrs));
+                       memcpy(&key_addrs->v4addrs.src, &iph->saddr,
+                              sizeof(key_addrs->v4addrs.src));
+                       memcpy(&key_addrs->v4addrs.dst, &iph->daddr,
+                              sizeof(key_addrs->v4addrs.dst));
                        key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                }
 
@@ -1101,8 +1103,10 @@ proto_again:
                                                              FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                                              target_container);
 
-                       memcpy(&key_addrs->v6addrs, &iph->saddr,
-                              sizeof(key_addrs->v6addrs));
+                       memcpy(&key_addrs->v6addrs.src, &iph->saddr,
+                              sizeof(key_addrs->v6addrs.src));
+                       memcpy(&key_addrs->v6addrs.dst, &iph->daddr,
+                              sizeof(key_addrs->v6addrs.dst));
                        key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                }
 
index 715b67f..6beaea1 100644 (file)
@@ -321,13 +321,13 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple);
 static DEFINE_MUTEX(flow_indr_block_lock);
 static LIST_HEAD(flow_block_indr_list);
 static LIST_HEAD(flow_block_indr_dev_list);
+static LIST_HEAD(flow_indir_dev_list);
 
 struct flow_indr_dev {
        struct list_head                list;
        flow_indr_block_bind_cb_t       *cb;
        void                            *cb_priv;
        refcount_t                      refcnt;
-       struct rcu_head                 rcu;
 };
 
 static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
@@ -346,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
        return indr_dev;
 }
 
+struct flow_indir_dev_info {
+       void *data;
+       struct net_device *dev;
+       struct Qdisc *sch;
+       enum tc_setup_type type;
+       void (*cleanup)(struct flow_block_cb *block_cb);
+       struct list_head list;
+       enum flow_block_command command;
+       enum flow_block_binder_type binder_type;
+       struct list_head *cb_list;
+};
+
+static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
+{
+       struct flow_block_offload bo;
+       struct flow_indir_dev_info *cur;
+
+       list_for_each_entry(cur, &flow_indir_dev_list, list) {
+               memset(&bo, 0, sizeof(bo));
+               bo.command = cur->command;
+               bo.binder_type = cur->binder_type;
+               INIT_LIST_HEAD(&bo.cb_list);
+               cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup);
+               list_splice(&bo.cb_list, cur->cb_list);
+       }
+}
+
 int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
 {
        struct flow_indr_dev *indr_dev;
@@ -367,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
        }
 
        list_add(&indr_dev->list, &flow_block_indr_dev_list);
+       existing_qdiscs_register(cb, cb_priv);
        mutex_unlock(&flow_indr_block_lock);
 
        return 0;
@@ -463,7 +491,59 @@ out:
 }
 EXPORT_SYMBOL(flow_indr_block_cb_alloc);
 
-int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+static struct flow_indir_dev_info *find_indir_dev(void *data)
+{
+       struct flow_indir_dev_info *cur;
+
+       list_for_each_entry(cur, &flow_indir_dev_list, list) {
+               if (cur->data == data)
+                       return cur;
+       }
+       return NULL;
+}
+
+static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch,
+                        enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb),
+                        struct flow_block_offload *bo)
+{
+       struct flow_indir_dev_info *info;
+
+       info = find_indir_dev(data);
+       if (info)
+               return -EEXIST;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       info->data = data;
+       info->dev = dev;
+       info->sch = sch;
+       info->type = type;
+       info->cleanup = cleanup;
+       info->command = bo->command;
+       info->binder_type = bo->binder_type;
+       info->cb_list = bo->cb_list_head;
+
+       list_add(&info->list, &flow_indir_dev_list);
+       return 0;
+}
+
+static int indir_dev_remove(void *data)
+{
+       struct flow_indir_dev_info *info;
+
+       info = find_indir_dev(data);
+       if (!info)
+               return -ENOENT;
+
+       list_del(&info->list);
+
+       kfree(info);
+       return 0;
+}
+
+int flow_indr_dev_setup_offload(struct net_device *dev,        struct Qdisc *sch,
                                enum tc_setup_type type, void *data,
                                struct flow_block_offload *bo,
                                void (*cleanup)(struct flow_block_cb *block_cb))
@@ -471,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
        struct flow_indr_dev *this;
 
        mutex_lock(&flow_indr_block_lock);
+
+       if (bo->command == FLOW_BLOCK_BIND)
+               indir_dev_add(data, dev, sch, type, cleanup, bo);
+       else if (bo->command == FLOW_BLOCK_UNBIND)
+               indir_dev_remove(data);
+
        list_for_each_entry(this, &flow_block_indr_dev_list, list)
                this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
 
index 8ec7d13..2820aca 100644 (file)
@@ -23,6 +23,9 @@
 #include <net/ip6_fib.h>
 #include <net/rtnh.h>
 
+DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
+
 #ifdef CONFIG_MODULES
 
 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
@@ -43,6 +46,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
                return "SEG6LOCAL";
        case LWTUNNEL_ENCAP_RPL:
                return "RPL";
+       case LWTUNNEL_ENCAP_IOAM6:
+               return "IOAM6";
        case LWTUNNEL_ENCAP_IP6:
        case LWTUNNEL_ENCAP_IP:
        case LWTUNNEL_ENCAP_NONE:
index 53e85c7..2d5bc3a 100644 (file)
@@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        if (tbl->pconstructor && tbl->pconstructor(n)) {
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
@@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
-                       if (n->dev)
-                               dev_put(n->dev);
+                       dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
@@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
-               if (n->dev)
-                       dev_put(n->dev);
+               dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
@@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
        list_del(&parms->list);
        parms->dead = 1;
        write_unlock_bh(&tbl->lock);
-       if (parms->dev)
-               dev_put(parms->dev);
+       dev_put(parms->dev);
        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 }
 EXPORT_SYMBOL(neigh_parms_release);
@@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+       /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
+        * invalid value for ifindex to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
 
@@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
        struct neigh_statistics *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+               seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }
 
-       seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
-                       "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
+       seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
+                       "%08lx         %08lx         %08lx         "
+                       "%08lx       %08lx            %08lx\n",
                   atomic_read(&tbl->entries),
 
                   st->allocs,
index d8b9dba..eab5fc8 100644 (file)
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
        struct rtnl_link_stats64 temp;
        const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
-       seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
-                  "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+       seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
+                  "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
                   dev->name, stats->rx_bytes, stats->rx_packets,
                   stats->rx_errors,
                   stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 static int dev_seq_show(struct seq_file *seq, void *v)
 {
        if (v == SEQ_START_TOKEN)
-               seq_puts(seq, "Inter-|   Receive                            "
-                             "                    |  Transmit\n"
-                             " face |bytes    packets errs drop fifo frame "
-                             "compressed multicast|bytes    packets errs "
-                             "drop fifo colls carrier compressed\n");
+               seq_puts(seq, "Interface|                            Receive                   "
+                             "                    |                                 Transmit\n"
+                             "         |            bytes      packets errs   drop fifo frame "
+                             "compressed multicast|            bytes      packets errs "
+                             "  drop fifo colls carrier compressed\n");
        else
                dev_seq_printf_stats(seq, v);
        return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
        struct packet_type *pt = v;
 
        if (v == SEQ_START_TOKEN)
-               seq_puts(seq, "Type Device      Function\n");
+               seq_puts(seq, "Type      Device      Function\n");
        else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
                if (pt->type == htons(ETH_P_ALL))
                        seq_puts(seq, "ALL ");
                else
                        seq_printf(seq, "%04x", ntohs(pt->type));
 
-               seq_printf(seq, " %-8s %ps\n",
+               seq_printf(seq, "      %-9s   %ps\n",
                           pt->dev ? pt->dev->name : "", pt->func);
        }
 
@@ -327,12 +327,14 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
        struct netdev_hw_addr *ha;
        struct net_device *dev = v;
 
-       if (v == SEQ_START_TOKEN)
+       if (v == SEQ_START_TOKEN) {
+               seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
                return 0;
+       }
 
        netif_addr_lock_bh(dev);
        netdev_for_each_mc_addr(ha, dev) {
-               seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
+               seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
                           dev->ifindex, dev->name,
                           ha->refcount, ha->global_use,
                           (int)dev->addr_len, ha->addr);
index 9b5a767..a448a9b 100644 (file)
@@ -98,7 +98,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
        }
 
        ng = net_alloc_generic();
-       if (ng == NULL)
+       if (!ng)
                return -ENOMEM;
 
        /*
@@ -148,13 +148,6 @@ out:
        return err;
 }
 
-static void ops_free(const struct pernet_operations *ops, struct net *net)
-{
-       if (ops->id && ops->size) {
-               kfree(net_generic(net, *ops->id));
-       }
-}
-
 static void ops_pre_exit_list(const struct pernet_operations *ops,
                              struct list_head *net_exit_list)
 {
@@ -184,7 +177,7 @@ static void ops_free_list(const struct pernet_operations *ops,
        struct net *net;
        if (ops->size && ops->id) {
                list_for_each_entry(net, net_exit_list, exit_list)
-                       ops_free(ops, net);
+                       kfree(net_generic(net, *ops->id));
        }
 }
 
@@ -433,15 +426,18 @@ out_free:
 
 static void net_free(struct net *net)
 {
-       kfree(rcu_access_pointer(net->gen));
-       kmem_cache_free(net_cachep, net);
+       if (refcount_dec_and_test(&net->passive)) {
+               kfree(rcu_access_pointer(net->gen));
+               kmem_cache_free(net_cachep, net);
+       }
 }
 
 void net_drop_ns(void *p)
 {
-       struct net *ns = p;
-       if (ns && refcount_dec_and_test(&ns->passive))
-               net_free(ns);
+       struct net *net = (struct net *)p;
+
+       if (net)
+               net_free(net);
 }
 
 struct net *copy_net_ns(unsigned long flags,
@@ -479,7 +475,7 @@ struct net *copy_net_ns(unsigned long flags,
 put_userns:
                key_remove_domain(net->key_domain);
                put_user_ns(user_ns);
-               net_drop_ns(net);
+               net_free(net);
 dec_ucounts:
                dec_net_namespaces(ucounts);
                return ERR_PTR(rv);
@@ -611,7 +607,7 @@ static void cleanup_net(struct work_struct *work)
                dec_net_namespaces(net->ucounts);
                key_remove_domain(net->key_domain);
                put_user_ns(net->user_ns);
-               net_drop_ns(net);
+               net_free(net);
        }
 }
 
@@ -1120,6 +1116,14 @@ static int __init net_ns_init(void)
 
 pure_initcall(net_ns_init);
 
+static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
+{
+       ops_pre_exit_list(ops, net_exit_list);
+       synchronize_rcu();
+       ops_exit_list(ops, net_exit_list);
+       ops_free_list(ops, net_exit_list);
+}
+
 #ifdef CONFIG_NET_NS
 static int __register_pernet_operations(struct list_head *list,
                                        struct pernet_operations *ops)
@@ -1145,10 +1149,7 @@ static int __register_pernet_operations(struct list_head *list,
 out_undo:
        /* If I have an error cleanup all namespaces I initialized */
        list_del(&ops->list);
-       ops_pre_exit_list(ops, &net_exit_list);
-       synchronize_rcu();
-       ops_exit_list(ops, &net_exit_list);
-       ops_free_list(ops, &net_exit_list);
+       free_exit_list(ops, &net_exit_list);
        return error;
 }
 
@@ -1161,10 +1162,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
        /* See comment in __register_pernet_operations() */
        for_each_net(net)
                list_add_tail(&net->exit_list, &net_exit_list);
-       ops_pre_exit_list(ops, &net_exit_list);
-       synchronize_rcu();
-       ops_exit_list(ops, &net_exit_list);
-       ops_free_list(ops, &net_exit_list);
+
+       free_exit_list(ops, &net_exit_list);
 }
 
 #else
@@ -1187,10 +1186,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
        } else {
                LIST_HEAD(net_exit_list);
                list_add(&init_net.exit_list, &net_exit_list);
-               ops_pre_exit_list(ops, &net_exit_list);
-               synchronize_rcu();
-               ops_exit_list(ops, &net_exit_list);
-               ops_free_list(ops, &net_exit_list);
+               free_exit_list(ops, &net_exit_list);
        }
 }
 
index 8ab7b40..1a69784 100644 (file)
@@ -24,6 +24,8 @@
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
+#define BIAS_MAX       LONG_MAX
+
 static int page_pool_init(struct page_pool *pool,
                          const struct page_pool_params *params)
 {
@@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
                 */
        }
 
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+           pool->p.flags & PP_FLAG_PAGE_FRAG)
+               return -EINVAL;
+
        if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
                return -ENOMEM;
 
@@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
        return true;
 }
 
+static void page_pool_set_pp_info(struct page_pool *pool,
+                                 struct page *page)
+{
+       page->pp = pool;
+       page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+       page->pp_magic = 0;
+       page->pp = NULL;
+}
+
 static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                                                 gfp_t gfp)
 {
@@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                return NULL;
        }
 
-       page->pp_magic |= PP_SIGNATURE;
+       page_pool_set_pp_info(pool, page);
 
        /* Track how many pages are held 'in-flight' */
        pool->pages_state_hold_cnt++;
@@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
                        put_page(page);
                        continue;
                }
-               page->pp_magic |= PP_SIGNATURE;
+
+               page_pool_set_pp_info(pool, page);
                pool->alloc.cache[pool->alloc.count++] = page;
                /* Track how many pages are held 'in-flight' */
                pool->pages_state_hold_cnt++;
@@ -345,12 +365,12 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
                             DMA_ATTR_SKIP_CPU_SYNC);
        page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
-       page->pp_magic = 0;
+       page_pool_clear_pp_info(page);
 
        /* This may be the last page returned, releasing the pool, so
         * it is not safe to reference pool afterwards.
         */
-       count = atomic_inc_return(&pool->pages_state_release_cnt);
+       count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
        trace_page_pool_state_release(pool, page, count);
 }
 EXPORT_SYMBOL(page_pool_release_page);
@@ -405,6 +425,11 @@ static __always_inline struct page *
 __page_pool_put_page(struct page_pool *pool, struct page *page,
                     unsigned int dma_sync_size, bool allow_direct)
 {
+       /* It is not the last user for the page frag case */
+       if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+           page_pool_atomic_sub_frag_count_return(page, 1))
+               return NULL;
+
        /* This allocator is optimized for the XDP mode that uses
         * one-frame-per-page, but have fallbacks that act like the
         * regular page allocator APIs.
@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 EXPORT_SYMBOL(page_pool_put_page_bulk);
 
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+                                        struct page *page)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+
+       /* Some user is still using the page frag */
+       if (likely(page_pool_atomic_sub_frag_count_return(page,
+                                                         drain_count)))
+               return NULL;
+
+       if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+               if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+                       page_pool_dma_sync_for_device(pool, page, -1);
+
+               return page;
+       }
+
+       page_pool_return_page(pool, page);
+       return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+       struct page *page = pool->frag_page;
+
+       pool->frag_page = NULL;
+
+       if (!page ||
+           page_pool_atomic_sub_frag_count_return(page, drain_count))
+               return;
+
+       page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+                                 unsigned int *offset,
+                                 unsigned int size, gfp_t gfp)
+{
+       unsigned int max_size = PAGE_SIZE << pool->p.order;
+       struct page *page = pool->frag_page;
+
+       if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+                   size > max_size))
+               return NULL;
+
+       size = ALIGN(size, dma_get_cache_alignment());
+       *offset = pool->frag_offset;
+
+       if (page && *offset + size > max_size) {
+               page = page_pool_drain_frag(pool, page);
+               if (page)
+                       goto frag_reset;
+       }
+
+       if (!page) {
+               page = page_pool_alloc_pages(pool, gfp);
+               if (unlikely(!page)) {
+                       pool->frag_page = NULL;
+                       return NULL;
+               }
+
+               pool->frag_page = page;
+
+frag_reset:
+               pool->frag_users = 1;
+               *offset = 0;
+               pool->frag_offset = size;
+               page_pool_set_frag_count(page, BIAS_MAX);
+               return page;
+       }
+
+       pool->frag_users++;
+       pool->frag_offset = *offset + size;
+       return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
 static void page_pool_empty_ring(struct page_pool *pool)
 {
        struct page *page;
@@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
        if (!page_pool_put(pool))
                return;
 
+       page_pool_free_frag(pool);
+
        if (!page_pool_release(pool))
                return;
 
@@ -652,7 +757,6 @@ bool page_pool_return_skb_page(struct page *page)
         * The page will be returned to the pool here regardless of the
         * 'flipped' fragment being in use or not.
         */
-       page->pp = NULL;
        page_pool_put_full_page(pp, page, false);
 
        return true;
index 7e258d2..9e5a324 100644 (file)
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
 #define MPLS_STACK_BOTTOM htonl(0x00000100)
+/* Max number of internet mix entries that can be specified in imix_weights. */
+#define MAX_IMIX_ENTRIES 20
+#define IMIX_PRECISION 100 /* Precision of IMIX distribution */
 
 #define func_enter() pr_debug("entering %s\n", __func__);
 
@@ -242,6 +245,12 @@ static char *pkt_flag_names[] = {
 #define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4)
 #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
 
+struct imix_pkt {
+       u64 size;
+       u64 weight;
+       u64 count_so_far;
+};
+
 struct flow_state {
        __be32 cur_daddr;
        int count;
@@ -343,6 +352,12 @@ struct pktgen_dev {
        __u8 traffic_class;  /* ditto for the (former) Traffic Class in IPv6
                                (see RFC 3260, sec. 4) */
 
+       /* IMIX */
+       unsigned int n_imix_entries;
+       struct imix_pkt imix_entries[MAX_IMIX_ENTRIES];
+       /* Maps 0-IMIX_PRECISION range to imix_entry based on probability*/
+       __u8 imix_distribution[IMIX_PRECISION];
+
        /* MPLS */
        unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */
        __be32 labels[MAX_MPLS_LABELS];
@@ -471,6 +486,7 @@ static void pktgen_stop_all_threads(struct pktgen_net *pn);
 
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev);
 
 /* Module parameters, defaults. */
 static int pg_count_d __read_mostly = 1000;
@@ -552,6 +568,16 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
                   (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size,
                   pkt_dev->max_pkt_size);
 
+       if (pkt_dev->n_imix_entries > 0) {
+               seq_puts(seq, "     imix_weights: ");
+               for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+                       seq_printf(seq, "%llu,%llu ",
+                                  pkt_dev->imix_entries[i].size,
+                                  pkt_dev->imix_entries[i].weight);
+               }
+               seq_puts(seq, "\n");
+       }
+
        seq_printf(seq,
                   "     frags: %d  delay: %llu  clone_skb: %d  ifname: %s\n",
                   pkt_dev->nfrags, (unsigned long long) pkt_dev->delay,
@@ -669,6 +695,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
                   (unsigned long long)pkt_dev->sofar,
                   (unsigned long long)pkt_dev->errors);
 
+       if (pkt_dev->n_imix_entries > 0) {
+               int i;
+
+               seq_puts(seq, "     imix_size_counts: ");
+               for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+                       seq_printf(seq, "%llu,%llu ",
+                                  pkt_dev->imix_entries[i].size,
+                                  pkt_dev->imix_entries[i].count_so_far);
+               }
+               seq_puts(seq, "\n");
+       }
+
        seq_printf(seq,
                   "     started: %lluus  stopped: %lluus idle: %lluus\n",
                   (unsigned long long) ktime_to_us(pkt_dev->started_at),
@@ -792,6 +830,62 @@ done_str:
        return i;
 }
 
+/* Parses imix entries from user buffer.
+ * The user buffer should consist of imix entries separated by spaces
+ * where each entry consists of size and weight delimited by commas.
+ * "size1,weight_1 size2,weight_2 ... size_n,weight_n" for example.
+ */
+static ssize_t get_imix_entries(const char __user *buffer,
+                               struct pktgen_dev *pkt_dev)
+{
+       const int max_digits = 10;
+       int i = 0;
+       long len;
+       char c;
+
+       pkt_dev->n_imix_entries = 0;
+
+       do {
+               unsigned long weight;
+               unsigned long size;
+
+               len = num_arg(&buffer[i], max_digits, &size);
+               if (len < 0)
+                       return len;
+               i += len;
+               if (get_user(c, &buffer[i]))
+                       return -EFAULT;
+               /* Check for comma between size_i and weight_i */
+               if (c != ',')
+                       return -EINVAL;
+               i++;
+
+               if (size < 14 + 20 + 8)
+                       size = 14 + 20 + 8;
+
+               len = num_arg(&buffer[i], max_digits, &weight);
+               if (len < 0)
+                       return len;
+               if (weight <= 0)
+                       return -EINVAL;
+
+               pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size;
+               pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight;
+
+               i += len;
+               if (get_user(c, &buffer[i]))
+                       return -EFAULT;
+
+               i++;
+               pkt_dev->n_imix_entries++;
+
+               if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)
+                       return -E2BIG;
+       } while (c == ' ');
+
+       return i;
+}
+
 static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
 {
        unsigned int n = 0;
@@ -960,6 +1054,20 @@ static ssize_t pktgen_if_write(struct file *file,
                return count;
        }
 
+       if (!strcmp(name, "imix_weights")) {
+               if (pkt_dev->clone_skb > 0)
+                       return -EINVAL;
+
+               len = get_imix_entries(&user_buffer[i], pkt_dev);
+               if (len < 0)
+                       return len;
+
+               fill_imix_distribution(pkt_dev);
+
+               i += len;
+               return count;
+       }
+
        if (!strcmp(name, "debug")) {
                len = num_arg(&user_buffer[i], 10, &value);
                if (len < 0)
@@ -1082,10 +1190,16 @@ static ssize_t pktgen_if_write(struct file *file,
                len = num_arg(&user_buffer[i], 10, &value);
                if (len < 0)
                        return len;
+               /* clone_skb is not supported for netif_receive xmit_mode and
+                * IMIX mode.
+                */
                if ((value > 0) &&
                    ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
                     !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
                        return -ENOTSUPP;
+               if (value > 0 && pkt_dev->n_imix_entries > 0)
+                       return -EINVAL;
+
                i += len;
                pkt_dev->clone_skb = value;
 
@@ -1190,11 +1304,6 @@ static ssize_t pktgen_if_write(struct file *file,
                         * pktgen_xmit() is called
                         */
                        pkt_dev->last_ok = 1;
-
-                       /* override clone_skb if user passed default value
-                        * at module loading time
-                        */
-                       pkt_dev->clone_skb = 0;
                } else if (strcmp(f, "queue_xmit") == 0) {
                        pkt_dev->xmit_mode = M_QUEUE_XMIT;
                        pkt_dev->last_ok = 1;
@@ -2477,6 +2586,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
                                t = pkt_dev->min_pkt_size;
                }
                pkt_dev->cur_pkt_size = t;
+       } else if (pkt_dev->n_imix_entries > 0) {
+               struct imix_pkt *entry;
+               __u32 t = prandom_u32() % IMIX_PRECISION;
+               __u8 entry_index = pkt_dev->imix_distribution[t];
+
+               entry = &pkt_dev->imix_entries[entry_index];
+               entry->count_so_far++;
+               pkt_dev->cur_pkt_size = entry->size;
        }
 
        set_cur_queue_map(pkt_dev);
@@ -2484,6 +2601,32 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
        pkt_dev->flows[flow].count++;
 }
 
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev)
+{
+       int cumulative_probabilites[MAX_IMIX_ENTRIES];
+       int j = 0;
+       __u64 cumulative_prob = 0;
+       __u64 total_weight = 0;
+       int i = 0;
+
+       for (i = 0; i < pkt_dev->n_imix_entries; i++)
+               total_weight += pkt_dev->imix_entries[i].weight;
+
+       /* Fill cumulative_probabilites with sum of normalized probabilities */
+       for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) {
+               cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight *
+                                                    IMIX_PRECISION,
+                                            total_weight);
+               cumulative_probabilites[i] = cumulative_prob;
+       }
+       cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100;
+
+       for (i = 0; i < IMIX_PRECISION; i++) {
+               if (i == cumulative_probabilites[j])
+                       j++;
+               pkt_dev->imix_distribution[i] = j;
+       }
+}
 
 #ifdef CONFIG_XFRM
 static u32 pktgen_dst_metrics[RTAX_MAX + 1] = {
@@ -3145,7 +3288,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
        pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC,
                        ktime_to_ns(elapsed));
 
-       bps = pps * 8 * pkt_dev->cur_pkt_size;
+       if (pkt_dev->n_imix_entries > 0) {
+               int i;
+               struct imix_pkt *entry;
+
+               bps = 0;
+               for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+                       entry = &pkt_dev->imix_entries[i];
+                       bps += entry->size * entry->count_so_far;
+               }
+               bps = div64_u64(bps * 8 * NSEC_PER_SEC, ktime_to_ns(elapsed));
+       } else {
+               bps = pps * 8 * pkt_dev->cur_pkt_size;
+       }
 
        mbps = bps;
        do_div(mbps, 1000000);
index e33fde0..dd4cf01 100644 (file)
@@ -103,7 +103,7 @@ static struct bpf_prog *ptp_insns __read_mostly;
 
 unsigned int ptp_classify_raw(const struct sk_buff *skb)
 {
-       return BPF_PROG_RUN(ptp_insns, skb);
+       return bpf_prog_run(ptp_insns, skb);
 }
 EXPORT_SYMBOL_GPL(ptp_classify_raw);
 
index 662eb1c..972c8cb 100644 (file)
@@ -710,15 +710,8 @@ out:
 int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
 {
        struct sock *rtnl = net->rtnl;
-       int err = 0;
 
-       NETLINK_CB(skb).dst_group = group;
-       if (echo)
-               refcount_inc(&skb->users);
-       netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
-       if (echo)
-               err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
-       return err;
+       return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL);
 }
 
 int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
@@ -733,12 +726,8 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
                 struct nlmsghdr *nlh, gfp_t flags)
 {
        struct sock *rtnl = net->rtnl;
-       int report = 0;
 
-       if (nlh)
-               report = nlmsg_report(nlh);
-
-       nlmsg_notify(rtnl, skb, pid, group, report, flags);
+       nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags);
 }
 EXPORT_SYMBOL(rtnl_notify);
 
@@ -1970,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = netdev_master_upper_dev_get(dev);
+
+       /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
+        * another invalid value for ifindex to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
 
@@ -2268,7 +2264,8 @@ invalid_attr:
        return -EINVAL;
 }
 
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+                           struct netlink_ext_ack *extack)
 {
        if (dev) {
                if (tb[IFLA_ADDRESS] &&
@@ -2295,7 +2292,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
                                return -EOPNOTSUPP;
 
                        if (af_ops->validate_link_af) {
-                               err = af_ops->validate_link_af(dev, af);
+                               err = af_ops->validate_link_af(dev, af, extack);
                                if (err < 0)
                                        return err;
                        }
@@ -2603,7 +2600,7 @@ static int do_setlink(const struct sk_buff *skb,
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
@@ -3302,7 +3299,7 @@ replay:
                        m_ops = master_dev->rtnl_link_ops;
        }
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
index ae3085d..5c356f0 100644 (file)
@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 
        if (!fpl)
        {
-               fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+               fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
                if (!fpl)
                        return -ENOMEM;
                *fplp = fpl;
@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
                return NULL;
 
        new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
-                         GFP_KERNEL);
+                         GFP_KERNEL_ACCOUNT);
        if (new_fpl) {
                for (i = 0; i < fpl->count; i++)
                        get_file(fpl->fp[i]);
index ba7b017..9077fa9 100644 (file)
@@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev)
        return __net_test_loopback(ndev, &attr);
 }
 
+static int net_test_phy_loopback_udp_mtu(struct net_device *ndev)
+{
+       struct net_packet_attrs attr = { };
+
+       attr.dst = ndev->dev_addr;
+       attr.max_size = ndev->mtu;
+       return __net_test_loopback(ndev, &attr);
+}
+
 static int net_test_phy_loopback_tcp(struct net_device *ndev)
 {
        struct net_packet_attrs attr = { };
@@ -344,6 +353,9 @@ static const struct net_test {
        }, {
                .name = "PHY internal loopback, UDP    ",
                .fn = net_test_phy_loopback_udp,
+       }, {
+               .name = "PHY internal loopback, MTU    ",
+               .fn = net_test_phy_loopback_udp_mtu,
        }, {
                .name = "PHY internal loopback, TCP    ",
                .fn = net_test_phy_loopback_tcp,
index fc7942c..f931176 100644 (file)
@@ -156,7 +156,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
        void *data;
 
        fragsz = SKB_DATA_ALIGN(fragsz);
-       if (in_irq() || irqs_disabled()) {
+       if (in_hardirq() || irqs_disabled()) {
                nc = this_cpu_ptr(&netdev_alloc_cache);
                data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
        } else {
@@ -502,7 +502,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
        if (sk_memalloc_socks())
                gfp_mask |= __GFP_MEMALLOC;
 
-       if (in_irq() || irqs_disabled()) {
+       if (in_hardirq() || irqs_disabled()) {
                nc = this_cpu_ptr(&netdev_alloc_cache);
                data = page_frag_alloc(nc, len, gfp_mask);
                pfmemalloc = nc->pfmemalloc;
@@ -724,7 +724,7 @@ void skb_release_head_state(struct sk_buff *skb)
 {
        skb_dst_drop(skb);
        if (skb->destructor) {
-               WARN_ON(in_irq());
+               WARN_ON(in_hardirq());
                skb->destructor(skb);
        }
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -954,9 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
 
 void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
-       nf_reset_ct(skb);
-       skb_dst_drop(skb);
-       skb_ext_put(skb);
+       if (unlikely(skb->slow_gro)) {
+               nf_reset_ct(skb);
+               skb_dst_drop(skb);
+               skb_ext_put(skb);
+               skb_orphan(skb);
+               skb->slow_gro = 0;
+       }
        napi_skb_cache_put(skb);
 }
 
@@ -1785,6 +1789,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+/**
+ *     skb_expand_head - reallocate header of &sk_buff
+ *     @skb: buffer to reallocate
+ *     @headroom: needed headroom
+ *
+ *     Unlike skb_realloc_headroom, this one does not allocate a new skb
+ *     if possible; copies skb->sk to new skb as needed
+ *     and frees original skb in case of failures.
+ *
+ *     It expect increased headroom and generates warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+       int delta = headroom - skb_headroom(skb);
+
+       if (WARN_ONCE(delta <= 0,
+                     "%s is expecting an increase in the headroom", __func__))
+               return skb;
+
+       /* pskb_expand_head() might crash, if skb is shared */
+       if (skb_shared(skb)) {
+               struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+               if (likely(nskb)) {
+                       if (skb->sk)
+                               skb_set_owner_w(nskb, skb->sk);
+                       consume_skb(skb);
+               } else {
+                       kfree_skb(skb);
+               }
+               skb = nskb;
+       }
+       if (skb &&
+           pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+               kfree_skb(skb);
+               skb = NULL;
+       }
+       return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
+
 /**
  *     skb_copy_expand -       copy and expand sk_buff
  *     @skb: buffer to copy
@@ -3889,6 +3935,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
        NAPI_GRO_CB(p)->last = skb;
        NAPI_GRO_CB(p)->count++;
        p->data_len += skb->len;
+
+       /* sk owenrship - if any - completely transferred to the aggregated packet */
+       skb->destructor = NULL;
        p->truesize += skb->truesize;
        p->len += skb->len;
 
@@ -4256,6 +4305,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
        unsigned int headlen = skb_headlen(skb);
        unsigned int len = skb_gro_len(skb);
        unsigned int delta_truesize;
+       unsigned int new_truesize;
        struct sk_buff *lp;
 
        if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
@@ -4287,10 +4337,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
                skb_frag_size_sub(frag, offset);
 
                /* all fragments truesize : remove (head size + sk_buff) */
-               delta_truesize = skb->truesize -
-                                SKB_TRUESIZE(skb_end_offset(skb));
+               new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
+               delta_truesize = skb->truesize - new_truesize;
 
-               skb->truesize -= skb->data_len;
+               skb->truesize = new_truesize;
                skb->len -= skb->data_len;
                skb->data_len = 0;
 
@@ -4319,12 +4369,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
                memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
                /* We dont need to clear skbinfo->nr_frags here */
 
-               delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+               new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
+               delta_truesize = skb->truesize - new_truesize;
+               skb->truesize = new_truesize;
                NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
                goto done;
        }
 
 merge:
+       /* sk owenrship - if any - completely transferred to the aggregated packet */
+       skb->destructor = NULL;
        delta_truesize = skb->truesize;
        if (offset > headlen) {
                unsigned int eat = offset - headlen;
@@ -6449,6 +6503,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
        new->chunks = newlen;
        new->offset[id] = newoff;
 set_active:
+       skb->slow_gro = 1;
        skb->extensions = new;
        skb->active_extensions |= 1 << id;
        return skb_ext_get_ptr(new, id);
index a3eea6e..62627e8 100644 (file)
@@ -226,6 +226,7 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
   x "AF_IEEE802154",   x "AF_CAIF"     ,       x "AF_ALG"      , \
   x "AF_NFC"   ,       x "AF_VSOCK"    ,       x "AF_KCM"      , \
   x "AF_QIPCRTR",      x "AF_SMC"      ,       x "AF_XDP"      , \
+  x "AF_MCTP"  , \
   x "AF_MAX"
 
 static const char *const af_family_key_strings[AF_MAX+1] = {
@@ -1357,6 +1358,15 @@ set_sndbuf:
                ret = sock_bindtoindex_locked(sk, val);
                break;
 
+       case SO_BUF_LOCK:
+               if (val & ~SOCK_BUF_LOCK_MASK) {
+                       ret = -EINVAL;
+                       break;
+               }
+               sk->sk_userlocks = val | (sk->sk_userlocks &
+                                         ~SOCK_BUF_LOCK_MASK);
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -1719,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val64 = sock_net(sk)->net_cookie;
                break;
 
+       case SO_BUF_LOCK:
+               v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+               break;
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
@@ -2560,7 +2574,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
        }
 }
 
-#define SKB_FRAG_PAGE_ORDER    get_order(32768)
 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
 /**
@@ -2714,10 +2727,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
        struct proto *prot = sk->sk_prot;
        long allocated = sk_memory_allocated_add(sk, amt);
+       bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
        bool charged = true;
 
-       if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-           !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
+       if (memcg_charge &&
+           !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+                                               gfp_memcg_charge())))
                goto suppress_allocation;
 
        /* Under limit. */
@@ -2771,8 +2786,14 @@ suppress_allocation:
                /* Fail only if socket is _under_ its sndbuf.
                 * In this case we cannot block, so that we have to fail.
                 */
-               if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+               if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
+                       /* Force charge with __GFP_NOFAIL */
+                       if (memcg_charge && !charged) {
+                               mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+                                       gfp_memcg_charge() | __GFP_NOFAIL);
+                       }
                        return 1;
+               }
        }
 
        if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
@@ -2780,7 +2801,7 @@ suppress_allocation:
 
        sk_memory_allocated_sub(sk, amt);
 
-       if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+       if (memcg_charge && charged)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
 
        return 0;
index 60decd6..e252b8e 100644 (file)
@@ -211,8 +211,6 @@ out:
        return psock;
 }
 
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
 static int sock_map_link(struct bpf_map *map, struct sock *sk)
 {
        struct sk_psock_progs *progs = sock_map_progs(map);
@@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
        struct sk_psock *psock;
        int ret;
 
-       /* Only sockets we can redirect into/from in BPF need to hold
-        * refs to parser/verdict progs and have their sk_data_ready
-        * and sk_write_space callbacks overridden.
-        */
-       if (!sock_map_redirect_allowed(sk))
-               goto no_progs;
-
        stream_verdict = READ_ONCE(progs->stream_verdict);
        if (stream_verdict) {
                stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
                }
        }
 
-no_progs:
        psock = sock_map_psock_get_checked(sk);
        if (IS_ERR(psock)) {
                ret = PTR_ERR(psock);
@@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk)
               sk->sk_protocol == IPPROTO_TCP;
 }
 
-static bool sk_is_udp(const struct sock *sk)
-{
-       return sk->sk_type == SOCK_DGRAM &&
-              sk->sk_protocol == IPPROTO_UDP;
-}
-
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
        if (sk_is_tcp(sk))
@@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
 {
        if (sk_is_tcp(sk))
                return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
-       else if (sk_is_udp(sk))
-               return sk_hashed(sk);
-
-       return false;
+       return true;
 }
 
 static int sock_hash_update_common(struct bpf_map *map, void *key,
@@ -1513,6 +1494,7 @@ void sock_map_unhash(struct sock *sk)
        rcu_read_unlock();
        saved_unhash(sk);
 }
+EXPORT_SYMBOL_GPL(sock_map_unhash);
 
 void sock_map_close(struct sock *sk, long timeout)
 {
@@ -1536,6 +1518,7 @@ void sock_map_close(struct sock *sk, long timeout)
        release_sock(sk);
        saved_close(sk, timeout);
 }
+EXPORT_SYMBOL_GPL(sock_map_close);
 
 static int sock_map_iter_attach_target(struct bpf_prog *prog,
                                       union bpf_iter_link_info *linfo,
index 7eb0fb2..abb5c59 100644 (file)
@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
-                                 SLAB_HWCACHE_ALIGN, NULL);
+                                 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_hashinfo2;
 
index d1c50a4..0ee7d4c 100644 (file)
@@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
        }
        spin_unlock(&dndev_lock);
 
-       if (old)
-               dev_put(old);
+       dev_put(old);
        return rv;
 }
 
@@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev)
        }
        spin_unlock(&dndev_lock);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 /*
index 77fbf8e..269c029 100644 (file)
@@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
        }
 
        change_nexthops(fi) {
-               if (nh->nh_dev)
-                       dev_put(nh->nh_dev);
+               dev_put(nh->nh_dev);
                nh->nh_dev = NULL;
        } endfor_nexthops(fi);
        kfree(fi);
@@ -102,7 +101,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
 void dn_fib_release_info(struct dn_fib_info *fi)
 {
        spin_lock(&dn_fib_info_lock);
-       if (fi && --fi->fib_treeref == 0) {
+       if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
                if (fi->fib_next)
                        fi->fib_next->fib_prev = fi->fib_prev;
                if (fi->fib_prev)
@@ -385,11 +384,11 @@ link_it:
        if ((ofi = dn_fib_find_info(fi)) != NULL) {
                fi->fib_dead = 1;
                dn_fib_free_info(fi);
-               ofi->fib_treeref++;
+               refcount_inc(&ofi->fib_treeref);
                return ofi;
        }
 
-       fi->fib_treeref++;
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock(&dn_fib_info_lock);
        fi->fib_next = dn_fib_info_list;
index 729d3de..7e85f2a 100644 (file)
@@ -1026,8 +1026,7 @@ source_ok:
        if (!fld.daddr) {
                fld.daddr = fld.saddr;
 
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                err = -EINVAL;
                dev_out = init_net.loopback_dev;
                if (!dev_out->dn_ptr)
@@ -1084,8 +1083,7 @@ source_ok:
                                        neigh_release(neigh);
                                        neigh = NULL;
                                } else {
-                                       if (dev_out)
-                                               dev_put(dev_out);
+                                       dev_put(dev_out);
                                        if (dn_dev_islocal(neigh->dev, fld.daddr)) {
                                                dev_out = init_net.loopback_dev;
                                                res.type = RTN_LOCAL;
@@ -1144,8 +1142,7 @@ select_source:
        if (res.type == RTN_LOCAL) {
                if (!fld.saddr)
                        fld.saddr = fld.daddr;
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                dev_out = init_net.loopback_dev;
                dev_hold(dev_out);
                if (!dev_out->dn_ptr)
@@ -1168,8 +1165,7 @@ select_source:
        if (!fld.saddr)
                fld.saddr = DN_FIB_RES_PREFSRC(res);
 
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
        dev_out = DN_FIB_RES_DEV(res);
        dev_hold(dev_out);
        fld.flowidn_oif = dev_out->ifindex;
@@ -1222,8 +1218,7 @@ done:
                neigh_release(neigh);
        if (free_res)
                dn_fib_res_put(&res);
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
 out:
        return err;
 
@@ -1503,8 +1498,7 @@ done:
        if (free_res)
                dn_fib_res_put(&res);
        dev_put(in_dev);
-       if (out_dev)
-               dev_put(out_dev);
+       dev_put(out_dev);
 out:
        return err;
 
index 00bb89b..5482855 100644 (file)
@@ -18,16 +18,6 @@ if NET_DSA
 
 # Drivers must select the appropriate tagging format(s)
 
-config NET_DSA_TAG_8021Q
-       tristate
-       select VLAN_8021Q
-       help
-         Unlike the other tagging protocols, the 802.1Q config option simply
-         provides helpers for other tagging implementations that might rely on
-         VLAN in one way or another. It is not a complete solution.
-
-         Drivers which use these helpers should select this as dependency.
-
 config NET_DSA_TAG_AR9331
        tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
        help
@@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q
        tristate "Tag driver for Ocelot family of switches, using VLAN"
        depends on MSCC_OCELOT_SWITCH_LIB || \
                  (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
-       select NET_DSA_TAG_8021Q
        help
          Say Y or M if you want to enable support for tagging frames with a
          custom VLAN-based header. Frames that require timestamping, such as
@@ -149,7 +138,7 @@ config NET_DSA_TAG_LAN9303
 
 config NET_DSA_TAG_SJA1105
        tristate "Tag driver for NXP SJA1105 switches"
-       select NET_DSA_TAG_8021Q
+       depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
        select PACKING
        help
          Say Y or M if you want to enable support for tagging frames with the
index 44bc799..67ea009 100644 (file)
@@ -1,10 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # the core
 obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o
 
 # tagging formats
-obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
 obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
 obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
 obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
index 84cad1b..1dc45e4 100644 (file)
@@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
        if (!skb)
                return 0;
 
-       nskb = cpu_dp->rcv(skb, dev, pt);
+       nskb = cpu_dp->rcv(skb, dev);
        if (!nskb) {
                kfree_skb(skb);
                return 0;
index 185629f..1b2b25d 100644 (file)
@@ -21,6 +21,9 @@
 static DEFINE_MUTEX(dsa2_mutex);
 LIST_HEAD(dsa_tree_list);
 
+/* Track the bridges with forwarding offload enabled */
+static unsigned long dsa_fwd_offloading_bridges;
+
 /**
  * dsa_tree_notify - Execute code for all switches in a DSA switch tree.
  * @dst: collection of struct dsa_switch devices to notify.
@@ -49,6 +52,9 @@ int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
  * Can be used to notify the switching fabric of events such as cross-chip
  * bridging between disjoint trees (such as islands of tagger-compatible
  * switches bridged by an incompatible middle switch).
+ *
+ * WARNING: this function is not reliable during probe time, because probing
+ * between trees is asynchronous and not all DSA trees might have probed.
  */
 int dsa_broadcast(unsigned long e, void *v)
 {
@@ -123,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
        }
 }
 
+static int dsa_bridge_num_find(const struct net_device *bridge_dev)
+{
+       struct dsa_switch_tree *dst;
+       struct dsa_port *dp;
+
+       /* When preparing the offload for a port, it will have a valid
+        * dp->bridge_dev pointer but a not yet valid dp->bridge_num.
+        * However there might be other ports having the same dp->bridge_dev
+        * and a valid dp->bridge_num, so just ignore this port.
+        */
+       list_for_each_entry(dst, &dsa_tree_list, list)
+               list_for_each_entry(dp, &dst->ports, list)
+                       if (dp->bridge_dev == bridge_dev &&
+                           dp->bridge_num != -1)
+                               return dp->bridge_num;
+
+       return -1;
+}
+
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
+{
+       int bridge_num = dsa_bridge_num_find(bridge_dev);
+
+       if (bridge_num < 0) {
+               /* First port that offloads TX forwarding for this bridge */
+               bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges,
+                                                DSA_MAX_NUM_OFFLOADING_BRIDGES);
+               if (bridge_num >= max)
+                       return -1;
+
+               set_bit(bridge_num, &dsa_fwd_offloading_bridges);
+       }
+
+       return bridge_num;
+}
+
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
+{
+       /* Check if the bridge is still in use, otherwise it is time
+        * to clean it up so we can reuse this bridge_num later.
+        */
+       if (!dsa_bridge_num_find(bridge_dev))
+               clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+}
+
 struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
 {
        struct dsa_switch_tree *dst;
@@ -311,6 +362,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
        return NULL;
 }
 
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
 static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
 {
        struct dsa_port *cpu_dp, *dp;
@@ -321,15 +375,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
                return -EINVAL;
        }
 
-       /* Assign the default CPU port to all ports of the fabric */
-       list_for_each_entry(dp, &dst->ports, list)
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->cpu_dp)
+                       continue;
+
                if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
                        dp->cpu_dp = cpu_dp;
+       }
 
        return 0;
 }
 
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+       struct dsa_port *cpu_dp, *dp;
+
+       list_for_each_entry(cpu_dp, &dst->ports, list) {
+               if (!dsa_port_is_cpu(cpu_dp))
+                       continue;
+
+               list_for_each_entry(dp, &dst->ports, list) {
+                       /* Prefer a local CPU port */
+                       if (dp->ds != cpu_dp->ds)
+                               continue;
+
+                       /* Prefer the first local CPU port found */
+                       if (dp->cpu_dp)
+                               continue;
+
+                       if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+                               dp->cpu_dp = cpu_dp;
+               }
+       }
+
+       return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
 {
        struct dsa_port *dp;
 
@@ -710,13 +797,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
        /* Add the switch to devlink before calling setup, so that setup can
         * add dpipe tables
         */
-       ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+       ds->devlink =
+               devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
        if (!ds->devlink)
                return -ENOMEM;
        dl_priv = devlink_priv(ds->devlink);
        dl_priv->ds = ds;
 
-       err = devlink_register(ds->devlink, ds->dev);
+       err = devlink_register(ds->devlink);
        if (err)
                goto free_devlink;
 
@@ -921,13 +1009,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
        if (!complete)
                return 0;
 
-       err = dsa_tree_setup_default_cpu(dst);
+       err = dsa_tree_setup_cpu_ports(dst);
        if (err)
                return err;
 
        err = dsa_tree_setup_switches(dst);
        if (err)
-               goto teardown_default_cpu;
+               goto teardown_cpu_ports;
 
        err = dsa_tree_setup_master(dst);
        if (err)
@@ -947,8 +1035,8 @@ teardown_master:
        dsa_tree_teardown_master(dst);
 teardown_switches:
        dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
-       dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+       dsa_tree_teardown_cpu_ports(dst);
 
        return err;
 }
@@ -966,7 +1054,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
        dsa_tree_teardown_switches(dst);
 
-       dsa_tree_teardown_default_cpu(dst);
+       dsa_tree_teardown_cpu_ports(dst);
 
        list_for_each_entry_safe(dl, next, &dst->rtable, list) {
                list_del(&dl->list);
@@ -1044,6 +1132,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 
        dp->ds = ds;
        dp->index = index;
+       dp->bridge_num = -1;
 
        INIT_LIST_HEAD(&dp->list);
        list_add_tail(&dp->list, &dst->ports);
@@ -1265,6 +1354,9 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
                return -EEXIST;
        }
 
+       if (ds->dst->last_switch < ds->index)
+               ds->dst->last_switch = ds->index;
+
        return 0;
 }
 
index f201c33..33ab7d7 100644 (file)
@@ -14,6 +14,8 @@
 #include <net/dsa.h>
 #include <net/gro_cells.h>
 
+#define DSA_MAX_NUM_OFFLOADING_BRIDGES         BITS_PER_LONG
+
 enum {
        DSA_NOTIFIER_AGEING_TIME,
        DSA_NOTIFIER_BRIDGE_JOIN,
@@ -39,6 +41,8 @@ enum {
        DSA_NOTIFIER_MRP_DEL,
        DSA_NOTIFIER_MRP_ADD_RING_ROLE,
        DSA_NOTIFIER_MRP_DEL_RING_ROLE,
+       DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
+       DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
 };
 
 /* DSA_NOTIFIER_AGEING_TIME */
@@ -113,6 +117,14 @@ struct dsa_notifier_mrp_ring_role_info {
        int port;
 };
 
+/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
+struct dsa_notifier_tag_8021q_vlan_info {
+       int tree_index;
+       int sw_index;
+       int port;
+       u16 vid;
+};
+
 struct dsa_switchdev_event_work {
        struct dsa_switch *ds;
        int port;
@@ -187,23 +199,21 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 /* port.c */
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
                               const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
 int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
 void dsa_port_disable(struct dsa_port *dp);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
                         struct netlink_ext_ack *extack);
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
-                             struct netlink_ext_ack *extack);
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_lag_change(struct dsa_port *dp,
                        struct netdev_lag_lower_state_info *linfo);
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
                      struct netdev_lag_upper_info *uinfo,
                      struct netlink_ext_ack *extack);
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev,
-                          struct netlink_ext_ack *extack);
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
                            struct netlink_ext_ack *extack);
@@ -231,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp,
 int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
                              struct switchdev_brport_flags flags,
                              struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack);
 int dsa_port_vlan_add(struct dsa_port *dp,
                      const struct switchdev_obj_port_vlan *vlan,
                      struct netlink_ext_ack *extack);
@@ -253,16 +261,18 @@ int dsa_port_link_register_of(struct dsa_port *dp);
 void dsa_port_link_unregister_of(struct dsa_port *dp);
 int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
 extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
 static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
-                                                struct net_device *dev)
+                                                const struct net_device *dev)
 {
        return dsa_port_to_bridge_port(dp) == dev;
 }
 
 static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
-                                           struct net_device *bridge_dev)
+                                           const struct net_device *bridge_dev)
 {
        /* DSA ports connected to a bridge, and event was emitted
         * for the bridge.
@@ -272,7 +282,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
 
 /* Returns true if any port of this tree offloads the given net_device */
 static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
-                                                struct net_device *dev)
+                                                const struct net_device *dev)
 {
        struct dsa_port *dp;
 
@@ -283,6 +293,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
        return false;
 }
 
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst,
+                                           const struct net_device *bridge_dev)
+{
+       struct dsa_port *dp;
+
+       list_for_each_entry(dp, &dst->ports, list)
+               if (dsa_port_offloads_bridge(dp, bridge_dev))
+                       return true;
+
+       return false;
+}
+
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
 extern struct notifier_block dsa_slave_switchdev_notifier;
@@ -297,6 +320,8 @@ int dsa_slave_register_notifier(void);
 void dsa_slave_unregister_notifier(void);
 void dsa_slave_setup_tagger(struct net_device *slave);
 int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_slave_manage_vlan_filtering(struct net_device *dev,
+                                   bool vlan_filtering);
 
 static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
 {
@@ -372,6 +397,141 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
        return skb;
 }
 
+/* For switches without hardware support for DSA tagging to be able
+ * to support termination through the bridge.
+ */
+static inline struct net_device *
+dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+{
+       struct dsa_port *cpu_dp = master->dsa_ptr;
+       struct dsa_switch_tree *dst = cpu_dp->dst;
+       struct bridge_vlan_info vinfo;
+       struct net_device *slave;
+       struct dsa_port *dp;
+       int err;
+
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->type != DSA_PORT_TYPE_USER)
+                       continue;
+
+               if (!dp->bridge_dev)
+                       continue;
+
+               if (dp->stp_state != BR_STATE_LEARNING &&
+                   dp->stp_state != BR_STATE_FORWARDING)
+                       continue;
+
+               /* Since the bridge might learn this packet, keep the CPU port
+                * affinity with the port that will be used for the reply on
+                * xmit.
+                */
+               if (dp->cpu_dp != cpu_dp)
+                       continue;
+
+               slave = dp->slave;
+
+               err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+               if (err)
+                       continue;
+
+               return slave;
+       }
+
+       return NULL;
+}
+
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward in twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge_dev
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+       struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+       skb->offload_fwd_mark = !!(dp->bridge_dev);
+}
+
+/* Helper for removing DSA header tags from packets in the RX path.
+ * Must not be called before skb_pull(len).
+ *                                                                 skb->data
+ *                                                                         |
+ *                                                                         v
+ * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * +-----------------------+-----------------------+---------------+-------+
+ * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ *                                                 |               |
+ * <----- len ----->                               <----- len ----->
+ *                 |
+ *       >>>>>>>   v
+ *       >>>>>>>   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ *       >>>>>>>   +-----------------------+-----------------------+-------+
+ *       >>>>>>>   |    Destination MAC    |      Source MAC       | EType |
+ *                 +-----------------------+-----------------------+-------+
+ *                                                                         ^
+ *                                                                         |
+ *                                                                 skb->data
+ */
+static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
+{
+       memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
+}
+
+/* Helper for creating space for DSA header tags in TX path packets.
+ * Must not be called before skb_push(len).
+ *
+ * Before:
+ *
+ *       <<<<<<<   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * ^     <<<<<<<   +-----------------------+-----------------------+-------+
+ * |     <<<<<<<   |    Destination MAC    |      Source MAC       | EType |
+ * |               +-----------------------+-----------------------+-------+
+ * <----- len ----->
+ * |
+ * |
+ * skb->data
+ *
+ * After:
+ *
+ * |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |
+ * +-----------------------+-----------------------+---------------+-------+
+ * |    Destination MAC    |      Source MAC       |  DSA header   | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * ^                                               |               |
+ * |                                               <----- len ----->
+ * skb->data
+ */
+static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
+{
+       memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
+}
+
+/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+ * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
+ * what the DSA master perceives as the EtherType (the beginning of the L3
+ * protocol). Since DSA EtherType header taggers treat the EtherType as part of
+ * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
+ * is located 2 bytes behind skb->data. Note that EtherType in this context
+ * means the first 2 bytes of the DSA header, not the encapsulated EtherType
+ * that will become visible after the DSA header is stripped.
+ */
+static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
+{
+       return skb->data - 2;
+}
+
+/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
+ * header taggers start exactly where the EtherType is (the EtherType is
+ * treated as part of the DSA header).
+ */
+static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
+{
+       return skb->data + 2 * ETH_ALEN;
+}
+
 /* switch.c */
 int dsa_switch_register_notifier(struct dsa_switch *ds);
 void dsa_switch_unregister_notifier(struct dsa_switch *ds);
@@ -385,6 +545,18 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
                              struct net_device *master,
                              const struct dsa_device_ops *tag_ops,
                              const struct dsa_device_ops *old_tag_ops);
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num);
+
+/* tag_8021q.c */
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+                             struct dsa_notifier_bridge_info *info);
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+                              struct dsa_notifier_bridge_info *info);
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+                                 struct dsa_notifier_tag_8021q_vlan_info *info);
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+                                 struct dsa_notifier_tag_8021q_vlan_info *info);
 
 extern struct list_head dsa_tree_list;
 
index 3fc90e3..e8e1985 100644 (file)
@@ -210,14 +210,14 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                break;
        }
 
-       if (dev->netdev_ops->ndo_do_ioctl)
-               err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+       if (dev->netdev_ops->ndo_eth_ioctl)
+               err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd);
 
        return err;
 }
 
 static const struct dsa_netdevice_ops dsa_netdev_ops = {
-       .ndo_do_ioctl = dsa_master_ioctl,
+       .ndo_eth_ioctl = dsa_master_ioctl,
 };
 
 static int dsa_master_ethtool_setup(struct net_device *dev)
index 28b45b7..616330a 100644 (file)
@@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
        return dsa_tree_notify(dp->ds->dst, e, v);
 }
 
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+       struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+       struct switchdev_notifier_fdb_info info = {
+               /* flush all VLANs */
+               .vid = 0,
+       };
+
+       /* When the port becomes standalone it has already left the bridge.
+        * Don't notify the bridge in that case.
+        */
+       if (!brport_dev)
+               return;
+
+       call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+                                brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+       struct dsa_switch *ds = dp->ds;
+
+       if (!ds->ops->port_fast_age)
+               return;
+
+       ds->ops->port_fast_age(ds, dp->index);
+
+       dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+       struct switchdev_brport_flags flags = {
+               .mask = BR_LEARNING,
+       };
+       struct dsa_switch *ds = dp->ds;
+       int err;
+
+       if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+               return false;
+
+       err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+       return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
 {
        struct dsa_switch *ds = dp->ds;
        int port = dp->index;
@@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
 
        ds->ops->port_stp_state_set(ds, port, state);
 
-       if (ds->ops->port_fast_age) {
+       if (!dsa_port_can_configure_learning(dp) ||
+           (do_fast_age && dp->learning)) {
                /* Fast age FDB entries or flush appropriate forwarding database
                 * for the given port, if we are moving it from Learning or
                 * Forwarding state, to Disabled or Blocking or Listening state.
+                * Ports that were standalone before the STP state change don't
+                * need to fast age the FDB, since address learning is off in
+                * standalone mode.
                 */
 
                if ((dp->stp_state == BR_STATE_LEARNING ||
@@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
                    (state == BR_STATE_DISABLED ||
                     state == BR_STATE_BLOCKING ||
                     state == BR_STATE_LISTENING))
-                       ds->ops->port_fast_age(ds, port);
+                       dsa_port_fast_age(dp);
        }
 
        dp->stp_state = state;
@@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
        return 0;
 }
 
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+                                  bool do_fast_age)
 {
        int err;
 
-       err = dsa_port_set_state(dp, state);
+       err = dsa_port_set_state(dp, state, do_fast_age);
        if (err)
                pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
@@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
        }
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+               dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
 
        if (dp->pl)
                phylink_start(dp->pl);
@@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
                phylink_stop(dp->pl);
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+               dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
 
        if (ds->ops->port_disable)
                ds->ops->port_disable(ds, port);
@@ -167,8 +217,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
        }
 }
 
-static int dsa_port_switchdev_sync(struct dsa_port *dp,
-                                  struct netlink_ext_ack *extack)
+static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
+                                        struct netlink_ext_ack *extack)
 {
        struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
        struct net_device *br = dp->bridge_dev;
@@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
        if (err)
                return err;
 
-       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
        if (err && err != -EOPNOTSUPP)
                return err;
 
@@ -186,67 +236,10 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
        err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = br_mdb_replay(br, brport_dev, dp, true,
-                           &dsa_slave_switchdev_blocking_notifier, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       /* Forwarding and termination FDB entries on the port */
-       err = br_fdb_replay(br, brport_dev, dp, true,
-                           &dsa_slave_switchdev_notifier);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       /* Termination FDB entries on the bridge itself */
-       err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       err = br_vlan_replay(br, brport_dev, dp, true,
-                            &dsa_slave_switchdev_blocking_notifier, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       return 0;
-}
-
-static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
-                                         struct net_device *br,
-                                         struct netlink_ext_ack *extack)
-{
-       struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
-       int err;
-
-       /* Delete the switchdev objects left on this port */
-       err = br_mdb_replay(br, brport_dev, dp, false,
-                           &dsa_slave_switchdev_blocking_notifier, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       /* Forwarding and termination FDB entries on the port */
-       err = br_fdb_replay(br, brport_dev, dp, false,
-                           &dsa_slave_switchdev_notifier);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       /* Termination FDB entries on the bridge itself */
-       err = br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
-       err = br_vlan_replay(br, brport_dev, dp, false,
-                            &dsa_slave_switchdev_blocking_notifier, extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
        return 0;
 }
 
@@ -268,21 +261,63 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
        /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
         * so allow it to be in BR_STATE_FORWARDING to be kept functional
         */
-       dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+       dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
 
        /* VLAN filtering is handled by dsa_switch_bridge_leave */
 
-       /* Some drivers treat the notification for having a local multicast
-        * router by allowing multicast to be flooded to the CPU, so we should
-        * allow this in standalone mode too.
-        */
-       dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
        /* Ageing time may be global to the switch chip, so don't change it
         * here because we have no good reason (or value) to change it to.
         */
 }
 
+static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp,
+                                            struct net_device *bridge_dev)
+{
+       int bridge_num = dp->bridge_num;
+       struct dsa_switch *ds = dp->ds;
+
+       /* No bridge TX forwarding offload => do nothing */
+       if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1)
+               return;
+
+       dp->bridge_num = -1;
+
+       dsa_bridge_num_put(bridge_dev, bridge_num);
+
+       /* Notify the chips only once the offload has been deactivated, so
+        * that they can update their configuration accordingly.
+        */
+       ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev,
+                                             bridge_num);
+}
+
+static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp,
+                                          struct net_device *bridge_dev)
+{
+       struct dsa_switch *ds = dp->ds;
+       int bridge_num, err;
+
+       if (!ds->ops->port_bridge_tx_fwd_offload)
+               return false;
+
+       bridge_num = dsa_bridge_num_get(bridge_dev,
+                                       ds->num_fwd_offloading_bridges);
+       if (bridge_num < 0)
+               return false;
+
+       dp->bridge_num = bridge_num;
+
+       /* Notify the driver */
+       err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev,
+                                                 bridge_num);
+       if (err) {
+               dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev);
+               return false;
+       }
+
+       return true;
+}
+
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
                         struct netlink_ext_ack *extack)
 {
@@ -292,6 +327,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
                .port = dp->index,
                .br = br,
        };
+       struct net_device *dev = dp->slave;
+       struct net_device *brport_dev;
+       bool tx_fwd_offload;
        int err;
 
        /* Here the interface is already bridged. Reflect the current
@@ -299,16 +337,31 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
         */
        dp->bridge_dev = br;
 
+       brport_dev = dsa_port_to_bridge_port(dp);
+
        err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
        if (err)
                goto out_rollback;
 
-       err = dsa_port_switchdev_sync(dp, extack);
+       tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br);
+
+       err = switchdev_bridge_port_offload(brport_dev, dev, dp,
+                                           &dsa_slave_switchdev_notifier,
+                                           &dsa_slave_switchdev_blocking_notifier,
+                                           tx_fwd_offload, extack);
        if (err)
                goto out_rollback_unbridge;
 
+       err = dsa_port_switchdev_sync_attrs(dp, extack);
+       if (err)
+               goto out_rollback_unoffload;
+
        return 0;
 
+out_rollback_unoffload:
+       switchdev_bridge_port_unoffload(brport_dev, dp,
+                                       &dsa_slave_switchdev_notifier,
+                                       &dsa_slave_switchdev_blocking_notifier);
 out_rollback_unbridge:
        dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 out_rollback:
@@ -316,10 +369,17 @@ out_rollback:
        return err;
 }
 
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
-                             struct netlink_ext_ack *extack)
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
 {
-       return dsa_port_switchdev_unsync_objs(dp, br, extack);
+       struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+
+       /* Don't try to unoffload something that is not offloaded */
+       if (!brport_dev)
+               return;
+
+       switchdev_bridge_port_unoffload(brport_dev, dp,
+                                       &dsa_slave_switchdev_notifier,
+                                       &dsa_slave_switchdev_blocking_notifier);
 }
 
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
@@ -337,9 +397,13 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
         */
        dp->bridge_dev = NULL;
 
+       dsa_port_bridge_tx_fwd_unoffload(dp, br);
+
        err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
        if (err)
-               pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+               dev_err(dp->ds->dev,
+                       "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n",
+                       dp->index, ERR_PTR(err));
 
        dsa_port_switchdev_unsync_attrs(dp);
 }
@@ -409,13 +473,10 @@ err_lag_join:
        return err;
 }
 
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag,
-                          struct netlink_ext_ack *extack)
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
 {
        if (dp->bridge_dev)
-               return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack);
-
-       return 0;
+               dsa_port_pre_bridge_leave(dp, dp->bridge_dev);
 }
 
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
@@ -441,8 +502,9 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
 
        err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
        if (err)
-               pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
-                      err);
+               dev_err(dp->ds->dev,
+                       "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n",
+                       dp->index, ERR_PTR(err));
 
        dsa_lag_unmap(dp->ds->dst, lag);
 }
@@ -518,6 +580,7 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
                            struct netlink_ext_ack *extack)
 {
+       bool old_vlan_filtering = dsa_port_is_vlan_filtering(dp);
        struct dsa_switch *ds = dp->ds;
        bool apply;
        int err;
@@ -543,12 +606,49 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
        if (err)
                return err;
 
-       if (ds->vlan_filtering_is_global)
+       if (ds->vlan_filtering_is_global) {
+               int port;
+
                ds->vlan_filtering = vlan_filtering;
-       else
+
+               for (port = 0; port < ds->num_ports; port++) {
+                       struct net_device *slave;
+
+                       if (!dsa_is_user_port(ds, port))
+                               continue;
+
+                       /* We might be called in the unbind path, so not
+                        * all slave devices might still be registered.
+                        */
+                       slave = dsa_to_port(ds, port)->slave;
+                       if (!slave)
+                               continue;
+
+                       err = dsa_slave_manage_vlan_filtering(slave,
+                                                             vlan_filtering);
+                       if (err)
+                               goto restore;
+               }
+       } else {
                dp->vlan_filtering = vlan_filtering;
 
+               err = dsa_slave_manage_vlan_filtering(dp->slave,
+                                                     vlan_filtering);
+               if (err)
+                       goto restore;
+       }
+
        return 0;
+
+restore:
+       ds->ops->port_vlan_filtering(ds, dp->index, old_vlan_filtering, NULL);
+
+       if (ds->vlan_filtering_is_global)
+               ds->vlan_filtering = old_vlan_filtering;
+       else
+               dp->vlan_filtering = old_vlan_filtering;
+
+       return err;
 }
 
 /* This enforces legacy behavior for switch drivers which assume they can't
@@ -595,27 +695,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
        return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
 }
 
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack)
 {
        struct dsa_switch *ds = dp->ds;
+       int err;
 
        if (!ds->ops->port_bridge_flags)
                return -EOPNOTSUPP;
 
-       return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+       err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+       if (err)
+               return err;
 
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack)
-{
-       struct dsa_switch *ds = dp->ds;
+       if (flags.mask & BR_LEARNING) {
+               bool learning = flags.val & BR_LEARNING;
 
-       if (!ds->ops->port_set_mrouter)
-               return -EOPNOTSUPP;
+               if (learning == dp->learning)
+                       return 0;
+
+               if ((dp->learning && !learning) &&
+                   (dp->stp_state == BR_STATE_LEARNING ||
+                    dp->stp_state == BR_STATE_FORWARDING))
+                       dsa_port_fast_age(dp);
+
+               dp->learning = learning;
+       }
 
-       return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+       return 0;
 }
 
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
@@ -844,7 +952,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
                               const struct dsa_device_ops *tag_ops)
 {
-       cpu_dp->filter = tag_ops->filter;
        cpu_dp->rcv = tag_ops->rcv;
        cpu_dp->tag_ops = tag_ops;
 }
@@ -1215,5 +1322,42 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
 
        err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
        if (err)
-               pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
+               dev_err(dp->ds->dev,
+                       "port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n",
+                       dp->index, ERR_PTR(err));
+}
+
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+       struct dsa_notifier_tag_8021q_vlan_info info = {
+               .tree_index = dp->ds->dst->index,
+               .sw_index = dp->ds->index,
+               .port = dp->index,
+               .vid = vid,
+       };
+
+       if (broadcast)
+               return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+
+       return dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+}
+
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+       struct dsa_notifier_tag_8021q_vlan_info info = {
+               .tree_index = dp->ds->dst->index,
+               .sw_index = dp->ds->index,
+               .port = dp->index,
+               .vid = vid,
+       };
+       int err;
+
+       if (broadcast)
+               err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+       else
+               err = dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+       if (err)
+               dev_err(dp->ds->dev,
+                       "port %d failed to notify tag_8021q VLAN %d deletion: %pe\n",
+                       dp->index, vid, ERR_PTR(err));
 }
index 23be8e0..662ff53 100644 (file)
@@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
                if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
                        return -EOPNOTSUPP;
 
-               ret = dsa_port_set_state(dp, attr->u.stp_state);
+               ret = dsa_port_set_state(dp, attr->u.stp_state, true);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
                if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
@@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 
                ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
                break;
-       case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
-               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
-                       return -EOPNOTSUPP;
-
-               ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
-               break;
        default:
                ret = -EOPNOTSUPP;
                break;
@@ -1415,6 +1409,76 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
        return 0;
 }
 
+static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
+{
+       __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+       return dsa_slave_vlan_rx_add_vid(arg, proto, vid);
+}
+
+static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg)
+{
+       __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+       return dsa_slave_vlan_rx_kill_vid(arg, proto, vid);
+}
+
+/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN
+ * filtering is enabled. The baseline is that only ports that offload a
+ * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware,
+ * but there are exceptions for quirky hardware.
+ *
+ * If ds->vlan_filtering_is_global = true, then standalone ports which share
+ * the same switch with other ports that offload a VLAN-aware bridge are also
+ * inevitably VLAN-aware.
+ *
+ * To summarize, a DSA switch port offloads:
+ *
+ * - If standalone (this includes software bridge, software LAG):
+ *     - if ds->needs_standalone_vlan_filtering = true, OR if
+ *       (ds->vlan_filtering_is_global = true AND there are bridges spanning
+ *       this switch chip which have vlan_filtering=1)
+ *         - the 8021q upper VLANs
+ *     - else (standalone VLAN filtering is not needed, VLAN filtering is not
+ *       global, or it is, but no port is under a VLAN-aware bridge):
+ *         - no VLAN (any 8021q upper is a software VLAN)
+ *
+ * - If under a vlan_filtering=0 bridge which it offload:
+ *     - if ds->configure_vlan_while_not_filtering = true (default):
+ *         - the bridge VLANs. These VLANs are committed to hardware but inactive.
+ *     - else (deprecated):
+ *         - no VLAN. The bridge VLANs are not restored when VLAN awareness is
+ *           enabled, so this behavior is broken and discouraged.
+ *
+ * - If under a vlan_filtering=1 bridge which it offload:
+ *     - the bridge VLANs
+ *     - the 8021q upper VLANs
+ */
+int dsa_slave_manage_vlan_filtering(struct net_device *slave,
+                                   bool vlan_filtering)
+{
+       int err;
+
+       if (vlan_filtering) {
+               slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+               err = vlan_for_each(slave, dsa_slave_restore_vlan, slave);
+               if (err) {
+                       vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+                       slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+                       return err;
+               }
+       } else {
+               err = vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+               if (err)
+                       return err;
+
+               slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+       }
+
+       return 0;
+}
+
 struct dsa_hw_port {
        struct list_head list;
        struct net_device *dev;
@@ -1687,7 +1751,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
        .ndo_set_rx_mode        = dsa_slave_set_rx_mode,
        .ndo_set_mac_address    = dsa_slave_set_mac_address,
        .ndo_fdb_dump           = dsa_slave_fdb_dump,
-       .ndo_do_ioctl           = dsa_slave_ioctl,
+       .ndo_eth_ioctl          = dsa_slave_ioctl,
        .ndo_get_iflink         = dsa_slave_get_iflink,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_netpoll_setup      = dsa_slave_netpoll_setup,
@@ -1822,12 +1886,12 @@ void dsa_slave_setup_tagger(struct net_device *slave)
        p->xmit = cpu_dp->tag_ops->xmit;
 
        slave->features = master->vlan_features | NETIF_F_HW_TC;
-       if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
-               slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
        slave->hw_features |= NETIF_F_HW_TC;
        slave->features |= NETIF_F_LLTX;
        if (slave->needed_tailroom)
                slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
+       if (ds->needs_standalone_vlan_filtering)
+               slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 }
 
 static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
@@ -2015,6 +2079,11 @@ static int dsa_slave_changeupper(struct net_device *dev,
                        err = dsa_port_bridge_join(dp, info->upper_dev, extack);
                        if (!err)
                                dsa_bridge_mtu_normalization(dp);
+                       if (err == -EOPNOTSUPP) {
+                               NL_SET_ERR_MSG_MOD(extack,
+                                                  "Offloading not supported");
+                               err = 0;
+                       }
                        err = notifier_from_errno(err);
                } else {
                        dsa_port_bridge_leave(dp, info->upper_dev);
@@ -2056,20 +2125,16 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
                                    struct netdev_notifier_changeupper_info *info)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
-       struct netlink_ext_ack *extack;
-       int err = 0;
-
-       extack = netdev_notifier_info_to_extack(&info->info);
 
        if (netif_is_bridge_master(info->upper_dev) && !info->linking)
-               err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack);
+               dsa_port_pre_bridge_leave(dp, info->upper_dev);
        else if (netif_is_lag_master(info->upper_dev) && !info->linking)
-               err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack);
+               dsa_port_pre_lag_leave(dp, info->upper_dev);
        /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
         * meaningfully enslaved to a bridge yet
         */
 
-       return notifier_from_errno(err);
+       return NOTIFY_DONE;
 }
 
 static int
@@ -2357,26 +2422,98 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
        kfree(switchdev_work);
 }
 
-static int dsa_lower_dev_walk(struct net_device *lower_dev,
-                             struct netdev_nested_priv *priv)
+static bool dsa_foreign_dev_check(const struct net_device *dev,
+                                 const struct net_device *foreign_dev)
 {
-       if (dsa_slave_dev_check(lower_dev)) {
-               priv->data = (void *)netdev_priv(lower_dev);
-               return 1;
-       }
+       const struct dsa_port *dp = dsa_slave_to_port(dev);
+       struct dsa_switch_tree *dst = dp->ds->dst;
 
-       return 0;
+       if (netif_is_bridge_master(foreign_dev))
+               return !dsa_tree_offloads_bridge(dst, foreign_dev);
+
+       if (netif_is_bridge_port(foreign_dev))
+               return !dsa_tree_offloads_bridge_port(dst, foreign_dev);
+
+       /* Everything else is foreign */
+       return true;
 }
 
-static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
+static int dsa_slave_fdb_event(struct net_device *dev,
+                              const struct net_device *orig_dev,
+                              const void *ctx,
+                              const struct switchdev_notifier_fdb_info *fdb_info,
+                              unsigned long event)
 {
-       struct netdev_nested_priv priv = {
-               .data = NULL,
-       };
+       struct dsa_switchdev_event_work *switchdev_work;
+       struct dsa_port *dp = dsa_slave_to_port(dev);
+       bool host_addr = fdb_info->is_local;
+       struct dsa_switch *ds = dp->ds;
+
+       if (ctx && ctx != dp)
+               return 0;
+
+       if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+               return -EOPNOTSUPP;
+
+       if (dsa_slave_dev_check(orig_dev) &&
+           switchdev_fdb_is_dynamically_learned(fdb_info))
+               return 0;
+
+       /* FDB entries learned by the software bridge should be installed as
+        * host addresses only if the driver requests assisted learning.
+        */
+       if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
+           !ds->assisted_learning_on_cpu_port)
+               return 0;
+
+       /* Also treat FDB entries on foreign interfaces bridged with us as host
+        * addresses.
+        */
+       if (dsa_foreign_dev_check(dev, orig_dev))
+               host_addr = true;
+
+       switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+       if (!switchdev_work)
+               return -ENOMEM;
 
-       netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
+       netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n",
+                  event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting",
+                  orig_dev->name, fdb_info->addr, fdb_info->vid,
+                  host_addr ? " as host address" : "");
 
-       return (struct dsa_slave_priv *)priv.data;
+       INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
+       switchdev_work->ds = ds;
+       switchdev_work->port = dp->index;
+       switchdev_work->event = event;
+       switchdev_work->dev = dev;
+
+       ether_addr_copy(switchdev_work->addr, fdb_info->addr);
+       switchdev_work->vid = fdb_info->vid;
+       switchdev_work->host_addr = host_addr;
+
+       /* Hold a reference for dsa_fdb_offload_notify */
+       dev_hold(dev);
+       dsa_schedule_work(&switchdev_work->work);
+
+       return 0;
+}
+
+static int
+dsa_slave_fdb_add_to_device(struct net_device *dev,
+                           const struct net_device *orig_dev, const void *ctx,
+                           const struct switchdev_notifier_fdb_info *fdb_info)
+{
+       return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+                                  SWITCHDEV_FDB_ADD_TO_DEVICE);
+}
+
+static int
+dsa_slave_fdb_del_to_device(struct net_device *dev,
+                           const struct net_device *orig_dev, const void *ctx,
+                           const struct switchdev_notifier_fdb_info *fdb_info)
+{
+       return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+                                  SWITCHDEV_FDB_DEL_TO_DEVICE);
 }
 
 /* Called under rcu_read_lock() */
@@ -2384,10 +2521,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
                                     unsigned long event, void *ptr)
 {
        struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
-       const struct switchdev_notifier_fdb_info *fdb_info;
-       struct dsa_switchdev_event_work *switchdev_work;
-       bool host_addr = false;
-       struct dsa_port *dp;
        int err;
 
        switch (event) {
@@ -2397,92 +2530,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
                                                     dsa_slave_port_attr_set);
                return notifier_from_errno(err);
        case SWITCHDEV_FDB_ADD_TO_DEVICE:
+               err = switchdev_handle_fdb_add_to_device(dev, ptr,
+                                                        dsa_slave_dev_check,
+                                                        dsa_foreign_dev_check,
+                                                        dsa_slave_fdb_add_to_device,
+                                                        NULL);
+               return notifier_from_errno(err);
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
-               fdb_info = ptr;
-
-               if (dsa_slave_dev_check(dev)) {
-                       dp = dsa_slave_to_port(dev);
-
-                       if (fdb_info->is_local)
-                               host_addr = true;
-                       else if (!fdb_info->added_by_user)
-                               return NOTIFY_OK;
-               } else {
-                       /* Snoop addresses added to foreign interfaces
-                        * bridged with us, or the bridge
-                        * itself. Dynamically learned addresses can
-                        * also be added for switches that don't
-                        * automatically learn SA from CPU-injected
-                        * traffic.
-                        */
-                       struct net_device *br_dev;
-                       struct dsa_slave_priv *p;
-
-                       if (netif_is_bridge_master(dev))
-                               br_dev = dev;
-                       else
-                               br_dev = netdev_master_upper_dev_get_rcu(dev);
-
-                       if (!br_dev)
-                               return NOTIFY_DONE;
-
-                       if (!netif_is_bridge_master(br_dev))
-                               return NOTIFY_DONE;
-
-                       p = dsa_slave_dev_lower_find(br_dev);
-                       if (!p)
-                               return NOTIFY_DONE;
-
-                       dp = p->dp;
-                       host_addr = fdb_info->is_local;
-
-                       /* FDB entries learned by the software bridge should
-                        * be installed as host addresses only if the driver
-                        * requests assisted learning.
-                        * On the other hand, FDB entries for local termination
-                        * should always be installed.
-                        */
-                       if (!fdb_info->added_by_user && !fdb_info->is_local &&
-                           !dp->ds->assisted_learning_on_cpu_port)
-                               return NOTIFY_DONE;
-
-                       /* When the bridge learns an address on an offloaded
-                        * LAG we don't want to send traffic to the CPU, the
-                        * other ports bridged with the LAG should be able to
-                        * autonomously forward towards it.
-                        * On the other hand, if the address is local
-                        * (therefore not learned) then we want to trap it to
-                        * the CPU regardless of whether the interface it
-                        * belongs to is offloaded or not.
-                        */
-                       if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) &&
-                           !fdb_info->is_local)
-                               return NOTIFY_DONE;
-               }
-
-               if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
-                       return NOTIFY_DONE;
-
-               switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
-               if (!switchdev_work)
-                       return NOTIFY_BAD;
-
-               INIT_WORK(&switchdev_work->work,
-                         dsa_slave_switchdev_event_work);
-               switchdev_work->ds = dp->ds;
-               switchdev_work->port = dp->index;
-               switchdev_work->event = event;
-               switchdev_work->dev = dev;
-
-               ether_addr_copy(switchdev_work->addr,
-                               fdb_info->addr);
-               switchdev_work->vid = fdb_info->vid;
-               switchdev_work->host_addr = host_addr;
-
-               /* Hold a reference for dsa_fdb_offload_notify */
-               dev_hold(dev);
-               dsa_schedule_work(&switchdev_work->work);
-               break;
+               err = switchdev_handle_fdb_del_to_device(dev, ptr,
+                                                        dsa_slave_dev_check,
+                                                        dsa_foreign_dev_check,
+                                                        dsa_slave_fdb_del_to_device,
+                                                        NULL);
+               return notifier_from_errno(err);
        default:
                return NOTIFY_DONE;
        }
index 5ece05d..1c797ec 100644 (file)
@@ -90,26 +90,36 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
                                  struct dsa_notifier_bridge_info *info)
 {
        struct dsa_switch_tree *dst = ds->dst;
+       int err;
 
-       if (dst->index == info->tree_index && ds->index == info->sw_index &&
-           ds->ops->port_bridge_join)
-               return ds->ops->port_bridge_join(ds, info->port, info->br);
+       if (dst->index == info->tree_index && ds->index == info->sw_index) {
+               if (!ds->ops->port_bridge_join)
+                       return -EOPNOTSUPP;
+
+               err = ds->ops->port_bridge_join(ds, info->port, info->br);
+               if (err)
+                       return err;
+       }
 
        if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
-           ds->ops->crosschip_bridge_join)
-               return ds->ops->crosschip_bridge_join(ds, info->tree_index,
-                                                     info->sw_index,
-                                                     info->port, info->br);
+           ds->ops->crosschip_bridge_join) {
+               err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
+                                                    info->sw_index,
+                                                    info->port, info->br);
+               if (err)
+                       return err;
+       }
 
-       return 0;
+       return dsa_tag_8021q_bridge_join(ds, info);
 }
 
 static int dsa_switch_bridge_leave(struct dsa_switch *ds,
                                   struct dsa_notifier_bridge_info *info)
 {
-       bool unset_vlan_filtering = br_vlan_enabled(info->br);
        struct dsa_switch_tree *dst = ds->dst;
        struct netlink_ext_ack extack = {0};
+       bool change_vlan_filtering = false;
+       bool vlan_filtering;
        int err, port;
 
        if (dst->index == info->tree_index && ds->index == info->sw_index &&
@@ -122,6 +132,15 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
                                                info->sw_index, info->port,
                                                info->br);
 
+       if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) {
+               change_vlan_filtering = true;
+               vlan_filtering = true;
+       } else if (!ds->needs_standalone_vlan_filtering &&
+                  br_vlan_enabled(info->br)) {
+               change_vlan_filtering = true;
+               vlan_filtering = false;
+       }
+
        /* If the bridge was vlan_filtering, the bridge core doesn't trigger an
         * event for changing vlan_filtering setting upon slave ports leaving
         * it. That is a good thing, because that lets us handle it and also
@@ -130,28 +149,30 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
         * vlan_filtering callback is only when the last port leaves the last
         * VLAN-aware bridge.
         */
-       if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
+       if (change_vlan_filtering && ds->vlan_filtering_is_global) {
                for (port = 0; port < ds->num_ports; port++) {
                        struct net_device *bridge_dev;
 
                        bridge_dev = dsa_to_port(ds, port)->bridge_dev;
 
                        if (bridge_dev && br_vlan_enabled(bridge_dev)) {
-                               unset_vlan_filtering = false;
+                               change_vlan_filtering = false;
                                break;
                        }
                }
        }
-       if (unset_vlan_filtering) {
+
+       if (change_vlan_filtering) {
                err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
-                                             false, &extack);
+                                             vlan_filtering, &extack);
                if (extack._msg)
                        dev_err(ds->dev, "port %d: %s\n", info->port,
                                extack._msg);
                if (err && err != EOPNOTSUPP)
                        return err;
        }
-       return 0;
+
+       return dsa_tag_8021q_bridge_leave(ds, info);
 }
 
 /* Matches for all upstream-facing ports (the CPU port and all upstream-facing
@@ -726,6 +747,12 @@ static int dsa_switch_event(struct notifier_block *nb,
        case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
                err = dsa_switch_mrp_del_ring_role(ds, info);
                break;
+       case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD:
+               err = dsa_switch_tag_8021q_vlan_add(ds, info);
+               break;
+       case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
+               err = dsa_switch_tag_8021q_vlan_del(ds, info);
+               break;
        default:
                err = -EOPNOTSUPP;
                break;
index 4aa29f9..f8f7b7c 100644 (file)
@@ -17,7 +17,7 @@
  *
  * | 11  | 10  |  9  |  8  |  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
  * +-----------+-----+-----------------+-----------+-----------------------+
- * |    DIR    | SVL |    SWITCH_ID    |  SUBVLAN  |          PORT         |
+ * |    DIR    | VBID|    SWITCH_ID    |   VBID    |          PORT         |
  * +-----------+-----+-----------------+-----------+-----------------------+
  *
  * DIR - VID[11:10]:
  *     These values make the special VIDs of 0, 1 and 4095 to be left
  *     unused by this coding scheme.
  *
- * SVL/SUBVLAN - { VID[9], VID[5:4] }:
- *     Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN.
- *     * 0 (0b000): Field does not encode a sub-VLAN, either because
- *     received traffic is untagged, PVID-tagged or because a second
- *     VLAN tag is present after this tag and not inside of it.
- *     * 1 (0b001): Received traffic is tagged with a VID value private
- *     to the host. This field encodes the index in the host's lookup
- *     table through which the value of the ingress VLAN ID can be
- *     recovered.
- *     * 2 (0b010): Field encodes a sub-VLAN.
- *     ...
- *     * 7 (0b111): Field encodes a sub-VLAN.
- *     When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero
- *     (by the host) and ignored on receive (by the switch).
- *
  * SWITCH_ID - VID[8:6]:
  *     Index of switch within DSA tree. Must be between 0 and 7.
  *
+ * VBID - { VID[9], VID[5:4] }:
+ *     Virtual bridge ID. If between 1 and 7, packet targets the broadcast
+ *     domain of a bridge. If transmitted as zero, packet targets a single
+ *     port. Field only valid on transmit, must be ignored on receive.
+ *
  * PORT - VID[3:0]:
  *     Index of switch port. Must be between 0 and 15.
  */
 #define DSA_8021Q_SWITCH_ID(x)         (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
                                                 DSA_8021Q_SWITCH_ID_MASK)
 
-#define DSA_8021Q_SUBVLAN_HI_SHIFT     9
-#define DSA_8021Q_SUBVLAN_HI_MASK      GENMASK(9, 9)
-#define DSA_8021Q_SUBVLAN_LO_SHIFT     4
-#define DSA_8021Q_SUBVLAN_LO_MASK      GENMASK(5, 4)
-#define DSA_8021Q_SUBVLAN_HI(x)                (((x) & GENMASK(2, 2)) >> 2)
-#define DSA_8021Q_SUBVLAN_LO(x)                ((x) & GENMASK(1, 0))
-#define DSA_8021Q_SUBVLAN(x)           \
-               (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \
-                 DSA_8021Q_SUBVLAN_LO_MASK) | \
-                ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \
-                 DSA_8021Q_SUBVLAN_HI_MASK))
+#define DSA_8021Q_VBID_HI_SHIFT                9
+#define DSA_8021Q_VBID_HI_MASK         GENMASK(9, 9)
+#define DSA_8021Q_VBID_LO_SHIFT                4
+#define DSA_8021Q_VBID_LO_MASK         GENMASK(5, 4)
+#define DSA_8021Q_VBID_HI(x)           (((x) & GENMASK(2, 2)) >> 2)
+#define DSA_8021Q_VBID_LO(x)           ((x) & GENMASK(1, 0))
+#define DSA_8021Q_VBID(x)              \
+               (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \
+                 DSA_8021Q_VBID_LO_MASK) | \
+                ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \
+                 DSA_8021Q_VBID_HI_MASK))
 
 #define DSA_8021Q_PORT_SHIFT           0
 #define DSA_8021Q_PORT_MASK            GENMASK(3, 0)
 #define DSA_8021Q_PORT(x)              (((x) << DSA_8021Q_PORT_SHIFT) & \
                                                 DSA_8021Q_PORT_MASK)
 
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num)
+{
+       /* The VBID value of 0 is reserved for precise TX */
+       return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num + 1);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
+
 /* Returns the VID to be inserted into the frame from xmit for switch steering
  * instructions on egress. Encodes switch ID and port ID.
  */
@@ -98,13 +95,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
 
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
-{
-       return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
-              DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan);
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan);
-
 /* Returns the decoded switch ID from the RX VID. */
 int dsa_8021q_rx_switch_id(u16 vid)
 {
@@ -119,20 +109,6 @@ int dsa_8021q_rx_source_port(u16 vid)
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
 
-/* Returns the decoded subvlan from the RX VID. */
-u16 dsa_8021q_rx_subvlan(u16 vid)
-{
-       u16 svl_hi, svl_lo;
-
-       svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >>
-                DSA_8021Q_SUBVLAN_HI_SHIFT;
-       svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >>
-                DSA_8021Q_SUBVLAN_LO_SHIFT;
-
-       return (svl_hi << 2) | svl_lo;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
-
 bool vid_is_dsa_8021q_rxvlan(u16 vid)
 {
        return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
@@ -151,21 +127,152 @@ bool vid_is_dsa_8021q(u16 vid)
 }
 EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
 
-/* If @enabled is true, installs @vid with @flags into the switch port's HW
- * filter.
- * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
- * user explicitly configured this @vid through the bridge core, then the @vid
- * is installed again, but this time with the flags from the bridge layer.
- */
-static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
-                              u16 flags, bool enabled)
+static struct dsa_tag_8021q_vlan *
+dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid)
 {
-       struct dsa_port *dp = dsa_to_port(ctx->ds, port);
+       struct dsa_tag_8021q_vlan *v;
 
-       if (enabled)
-               return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags);
+       list_for_each_entry(v, &ctx->vlans, list)
+               if (v->vid == vid && v->port == port)
+                       return v;
 
-       return ctx->ops->vlan_del(ctx->ds, dp->index, vid);
+       return NULL;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
+                                           u16 vid, u16 flags)
+{
+       struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       struct dsa_tag_8021q_vlan *v;
+       int err;
+
+       /* No need to bother with refcounting for user ports */
+       if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+               return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+
+       v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+       if (v) {
+               refcount_inc(&v->refcount);
+               return 0;
+       }
+
+       v = kzalloc(sizeof(*v), GFP_KERNEL);
+       if (!v)
+               return -ENOMEM;
+
+       err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+       if (err) {
+               kfree(v);
+               return err;
+       }
+
+       v->vid = vid;
+       v->port = port;
+       refcount_set(&v->refcount, 1);
+       list_add_tail(&v->list, &ctx->vlans);
+
+       return 0;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
+                                           u16 vid)
+{
+       struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       struct dsa_tag_8021q_vlan *v;
+       int err;
+
+       /* No need to bother with refcounting for user ports */
+       if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+               return ds->ops->tag_8021q_vlan_del(ds, port, vid);
+
+       v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+       if (!v)
+               return -ENOENT;
+
+       if (!refcount_dec_and_test(&v->refcount))
+               return 0;
+
+       err = ds->ops->tag_8021q_vlan_del(ds, port, vid);
+       if (err) {
+               refcount_inc(&v->refcount);
+               return err;
+       }
+
+       list_del(&v->list);
+       kfree(v);
+
+       return 0;
+}
+
+static bool
+dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
+                               struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+       if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+               return true;
+
+       if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
+               return port == info->port;
+
+       return false;
+}
+
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+                                 struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+       int port, err;
+
+       /* Since we use dsa_broadcast(), there might be other switches in other
+        * trees which don't support tag_8021q, so don't return an error.
+        * Or they might even support tag_8021q but have not registered yet to
+        * use it (maybe they use another tagger currently).
+        */
+       if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx)
+               return 0;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+                       u16 flags = 0;
+
+                       if (dsa_is_user_port(ds, port))
+                               flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+                       if (vid_is_dsa_8021q_rxvlan(info->vid) &&
+                           dsa_8021q_rx_switch_id(info->vid) == ds->index &&
+                           dsa_8021q_rx_source_port(info->vid) == port)
+                               flags |= BRIDGE_VLAN_INFO_PVID;
+
+                       err = dsa_switch_do_tag_8021q_vlan_add(ds, port,
+                                                              info->vid,
+                                                              flags);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+                                 struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+       int port, err;
+
+       if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx)
+               return 0;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+                       err = dsa_switch_do_tag_8021q_vlan_del(ds, port,
+                                                              info->vid);
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
 }
 
 /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -181,12 +288,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
  *    force all switched traffic to pass through the CPU. So we must also make
  *    the other front-panel ports members of this VID we're adding, albeit
  *    we're not making it their PVID (they'll still have their own).
- *    By the way - just because we're installing the same VID in multiple
- *    switch ports doesn't mean that they'll start to talk to one another, even
- *    while not bridged: the final forwarding decision is still an AND between
- *    the L2 forwarding information (which is limiting forwarding in this case)
- *    and the VLAN-based restrictions (of which there are none in this case,
- *    since all ports are members).
  *  - On TX (ingress from CPU and towards network) we are faced with a problem.
  *    If we were to tag traffic (from within DSA) with the port's pvid, all
  *    would be well, assuming the switch ports were standalone. Frames would
@@ -200,9 +301,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
  *    a member of the VID we're tagging the traffic with - the desired one.
  *
  * So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU
- * port will have the RX and TX VIDs of all front-panel ports, and on top of
- * that, is also tagged-input and tagged-output (VLAN trunk).
+ * the RX VID of all other front-panel ports that are in the same bridge, and
+ * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
+ * front-panel ports, and on top of that, is also tagged-input and
+ * tagged-output (VLAN trunk).
  *
  *               CPU port                               CPU port
  * +-------------+-----+-------------+    +-------------+-----+-------------+
@@ -220,246 +322,246 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
  * +-+-----+-+-----+-+-----+-+-----+-+    +-+-----+-+-----+-+-----+-+-----+-+
  *   swp0    swp1    swp2    swp3           swp0    swp1    swp2    swp3
  */
-static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port,
-                               bool enabled)
+static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port,
+                                      struct dsa_notifier_bridge_info *info)
+{
+       struct dsa_port *dp = dsa_to_port(ds, port);
+
+       /* Don't match on self */
+       if (ds->dst->index == info->tree_index &&
+           ds->index == info->sw_index &&
+           port == info->port)
+               return false;
+
+       if (dsa_port_is_user(dp))
+               return dp->bridge_dev == info->br;
+
+       return false;
+}
+
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+                             struct dsa_notifier_bridge_info *info)
+{
+       struct dsa_switch *targeted_ds;
+       struct dsa_port *targeted_dp;
+       u16 targeted_rx_vid;
+       int err, port;
+
+       if (!ds->tag_8021q_ctx)
+               return 0;
+
+       targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+       targeted_dp = dsa_to_port(targeted_ds, info->port);
+       targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+       for (port = 0; port < ds->num_ports; port++) {
+               struct dsa_port *dp = dsa_to_port(ds, port);
+               u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+               if (!dsa_tag_8021q_bridge_match(ds, port, info))
+                       continue;
+
+               /* Install the RX VID of the targeted port in our VLAN table */
+               err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true);
+               if (err)
+                       return err;
+
+               /* Install our RX VID into the targeted port's VLAN table */
+               err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+                              struct dsa_notifier_bridge_info *info)
 {
-       int upstream = dsa_upstream_port(ctx->ds, port);
-       u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
-       u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port);
+       struct dsa_switch *targeted_ds;
+       struct dsa_port *targeted_dp;
+       u16 targeted_rx_vid;
+       int port;
+
+       if (!ds->tag_8021q_ctx)
+               return 0;
+
+       targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+       targeted_dp = dsa_to_port(targeted_ds, info->port);
+       targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+       for (port = 0; port < ds->num_ports; port++) {
+               struct dsa_port *dp = dsa_to_port(ds, port);
+               u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+               if (!dsa_tag_8021q_bridge_match(ds, port, info))
+                       continue;
+
+               /* Remove the RX VID of the targeted port from our VLAN table */
+               dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true);
+
+               /* Remove our RX VID from the targeted port's VLAN table */
+               dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true);
+       }
+
+       return 0;
+}
+
+int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+                                       struct net_device *br,
+                                       int bridge_num)
+{
+       u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+
+       return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid,
+                                          true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload);
+
+void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+                                          struct net_device *br,
+                                          int bridge_num)
+{
+       u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+
+       dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload);
+
+/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
+{
+       struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+       u16 tx_vid = dsa_8021q_tx_vid(ds, port);
        struct net_device *master;
-       int i, err, subvlan;
+       int err;
 
        /* The CPU port is implicitly configured by
         * configuring the front-panel ports
         */
-       if (!dsa_is_user_port(ctx->ds, port))
+       if (!dsa_port_is_user(dp))
                return 0;
 
-       master = dsa_to_port(ctx->ds, port)->cpu_dp->master;
+       master = dp->cpu_dp->master;
 
        /* Add this user port's RX VID to the membership list of all others
         * (including itself). This is so that bridging will not be hindered.
         * L2 forwarding rules still take precedence when there are no VLAN
         * restrictions, so there are no concerns about leaking traffic.
         */
-       for (i = 0; i < ctx->ds->num_ports; i++) {
-               u16 flags;
-
-               if (i == upstream)
-                       continue;
-               else if (i == port)
-                       /* The RX VID is pvid on this port */
-                       flags = BRIDGE_VLAN_INFO_UNTAGGED |
-                               BRIDGE_VLAN_INFO_PVID;
-               else
-                       /* The RX VID is a regular VLAN on all others */
-                       flags = BRIDGE_VLAN_INFO_UNTAGGED;
-
-               err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled);
-               if (err) {
-                       dev_err(ctx->ds->dev,
-                               "Failed to apply RX VID %d to port %d: %d\n",
-                               rx_vid, port, err);
-                       return err;
-               }
-       }
-
-       /* CPU port needs to see this port's RX VID
-        * as tagged egress.
-        */
-       err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled);
+       err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false);
        if (err) {
-               dev_err(ctx->ds->dev,
-                       "Failed to apply RX VID %d to port %d: %d\n",
-                       rx_vid, port, err);
+               dev_err(ds->dev,
+                       "Failed to apply RX VID %d to port %d: %pe\n",
+                       rx_vid, port, ERR_PTR(err));
                return err;
        }
 
-       /* Add to the master's RX filter not only @rx_vid, but in fact
-        * the entire subvlan range, just in case this DSA switch might
-        * want to use sub-VLANs.
-        */
-       for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) {
-               u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan);
-
-               if (enabled)
-                       vlan_vid_add(master, ctx->proto, vid);
-               else
-                       vlan_vid_del(master, ctx->proto, vid);
-       }
+       /* Add @rx_vid to the master's RX filter. */
+       vlan_vid_add(master, ctx->proto, rx_vid);
 
        /* Finally apply the TX VID on this port and on the CPU port */
-       err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
-                                 enabled);
-       if (err) {
-               dev_err(ctx->ds->dev,
-                       "Failed to apply TX VID %d on port %d: %d\n",
-                       tx_vid, port, err);
-               return err;
-       }
-       err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled);
+       err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false);
        if (err) {
-               dev_err(ctx->ds->dev,
-                       "Failed to apply TX VID %d on port %d: %d\n",
-                       tx_vid, upstream, err);
+               dev_err(ds->dev,
+                       "Failed to apply TX VID %d on port %d: %pe\n",
+                       tx_vid, port, ERR_PTR(err));
                return err;
        }
 
        return err;
 }
 
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
+static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
 {
-       int rc, port;
+       struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+       struct dsa_port *dp = dsa_to_port(ds, port);
+       u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+       u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+       struct net_device *master;
 
-       ASSERT_RTNL();
+       /* The CPU port is implicitly configured by
+        * configuring the front-panel ports
+        */
+       if (!dsa_port_is_user(dp))
+               return;
 
-       for (port = 0; port < ctx->ds->num_ports; port++) {
-               rc = dsa_8021q_setup_port(ctx, port, enabled);
-               if (rc < 0) {
-                       dev_err(ctx->ds->dev,
-                               "Failed to setup VLAN tagging for port %d: %d\n",
-                               port, rc);
-                       return rc;
-               }
-       }
+       master = dp->cpu_dp->master;
 
-       return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_setup);
+       dsa_port_tag_8021q_vlan_del(dp, rx_vid, false);
 
-static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx,
-                                         int port,
-                                         struct dsa_8021q_context *other_ctx,
-                                         int other_port, bool enabled)
-{
-       u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
+       vlan_vid_del(master, ctx->proto, rx_vid);
 
-       /* @rx_vid of local @ds port @port goes to @other_port of
-        * @other_ds
-        */
-       return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid,
-                                  BRIDGE_VLAN_INFO_UNTAGGED, enabled);
+       dsa_port_tag_8021q_vlan_del(dp, tx_vid, false);
 }
 
-static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port,
-                                       struct dsa_8021q_context *other_ctx,
-                                       int other_port)
+static int dsa_tag_8021q_setup(struct dsa_switch *ds)
 {
-       struct dsa_8021q_crosschip_link *c;
+       int err, port;
+
+       ASSERT_RTNL();
 
-       list_for_each_entry(c, &ctx->crosschip_links, list) {
-               if (c->port == port && c->other_ctx == other_ctx &&
-                   c->other_port == other_port) {
-                       refcount_inc(&c->refcount);
-                       return 0;
+       for (port = 0; port < ds->num_ports; port++) {
+               err = dsa_tag_8021q_port_setup(ds, port);
+               if (err < 0) {
+                       dev_err(ds->dev,
+                               "Failed to setup VLAN tagging for port %d: %pe\n",
+                               port, ERR_PTR(err));
+                       return err;
                }
        }
 
-       dev_dbg(ctx->ds->dev,
-               "adding crosschip link from port %d to %s port %d\n",
-               port, dev_name(other_ctx->ds->dev), other_port);
-
-       c = kzalloc(sizeof(*c), GFP_KERNEL);
-       if (!c)
-               return -ENOMEM;
-
-       c->port = port;
-       c->other_ctx = other_ctx;
-       c->other_port = other_port;
-       refcount_set(&c->refcount, 1);
-
-       list_add(&c->list, &ctx->crosschip_links);
-
        return 0;
 }
 
-static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx,
-                                        struct dsa_8021q_crosschip_link *c,
-                                        bool *keep)
+static void dsa_tag_8021q_teardown(struct dsa_switch *ds)
 {
-       *keep = !refcount_dec_and_test(&c->refcount);
+       int port;
 
-       if (*keep)
-               return;
-
-       dev_dbg(ctx->ds->dev,
-               "deleting crosschip link from port %d to %s port %d\n",
-               c->port, dev_name(c->other_ctx->ds->dev), c->other_port);
+       ASSERT_RTNL();
 
-       list_del(&c->list);
-       kfree(c);
+       for (port = 0; port < ds->num_ports; port++)
+               dsa_tag_8021q_port_teardown(ds, port);
 }
 
-/* Make traffic from local port @port be received by remote port @other_port.
- * This means that our @rx_vid needs to be installed on @other_ds's upstream
- * and user ports. The user ports should be egress-untagged so that they can
- * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged
- * or untagged: it doesn't matter, since it should never egress a frame having
- * our @rx_vid.
- */
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
-                                   struct dsa_8021q_context *other_ctx,
-                                   int other_port)
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
 {
-       /* @other_upstream is how @other_ds reaches us. If we are part
-        * of disjoint trees, then we are probably connected through
-        * our CPU ports. If we're part of the same tree though, we should
-        * probably use dsa_towards_port.
-        */
-       int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
-       int rc;
+       struct dsa_8021q_context *ctx;
 
-       rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port);
-       if (rc)
-               return rc;
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
 
-       rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
-                                           other_port, true);
-       if (rc)
-               return rc;
+       ctx->proto = proto;
+       ctx->ds = ds;
 
-       rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream);
-       if (rc)
-               return rc;
+       INIT_LIST_HEAD(&ctx->vlans);
 
-       return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
-                                             other_upstream, true);
+       ds->tag_8021q_ctx = ctx;
+
+       return dsa_tag_8021q_setup(ds);
 }
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_register);
 
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
-                                    struct dsa_8021q_context *other_ctx,
-                                    int other_port)
+void dsa_tag_8021q_unregister(struct dsa_switch *ds)
 {
-       int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
-       struct dsa_8021q_crosschip_link *c, *n;
-
-       list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) {
-               if (c->port == port && c->other_ctx == other_ctx &&
-                   (c->other_port == other_port ||
-                    c->other_port == other_upstream)) {
-                       struct dsa_8021q_context *other_ctx = c->other_ctx;
-                       int other_port = c->other_port;
-                       bool keep;
-                       int rc;
-
-                       dsa_8021q_crosschip_link_del(ctx, c, &keep);
-                       if (keep)
-                               continue;
-
-                       rc = dsa_8021q_crosschip_link_apply(ctx, port,
-                                                           other_ctx,
-                                                           other_port,
-                                                           false);
-                       if (rc)
-                               return rc;
-               }
+       struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+       struct dsa_tag_8021q_vlan *v, *n;
+
+       dsa_tag_8021q_teardown(ds);
+
+       list_for_each_entry_safe(v, n, &ctx->vlans, list) {
+               list_del(&v->list);
+               kfree(v);
        }
 
-       return 0;
+       ds->tag_8021q_ctx = NULL;
+
+       kfree(ctx);
 }
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister);
 
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
                               u16 tpid, u16 tci)
@@ -471,8 +573,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
 
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
-                  int *subvlan)
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
 {
        u16 vid, tci;
 
@@ -489,9 +590,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
 
        *source_port = dsa_8021q_rx_source_port(vid);
        *switch_id = dsa_8021q_rx_switch_id(vid);
-       *subvlan = dsa_8021q_rx_subvlan(vid);
        skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
-
-MODULE_LICENSE("GPL v2");
index 0efae1a..8a02ac4 100644 (file)
@@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
-                                     struct net_device *ndev,
-                                     struct packet_type *pt)
+                                     struct net_device *ndev)
 {
        u8 ver, port;
        u16 hdr;
index 0750af9..96dbb8e 100644 (file)
@@ -99,7 +99,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
        skb_push(skb, BRCM_TAG_LEN);
 
        if (offset)
-               memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
+               dsa_alloc_etype_header(skb, BRCM_TAG_LEN);
 
        brcm_tag = skb->data + offset;
 
@@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
  */
 static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
                                       struct net_device *dev,
-                                      struct packet_type *pt,
                                       unsigned int offset)
 {
        int source_port;
@@ -167,7 +166,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
        /* Remove Broadcom tag and update checksum */
        skb_pull_rcsum(skb, BRCM_TAG_LEN);
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
@@ -182,20 +181,16 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
 }
 
 
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                   struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        struct sk_buff *nskb;
 
        /* skb->data points to the EtherType, the tag is right before it */
-       nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+       nskb = brcm_tag_rcv_ll(skb, dev, 2);
        if (!nskb)
                return nskb;
 
-       /* Move the Ethernet DA and SA */
-       memmove(nskb->data - ETH_HLEN,
-               nskb->data - ETH_HLEN - BRCM_TAG_LEN,
-               2 * ETH_ALEN);
+       dsa_strip_etype_header(skb, BRCM_TAG_LEN);
 
        return nskb;
 }
@@ -233,7 +228,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
 
        skb_push(skb, BRCM_LEG_TAG_LEN);
 
-       memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN);
+       dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN);
 
        brcm_tag = skb->data + 2 * ETH_ALEN;
 
@@ -251,8 +246,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
-                                       struct net_device *dev,
-                                       struct packet_type *pt)
+                                       struct net_device *dev)
 {
        int source_port;
        u8 *brcm_tag;
@@ -260,7 +254,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
        if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
                return NULL;
 
-       brcm_tag = skb->data - 2;
+       brcm_tag = dsa_etype_header_pos_rx(skb);
 
        source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
 
@@ -271,12 +265,9 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
        /* Remove Broadcom tag and update checksum */
        skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
-       /* Move the Ethernet DA and SA */
-       memmove(skb->data - ETH_HLEN,
-               skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN,
-               2 * ETH_ALEN);
+       dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN);
 
        return skb;
 }
@@ -302,11 +293,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
-                                           struct net_device *dev,
-                                           struct packet_type *pt)
+                                           struct net_device *dev)
 {
        /* tag is prepended to the packet */
-       return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+       return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
 }
 
 static const struct dsa_device_ops brcm_prepend_netdev_ops = {
index a822355..77d0ce8 100644 (file)
@@ -126,18 +126,53 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
                                   u8 extra)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
+       u8 tag_dev, tag_port;
+       enum dsa_cmd cmd;
        u8 *dsa_header;
+       u16 pvid = 0;
+       int err;
+
+       if (skb->offload_fwd_mark) {
+               struct dsa_switch_tree *dst = dp->ds->dst;
+               struct net_device *br = dp->bridge_dev;
+
+               cmd = DSA_CMD_FORWARD;
+
+               /* When offloading forwarding for a bridge, inject FORWARD
+                * packets on behalf of a virtual switch device with an index
+                * past the physical switches.
+                */
+               tag_dev = dst->last_switch + 1 + dp->bridge_num;
+               tag_port = 0;
+
+               /* If we are offloading forwarding for a VLAN-unaware bridge,
+                * inject packets to hardware using the bridge's pvid, since
+                * that's where the packets ingressed from.
+                */
+               if (!br_vlan_enabled(br)) {
+                       /* Safe because __dev_queue_xmit() runs under
+                        * rcu_read_lock_bh()
+                        */
+                       err = br_vlan_get_pvid_rcu(br, &pvid);
+                       if (err)
+                               return NULL;
+               }
+       } else {
+               cmd = DSA_CMD_FROM_CPU;
+               tag_dev = dp->ds->index;
+               tag_port = dp->index;
+       }
 
        if (skb->protocol == htons(ETH_P_8021Q)) {
                if (extra) {
                        skb_push(skb, extra);
-                       memmove(skb->data, skb->data + extra, 2 * ETH_ALEN);
+                       dsa_alloc_etype_header(skb, extra);
                }
 
-               /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */
-               dsa_header = skb->data + 2 * ETH_ALEN + extra;
-               dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index;
-               dsa_header[1] = dp->index << 3;
+               /* Construct tagged DSA tag from 802.1Q tag. */
+               dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+               dsa_header[0] = (cmd << 6) | 0x20 | tag_dev;
+               dsa_header[1] = tag_port << 3;
 
                /* Move CFI field from byte 2 to byte 1. */
                if (dsa_header[2] & 0x10) {
@@ -146,14 +181,15 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
                }
        } else {
                skb_push(skb, DSA_HLEN + extra);
-               memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN);
-
-               /* Construct untagged FROM_CPU DSA tag. */
-               dsa_header = skb->data + 2 * ETH_ALEN + extra;
-               dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index;
-               dsa_header[1] = dp->index << 3;
-               dsa_header[2] = 0x00;
-               dsa_header[3] = 0x00;
+               dsa_alloc_etype_header(skb, DSA_HLEN + extra);
+
+               /* Construct untagged DSA tag. */
+               dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+
+               dsa_header[0] = (cmd << 6) | tag_dev;
+               dsa_header[1] = tag_port << 3;
+               dsa_header[2] = pvid >> 8;
+               dsa_header[3] = pvid & 0xff;
        }
 
        return skb;
@@ -162,20 +198,18 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
 static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
                                  u8 extra)
 {
+       bool trap = false, trunk = false;
        int source_device, source_port;
-       bool trunk = false;
        enum dsa_code code;
        enum dsa_cmd cmd;
        u8 *dsa_header;
 
        /* The ethertype field is part of the DSA header. */
-       dsa_header = skb->data - 2;
+       dsa_header = dsa_etype_header_pos_rx(skb);
 
        cmd = dsa_header[0] >> 6;
        switch (cmd) {
        case DSA_CMD_FORWARD:
-               skb->offload_fwd_mark = 1;
-
                trunk = !!(dsa_header[1] & 7);
                break;
 
@@ -194,7 +228,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
                         * device (like a bridge) that forwarding has
                         * already been done by hardware.
                         */
-                       skb->offload_fwd_mark = 1;
                        break;
                case DSA_CODE_MGMT_TRAP:
                case DSA_CODE_IGMP_MLD_TRAP:
@@ -202,6 +235,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
                        /* Traps have, by definition, not been
                         * forwarded by hardware, so don't mark them.
                         */
+                       trap = true;
                        break;
                default:
                        /* Reserved code, this could be anything. Drop
@@ -235,6 +269,15 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
        if (!skb->dev)
                return NULL;
 
+       /* When using LAG offload, skb->dev is not a DSA slave interface,
+        * so we cannot call dsa_default_offload_fwd_mark and we need to
+        * special-case it.
+        */
+       if (trunk)
+               skb->offload_fwd_mark = true;
+       else if (!trap)
+               dsa_default_offload_fwd_mark(skb);
+
        /* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q
         * tag, and delete the ethertype (extra) if applicable. If the
         * 'tagged' bit is cleared; delete the DSA tag, and ethertype
@@ -269,14 +312,10 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
                memcpy(dsa_header, new_header, DSA_HLEN);
 
                if (extra)
-                       memmove(skb->data - ETH_HLEN,
-                               skb->data - ETH_HLEN - extra,
-                               2 * ETH_ALEN);
+                       dsa_strip_etype_header(skb, extra);
        } else {
                skb_pull_rcsum(skb, DSA_HLEN);
-               memmove(skb->data - ETH_HLEN,
-                       skb->data - ETH_HLEN - DSA_HLEN - extra,
-                       2 * ETH_ALEN);
+               dsa_strip_etype_header(skb, DSA_HLEN + extra);
        }
 
        return skb;
@@ -289,8 +328,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return dsa_xmit_ll(skb, dev, 0);
 }
 
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
                return NULL;
@@ -322,7 +360,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
        if (!skb)
                return NULL;
 
-       edsa_header = skb->data + 2 * ETH_ALEN;
+       edsa_header = dsa_etype_header_pos_tx(skb);
        edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff;
        edsa_header[1] = ETH_P_EDSA & 0xff;
        edsa_header[2] = 0x00;
@@ -330,8 +368,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                               struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
                return NULL;
index 5985dab..df71409 100644 (file)
@@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        int port;
        u8 *gswip_tag;
index 424130f..f64b805 100644 (file)
@@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
@@ -44,7 +43,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
 
        pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
 
-       skb->offload_fwd_mark = true;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index a201ccf..fa1d60d 100644 (file)
@@ -24,7 +24,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 
        pskb_trim_rcsum(skb, skb->len - len);
 
-       skb->offload_fwd_mark = true;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
@@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
-                                 struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
 
@@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
index 26207ef..cb54818 100644 (file)
@@ -62,9 +62,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_push(skb, LAN9303_TAG_LEN);
 
        /* make room between MACs and Ether-Type */
-       memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
+       dsa_alloc_etype_header(skb, LAN9303_TAG_LEN);
+
+       lan9303_tag = dsa_etype_header_pos_tx(skb);
 
-       lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN);
        tag = lan9303_xmit_use_arl(dp, skb->data) ?
                LAN9303_TAG_TX_USE_ALR :
                dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
@@ -74,8 +75,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        __be16 *lan9303_tag;
        u16 lan9303_tag1;
@@ -87,13 +87,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
                return NULL;
        }
 
-       /* '->data' points into the middle of our special VLAN tag information:
-        *
-        * ~ MAC src   | 0x81 | 0x00 | 0xyy | 0xzz | ether type
-        *                           ^
-        *                        ->data
-        */
-       lan9303_tag = (__be16 *)(skb->data - 2);
+       lan9303_tag = dsa_etype_header_pos_rx(skb);
 
        if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
                dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
@@ -113,9 +107,11 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
         * and the current ethertype field.
         */
        skb_pull_rcsum(skb, 2 + 2);
-       memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
-               2 * ETH_ALEN);
-       skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
+
+       dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
+
+       if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
+               dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index cc3ba86..415d8ec 100644 (file)
@@ -41,10 +41,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        default:
                xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
                skb_push(skb, MTK_HDR_LEN);
-               memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+               dsa_alloc_etype_header(skb, MTK_HDR_LEN);
        }
 
-       mtk_tag = skb->data + 2 * ETH_ALEN;
+       mtk_tag = dsa_etype_header_pos_tx(skb);
 
        /* Mark tag attribute on special tag insertion to notify hardware
         * whether that's a combined special tag with 802.1Q header.
@@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u16 hdr;
        int port;
@@ -71,19 +70,13 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
                return NULL;
 
-       /* The MTK header is added by the switch between src addr
-        * and ethertype at this point, skb->data points to 2 bytes
-        * after src addr so header should be 2 bytes right before.
-        */
-       phdr = (__be16 *)(skb->data - 2);
+       phdr = dsa_etype_header_pos_rx(skb);
        hdr = ntohs(*phdr);
 
        /* Remove MTK tag and recalculate checksum. */
        skb_pull_rcsum(skb, MTK_HDR_LEN);
 
-       memmove(skb->data - ETH_HLEN,
-               skb->data - ETH_HLEN - MTK_HDR_LEN,
-               2 * ETH_ALEN);
+       dsa_strip_etype_header(skb, MTK_HDR_LEN);
 
        /* Get source port information */
        port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
@@ -92,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
        if (!skb->dev)
                return NULL;
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index 190f4bf..d37ab98 100644 (file)
@@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
        u64 src_port, qos_class;
        u64 vlan_tci, tag_type;
@@ -104,7 +103,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
                 */
                return NULL;
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
        skb->priority = qos_class;
 
        /* Ocelot switches copy frames unmodified to the CPU. However, it is
index 85ac85c..3038a25 100644 (file)
@@ -38,18 +38,17 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
-       int src_port, switch_id, subvlan;
+       int src_port, switch_id;
 
-       dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan);
+       dsa_8021q_rcv(skb, &src_port, &switch_id);
 
        skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
        if (!skb->dev)
                return NULL;
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index 693bda0..1ea9401 100644 (file)
@@ -36,8 +36,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
 
        skb_push(skb, QCA_HDR_LEN);
 
-       memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
-       phdr = (__be16 *)(skb->data + 2 * ETH_ALEN);
+       dsa_alloc_etype_header(skb, QCA_HDR_LEN);
+       phdr = dsa_etype_header_pos_tx(skb);
 
        /* Set the version field, and set destination port information */
        hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
@@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 ver;
        u16  hdr;
@@ -59,11 +58,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
        if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
                return NULL;
 
-       /* The QCA header is added by the switch between src addr and Ethertype
-        * At this point, skb->data points to ethertype so header should be
-        * right before
-        */
-       phdr = (__be16 *)(skb->data - 2);
+       phdr = dsa_etype_header_pos_rx(skb);
        hdr = ntohs(*phdr);
 
        /* Make sure the version is correct */
@@ -73,8 +68,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 
        /* Remove QCA tag and recalculate checksum */
        skb_pull_rcsum(skb, QCA_HDR_LEN);
-       memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
-               ETH_HLEN - QCA_HDR_LEN);
+       dsa_strip_etype_header(skb, QCA_HDR_LEN);
 
        /* Get source port information */
        port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
index 57c46b4..40811ba 100644 (file)
@@ -47,8 +47,8 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
                   dp->index);
        skb_push(skb, RTL4_A_HDR_LEN);
 
-       memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN);
-       tag = skb->data + 2 * ETH_ALEN;
+       dsa_alloc_etype_header(skb, RTL4_A_HDR_LEN);
+       tag = dsa_etype_header_pos_tx(skb);
 
        /* Set Ethertype */
        p = (__be16 *)tag;
@@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        u16 protport;
        __be16 *p;
@@ -77,12 +76,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
        if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN)))
                return NULL;
 
-       /* The RTL4 header has its own custom Ethertype 0x8899 and that
-        * starts right at the beginning of the packet, after the src
-        * ethernet addr. Apparently skb->data always points 2 bytes in,
-        * behind the Ethertype.
-        */
-       tag = skb->data - 2;
+       tag = dsa_etype_header_pos_rx(skb);
        p = (__be16 *)tag;
        etype = ntohs(*p);
        if (etype != RTL4_A_ETHERTYPE) {
@@ -109,12 +103,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
        /* Remove RTL4 tag and recalculate checksum */
        skb_pull_rcsum(skb, RTL4_A_HDR_LEN);
 
-       /* Move ethernet DA and SA in front of the data */
-       memmove(skb->data - ETH_HLEN,
-               skb->data - ETH_HLEN - RTL4_A_HDR_LEN,
-               2 * ETH_ALEN);
+       dsa_strip_etype_header(skb, RTL4_A_HDR_LEN);
 
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index 9c2df9e..c054f48 100644 (file)
@@ -115,56 +115,117 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
        return true;
 }
 
-static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb)
+/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
+static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp,
+                                         struct sk_buff *skb)
 {
-       struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
-       u16 vlan_tci;
-
-       if (hdr->h_vlan_proto == htons(ETH_P_SJA1105))
-               return true;
+       struct sja1105_port *sp = dp->priv;
 
-       if (hdr->h_vlan_proto != htons(ETH_P_8021Q) &&
-           !skb_vlan_tag_present(skb))
-               return false;
+       if (!dsa_port_is_sja1105(dp))
+               return skb;
 
-       if (skb_vlan_tag_present(skb))
-               vlan_tci = skb_vlan_tag_get(skb);
-       else
-               vlan_tci = ntohs(hdr->h_vlan_TCI);
+       /* Increase refcount so the kfree_skb in dsa_slave_xmit
+        * won't really free the packet.
+        */
+       skb_queue_tail(&sp->xmit_queue, skb_get(skb));
+       kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
 
-       return vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK);
+       return NULL;
 }
 
-/* This is the first time the tagger sees the frame on RX.
- * Figure out if we can decode it.
+/* Send VLAN tags with a TPID that blends in with whatever VLAN protocol a
+ * bridge spanning ports of this switch might have.
  */
-static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
+static u16 sja1105_xmit_tpid(struct dsa_port *dp)
 {
-       if (sja1105_can_use_vlan_as_tags(skb))
-               return true;
-       if (sja1105_is_link_local(skb))
-               return true;
-       if (sja1105_is_meta_frame(skb))
-               return true;
-       return false;
+       struct dsa_switch *ds = dp->ds;
+       struct dsa_port *other_dp;
+       u16 proto;
+
+       /* Since VLAN awareness is global, then if this port is VLAN-unaware,
+        * all ports are. Use the VLAN-unaware TPID used for tag_8021q.
+        */
+       if (!dsa_port_is_vlan_filtering(dp))
+               return ETH_P_SJA1105;
+
+       /* Port is VLAN-aware, so there is a bridge somewhere (a single one,
+        * we're sure about that). It may not be on this port though, so we
+        * need to find it.
+        */
+       list_for_each_entry(other_dp, &ds->dst->ports, list) {
+               if (other_dp->ds != ds)
+                       continue;
+
+               if (!other_dp->bridge_dev)
+                       continue;
+
+               /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING,
+                * which seems pointless to handle, as our port cannot become
+                * VLAN-aware in that case.
+                */
+               br_vlan_get_proto(other_dp->bridge_dev, &proto);
+
+               return proto;
+       }
+
+       WARN_ONCE(1, "Port is VLAN-aware but cannot find associated bridge!\n");
+
+       return ETH_P_SJA1105;
 }
 
-/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
-static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
-                                         struct sk_buff *skb)
+static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
+                                             struct net_device *netdev)
 {
-       /* Increase refcount so the kfree_skb in dsa_slave_xmit
-        * won't really free the packet.
+       struct dsa_port *dp = dsa_slave_to_port(netdev);
+       struct net_device *br = dp->bridge_dev;
+       u16 tx_vid;
+
+       /* If the port is under a VLAN-aware bridge, just slide the
+        * VLAN-tagged packet into the FDB and hope for the best.
+        * This works because we support a single VLAN-aware bridge
+        * across the entire dst, and its VLANs cannot be shared with
+        * any standalone port.
         */
-       skb_queue_tail(&sp->xmit_queue, skb_get(skb));
-       kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
+       if (br_vlan_enabled(br))
+               return skb;
 
-       return NULL;
+       /* If the port is under a VLAN-unaware bridge, use an imprecise
+        * TX VLAN that targets the bridge's entire broadcast domain,
+        * instead of just the specific port.
+        */
+       tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num);
+
+       return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid);
 }
 
-static u16 sja1105_xmit_tpid(struct sja1105_port *sp)
+/* Transform untagged control packets into pvid-tagged control packets so that
+ * all packets sent by this tagger are VLAN-tagged and we can configure the
+ * switch to drop untagged packets coming from the DSA master.
+ */
+static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp,
+                                                   struct sk_buff *skb, u8 pcp)
 {
-       return sp->xmit_tpid;
+       __be16 xmit_tpid = htons(sja1105_xmit_tpid(dp));
+       struct vlan_ethhdr *hdr;
+
+       /* If VLAN tag is in hwaccel area, move it to the payload
+        * to deal with both cases uniformly and to ensure that
+        * the VLANs are added in the right order.
+        */
+       if (unlikely(skb_vlan_tag_present(skb))) {
+               skb = __vlan_hwaccel_push_inside(skb);
+               if (!skb)
+                       return NULL;
+       }
+
+       hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+       /* If skb is already VLAN-tagged, leave that VLAN ID in place */
+       if (hdr->h_vlan_proto == xmit_tpid)
+               return skb;
+
+       return vlan_insert_tag(skb, xmit_tpid, (pcp << VLAN_PRIO_SHIFT) |
+                              SJA1105_DEFAULT_VLAN);
 }
 
 static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
@@ -175,14 +236,22 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
        u16 queue_mapping = skb_get_queue_mapping(skb);
        u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
 
+       if (skb->offload_fwd_mark)
+               return sja1105_imprecise_xmit(skb, netdev);
+
        /* Transmitting management traffic does not rely upon switch tagging,
         * but instead SPI-installed management routes. Part 2 of this
         * is the .port_deferred_xmit driver callback.
         */
-       if (unlikely(sja1105_is_link_local(skb)))
-               return sja1105_defer_xmit(dp->priv, skb);
+       if (unlikely(sja1105_is_link_local(skb))) {
+               skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+               if (!skb)
+                       return NULL;
+
+               return sja1105_defer_xmit(dp, skb);
+       }
 
-       return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+       return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
                             ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
 }
 
@@ -194,43 +263,45 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
        u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
        u16 queue_mapping = skb_get_queue_mapping(skb);
        u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
-       struct ethhdr *eth_hdr;
        __be32 *tx_trailer;
        __be16 *tx_header;
        int trailer_pos;
 
+       if (skb->offload_fwd_mark)
+               return sja1105_imprecise_xmit(skb, netdev);
+
        /* Transmitting control packets is done using in-band control
         * extensions, while data packets are transmitted using
         * tag_8021q TX VLANs.
         */
        if (likely(!sja1105_is_link_local(skb)))
-               return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+               return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
                                     ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
 
+       skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+       if (!skb)
+               return NULL;
+
        skb_push(skb, SJA1110_HEADER_LEN);
 
-       /* Move Ethernet header to the left, making space for DSA tag */
-       memmove(skb->data, skb->data + SJA1110_HEADER_LEN, 2 * ETH_ALEN);
+       dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN);
 
        trailer_pos = skb->len;
 
-       /* On TX, skb->data points to skb_mac_header(skb) */
-       eth_hdr = (struct ethhdr *)skb->data;
-       tx_header = (__be16 *)(eth_hdr + 1);
+       tx_header = dsa_etype_header_pos_tx(skb);
        tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN);
 
-       eth_hdr->h_proto = htons(ETH_P_SJA1110);
-
-       *tx_header = htons(SJA1110_HEADER_HOST_TO_SWITCH |
-                          SJA1110_TX_HEADER_HAS_TRAILER |
-                          SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
+       tx_header[0] = htons(ETH_P_SJA1110);
+       tx_header[1] = htons(SJA1110_HEADER_HOST_TO_SWITCH |
+                            SJA1110_TX_HEADER_HAS_TRAILER |
+                            SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
        *tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) |
                                  SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) |
                                  SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index)));
        if (clone) {
                u8 ts_id = SJA1105_SKB_CB(clone)->ts_id;
 
-               *tx_header |= htons(SJA1110_TX_HEADER_TAKE_TS);
+               tx_header[1] |= htons(SJA1110_TX_HEADER_TAKE_TS);
                *tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id));
        }
 
@@ -273,16 +344,16 @@ static struct sk_buff
                                bool is_link_local,
                                bool is_meta)
 {
-       struct sja1105_port *sp;
-       struct dsa_port *dp;
-
-       dp = dsa_slave_to_port(skb->dev);
-       sp = dp->priv;
-
        /* Step 1: A timestampable frame was received.
         * Buffer it until we get its meta frame.
         */
        if (is_link_local) {
+               struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+               struct sja1105_port *sp = dp->priv;
+
+               if (unlikely(!dsa_port_is_sja1105(dp)))
+                       return skb;
+
                if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
                        /* Do normal processing. */
                        return skb;
@@ -315,8 +386,13 @@ static struct sk_buff
         * frame, which serves no further purpose).
         */
        } else if (is_meta) {
+               struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+               struct sja1105_port *sp = dp->priv;
                struct sk_buff *stampable_skb;
 
+               if (unlikely(!dsa_port_is_sja1105(dp)))
+                       return skb;
+
                /* Drop the meta frame if we're not in the right state
                 * to process it.
                 */
@@ -358,20 +434,6 @@ static struct sk_buff
        return skb;
 }
 
-static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
-{
-       struct dsa_port *dp = dsa_slave_to_port(skb->dev);
-       struct sja1105_port *sp = dp->priv;
-       u16 vid = sp->subvlan_map[subvlan];
-       u16 vlan_tci;
-
-       if (vid == VLAN_N_VID)
-               return;
-
-       vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid;
-       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
-}
-
 static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
 {
        u16 tpid = ntohs(eth_hdr(skb)->h_proto);
@@ -385,25 +447,45 @@ static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
        return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
 }
 
+/* If the VLAN in the packet is a tag_8021q one, set @source_port and
+ * @switch_id and strip the header. Otherwise set @vid and keep it in the
+ * packet.
+ */
+static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
+                            int *switch_id, u16 *vid)
+{
+       struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+       u16 vlan_tci;
+
+       if (skb_vlan_tag_present(skb))
+               vlan_tci = skb_vlan_tag_get(skb);
+       else
+               vlan_tci = ntohs(hdr->h_vlan_TCI);
+
+       if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK))
+               return dsa_8021q_rcv(skb, source_port, switch_id);
+
+       /* Try our best with imprecise RX */
+       *vid = vlan_tci & VLAN_VID_MASK;
+}
+
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
-       int source_port, switch_id, subvlan = 0;
+       int source_port = -1, switch_id = -1;
        struct sja1105_meta meta = {0};
        struct ethhdr *hdr;
        bool is_link_local;
        bool is_meta;
+       u16 vid;
 
        hdr = eth_hdr(skb);
        is_link_local = sja1105_is_link_local(skb);
        is_meta = sja1105_is_meta_frame(skb);
 
-       skb->offload_fwd_mark = 1;
-
        if (sja1105_skb_has_tag_8021q(skb)) {
                /* Normal traffic path. */
-               dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+               sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
        } else if (is_link_local) {
                /* Management traffic path. Switch embeds the switch ID and
                 * port ID into bytes of the destination MAC, courtesy of
@@ -422,14 +504,17 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
                return NULL;
        }
 
-       skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+       if (source_port == -1 || switch_id == -1)
+               skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+       else
+               skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
        if (!skb->dev) {
                netdev_warn(netdev, "Couldn't decode source port\n");
                return NULL;
        }
 
-       if (subvlan)
-               sja1105_decode_subvlan(skb, subvlan);
+       if (!is_link_local)
+               dsa_default_offload_fwd_mark(skb);
 
        return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
                                              is_meta);
@@ -437,11 +522,11 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 
 static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
 {
+       u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
        int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
        int n_ts = SJA1110_RX_HEADER_N_TS(rx_header);
        struct net_device *master = skb->dev;
        struct dsa_port *cpu_dp;
-       u8 *buf = skb->data + 2;
        struct dsa_switch *ds;
        int i;
 
@@ -474,7 +559,8 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
 
 static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
                                                            int *source_port,
-                                                           int *switch_id)
+                                                           int *switch_id,
+                                                           bool *host_only)
 {
        u16 rx_header;
 
@@ -488,6 +574,9 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
         */
        rx_header = ntohs(*(__be16 *)skb->data);
 
+       if (rx_header & SJA1110_RX_HEADER_HOST_ONLY)
+               *host_only = true;
+
        if (rx_header & SJA1110_RX_HEADER_IS_METADATA)
                return sja1110_rcv_meta(skb, rx_header);
 
@@ -522,9 +611,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
        /* Advance skb->data past the DSA header */
        skb_pull_rcsum(skb, SJA1110_HEADER_LEN);
 
-       /* Remove the DSA header */
-       memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - SJA1110_HEADER_LEN,
-               2 * ETH_ALEN);
+       dsa_strip_etype_header(skb, SJA1110_HEADER_LEN);
 
        /* With skb->data in its final place, update the MAC header
         * so that eth_hdr() continues to works properly.
@@ -535,34 +622,35 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 }
 
 static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
-       int source_port = -1, switch_id = -1, subvlan = 0;
-
-       skb->offload_fwd_mark = 1;
+       int source_port = -1, switch_id = -1;
+       bool host_only = false;
+       u16 vid = 0;
 
        if (sja1110_skb_has_inband_control_extension(skb)) {
                skb = sja1110_rcv_inband_control_extension(skb, &source_port,
-                                                          &switch_id);
+                                                          &switch_id,
+                                                          &host_only);
                if (!skb)
                        return NULL;
        }
 
        /* Packets with in-band control extensions might still have RX VLANs */
        if (likely(sja1105_skb_has_tag_8021q(skb)))
-               dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+               sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
 
-       skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+       if (source_port == -1 || switch_id == -1)
+               skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+       else
+               skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
        if (!skb->dev) {
-               netdev_warn(netdev,
-                           "Couldn't decode source port %d and switch id %d\n",
-                           source_port, switch_id);
+               netdev_warn(netdev, "Couldn't decode source port\n");
                return NULL;
        }
 
-       if (subvlan)
-               sja1105_decode_subvlan(skb, subvlan);
+       if (!host_only)
+               dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
@@ -596,7 +684,6 @@ static const struct dsa_device_ops sja1105_netdev_ops = {
        .proto = DSA_TAG_PROTO_SJA1105,
        .xmit = sja1105_xmit,
        .rcv = sja1105_rcv,
-       .filter = sja1105_filter,
        .needed_headroom = VLAN_HLEN,
        .flow_dissect = sja1105_flow_dissect,
        .promisc_on_master = true,
@@ -610,7 +697,6 @@ static const struct dsa_device_ops sja1110_netdev_ops = {
        .proto = DSA_TAG_PROTO_SJA1110,
        .xmit = sja1110_xmit,
        .rcv = sja1110_rcv,
-       .filter = sja1105_filter,
        .flow_dissect = sja1110_flow_dissect,
        .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN,
        .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN,
index ba73804..5749ba8 100644 (file)
@@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *trailer;
        int source_port;
index a31ff7f..ff442b8 100644 (file)
@@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        int source_port;
        u8 *trailer;
@@ -46,7 +45,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
                return NULL;
 
        /* Frame is forwarded by hardware, don't forward in software. */
-       skb->offload_fwd_mark = 1;
+       dsa_default_offload_fwd_mark(skb);
 
        return skb;
 }
index 9cce612..73fce94 100644 (file)
@@ -62,8 +62,6 @@
 #include <linux/uaccess.h>
 #include <net/pkt_sched.h>
 
-__setup("ether=", netdev_boot_setup);
-
 /**
  * eth_header - create the Ethernet header
  * @skb:       buffer to alter
@@ -182,12 +180,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
         * at all, so we check here whether one of those tagging
         * variants has been configured on the receiving interface,
         * and if so, set skb->protocol without looking at the packet.
-        * The DSA tagging protocol may be able to decode some but not all
-        * traffic (for example only for management). In that case give it the
-        * option to filter the packets from which it can decode source port
-        * information.
         */
-       if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev))
+       if (unlikely(netdev_uses_dsa(dev)))
                return htons(ETH_P_XDSA);
 
        if (likely(eth_proto_is_802_3(eth->h_proto)))
index 1d6bc13..46776ea 100644 (file)
@@ -10,6 +10,7 @@ struct coalesce_req_info {
 struct coalesce_reply_data {
        struct ethnl_reply_data         base;
        struct ethtool_coalesce         coalesce;
+       struct kernel_ethtool_coalesce  kernel_coalesce;
        u32                             supported_params;
 };
 
@@ -61,6 +62,7 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
                                 struct genl_info *info)
 {
        struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+       struct netlink_ext_ack *extack = info ? info->extack : NULL;
        struct net_device *dev = reply_base->dev;
        int ret;
 
@@ -70,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
        ret = ethnl_ops_begin(dev);
        if (ret < 0)
                return ret;
-       ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce);
+       ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
+                                            &data->kernel_coalesce, extack);
        ethnl_ops_complete(dev);
 
        return ret;
@@ -100,7 +103,9 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
               nla_total_size(sizeof(u32)) +    /* _RX_MAX_FRAMES_HIGH */
               nla_total_size(sizeof(u32)) +    /* _TX_USECS_HIGH */
               nla_total_size(sizeof(u32)) +    /* _TX_MAX_FRAMES_HIGH */
-              nla_total_size(sizeof(u32));     /* _RATE_SAMPLE_INTERVAL */
+              nla_total_size(sizeof(u32)) +    /* _RATE_SAMPLE_INTERVAL */
+              nla_total_size(sizeof(u8)) +     /* _USE_CQE_MODE_TX */
+              nla_total_size(sizeof(u8));      /* _USE_CQE_MODE_RX */
 }
 
 static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -124,6 +129,7 @@ static int coalesce_fill_reply(struct sk_buff *skb,
                               const struct ethnl_reply_data *reply_base)
 {
        const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+       const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
        const struct ethtool_coalesce *coal = &data->coalesce;
        u32 supported = data->supported_params;
 
@@ -170,7 +176,11 @@ static int coalesce_fill_reply(struct sk_buff *skb,
            coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
                             coal->tx_max_coalesced_frames_high, supported) ||
            coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
-                            coal->rate_sample_interval, supported))
+                            coal->rate_sample_interval, supported) ||
+           coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
+                             kcoal->use_cqe_mode_tx, supported) ||
+           coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
+                             kcoal->use_cqe_mode_rx, supported))
                return -EMSGSIZE;
 
        return 0;
@@ -215,10 +225,13 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
        [ETHTOOL_A_COALESCE_TX_USECS_HIGH]      = { .type = NLA_U32 },
        [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .type = NLA_U32 },
        [ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 },
+       [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX]    = NLA_POLICY_MAX(NLA_U8, 1),
+       [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX]    = NLA_POLICY_MAX(NLA_U8, 1),
 };
 
 int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
 {
+       struct kernel_ethtool_coalesce kernel_coalesce = {};
        struct ethtool_coalesce coalesce = {};
        struct ethnl_req_info req_info = {};
        struct nlattr **tb = info->attrs;
@@ -255,7 +268,8 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
        ret = ethnl_ops_begin(dev);
        if (ret < 0)
                goto out_rtnl;
-       ret = ops->get_coalesce(dev, &coalesce);
+       ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+                               info->extack);
        if (ret < 0)
                goto out_ops;
 
@@ -303,11 +317,16 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
                         tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod);
        ethnl_update_u32(&coalesce.rate_sample_interval,
                         tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod);
+       ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx,
+                       tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
+       ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
+                       tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
        ret = 0;
        if (!mod)
                goto out_ops;
 
-       ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+       ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+                                            info->extack);
        if (ret < 0)
                goto out_ops;
        ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
index baa5d10..f2abc31 100644 (file)
@@ -7,6 +7,7 @@
  * the information ethtool needs.
  */
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/capability.h>
@@ -23,6 +24,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <linux/pm_runtime.h>
 #include <net/devlink.h>
 #include <net/xdp_sock_drv.h>
 #include <net/flow_offload.h>
@@ -807,6 +809,120 @@ out:
        return ret;
 }
 
+static noinline_for_stack int
+ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc,
+                              const struct compat_ethtool_rxnfc __user *useraddr,
+                              size_t size)
+{
+       struct compat_ethtool_rxnfc crxnfc = {};
+
+       /* We expect there to be holes between fs.m_ext and
+        * fs.ring_cookie and at the end of fs, but nowhere else.
+        * On non-x86, no conversion should be needed.
+        */
+       BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) &&
+                    sizeof(struct compat_ethtool_rxnfc) !=
+                    sizeof(struct ethtool_rxnfc));
+       BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
+                    sizeof(useraddr->fs.m_ext) !=
+                    offsetof(struct ethtool_rxnfc, fs.m_ext) +
+                    sizeof(rxnfc->fs.m_ext));
+       BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) -
+                    offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+                    offsetof(struct ethtool_rxnfc, fs.location) -
+                    offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+       if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc))))
+               return -EFAULT;
+
+       *rxnfc = (struct ethtool_rxnfc) {
+               .cmd            = crxnfc.cmd,
+               .flow_type      = crxnfc.flow_type,
+               .data           = crxnfc.data,
+               .fs             = {
+                       .flow_type      = crxnfc.fs.flow_type,
+                       .h_u            = crxnfc.fs.h_u,
+                       .h_ext          = crxnfc.fs.h_ext,
+                       .m_u            = crxnfc.fs.m_u,
+                       .m_ext          = crxnfc.fs.m_ext,
+                       .ring_cookie    = crxnfc.fs.ring_cookie,
+                       .location       = crxnfc.fs.location,
+               },
+               .rule_cnt       = crxnfc.rule_cnt,
+       };
+
+       return 0;
+}
+
+static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc,
+                                       const void __user *useraddr,
+                                       size_t size)
+{
+       if (compat_need_64bit_alignment_fixup())
+               return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size);
+
+       if (copy_from_user(rxnfc, useraddr, size))
+               return -EFAULT;
+
+       return 0;
+}
+
+static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
+                                       const struct ethtool_rxnfc *rxnfc,
+                                       size_t size, const u32 *rule_buf)
+{
+       struct compat_ethtool_rxnfc crxnfc;
+
+       memset(&crxnfc, 0, sizeof(crxnfc));
+       crxnfc = (struct compat_ethtool_rxnfc) {
+               .cmd            = rxnfc->cmd,
+               .flow_type      = rxnfc->flow_type,
+               .data           = rxnfc->data,
+               .fs             = {
+                       .flow_type      = rxnfc->fs.flow_type,
+                       .h_u            = rxnfc->fs.h_u,
+                       .h_ext          = rxnfc->fs.h_ext,
+                       .m_u            = rxnfc->fs.m_u,
+                       .m_ext          = rxnfc->fs.m_ext,
+                       .ring_cookie    = rxnfc->fs.ring_cookie,
+                       .location       = rxnfc->fs.location,
+               },
+               .rule_cnt       = rxnfc->rule_cnt,
+       };
+
+       if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc))))
+               return -EFAULT;
+
+       return 0;
+}
+
+static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
+                                     const struct ethtool_rxnfc *rxnfc,
+                                     size_t size, const u32 *rule_buf)
+{
+       int ret;
+
+       if (compat_need_64bit_alignment_fixup()) {
+               ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size,
+                                                  rule_buf);
+               useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs);
+       } else {
+               ret = copy_to_user(useraddr, rxnfc, size);
+               useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+       }
+
+       if (ret)
+               return -EFAULT;
+
+       if (rule_buf) {
+               if (copy_to_user(useraddr, rule_buf,
+                                rxnfc->rule_cnt * sizeof(u32)))
+                       return -EFAULT;
+       }
+
+       return 0;
+}
+
 static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
                                                u32 cmd, void __user *useraddr)
 {
@@ -825,7 +941,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
                info_size = (offsetof(struct ethtool_rxnfc, data) +
                             sizeof(info.data));
 
-       if (copy_from_user(&info, useraddr, info_size))
+       if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
                return -EFAULT;
 
        rc = dev->ethtool_ops->set_rxnfc(dev, &info);
@@ -833,7 +949,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
                return rc;
 
        if (cmd == ETHTOOL_SRXCLSRLINS &&
-           copy_to_user(useraddr, &info, info_size))
+           ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL))
                return -EFAULT;
 
        return 0;
@@ -859,7 +975,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
                info_size = (offsetof(struct ethtool_rxnfc, data) +
                             sizeof(info.data));
 
-       if (copy_from_user(&info, useraddr, info_size))
+       if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
                return -EFAULT;
 
        /* If FLOW_RSS was requested then user-space must be using the
@@ -867,7 +983,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
         */
        if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
                info_size = sizeof(info);
-               if (copy_from_user(&info, useraddr, info_size))
+               if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
                        return -EFAULT;
                /* Since malicious users may modify the original data,
                 * we need to check whether FLOW_RSS is still requested.
@@ -893,18 +1009,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
        if (ret < 0)
                goto err_out;
 
-       ret = -EFAULT;
-       if (copy_to_user(useraddr, &info, info_size))
-               goto err_out;
-
-       if (rule_buf) {
-               useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
-               if (copy_to_user(useraddr, rule_buf,
-                                info.rule_cnt * sizeof(u32)))
-                       goto err_out;
-       }
-       ret = 0;
-
+       ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf);
 err_out:
        kfree(rule_buf);
 
@@ -1514,12 +1619,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
                                                   void __user *useraddr)
 {
        struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+       struct kernel_ethtool_coalesce kernel_coalesce = {};
        int ret;
 
        if (!dev->ethtool_ops->get_coalesce)
                return -EOPNOTSUPP;
 
-       ret = dev->ethtool_ops->get_coalesce(dev, &coalesce);
+       ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+                                            NULL);
        if (ret)
                return ret;
 
@@ -1586,19 +1693,26 @@ ethtool_set_coalesce_supported(struct net_device *dev,
 static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
                                                   void __user *useraddr)
 {
+       struct kernel_ethtool_coalesce kernel_coalesce = {};
        struct ethtool_coalesce coalesce;
        int ret;
 
-       if (!dev->ethtool_ops->set_coalesce)
+       if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
                return -EOPNOTSUPP;
 
+       ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+                                            NULL);
+       if (ret)
+               return ret;
+
        if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
                return -EFAULT;
 
        if (!ethtool_set_coalesce_supported(dev, &coalesce))
                return -EOPNOTSUPP;
 
-       ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+       ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+                                            NULL);
        if (!ret)
                ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
        return ret;
@@ -2581,15 +2695,14 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
 
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
-int dev_ethtool(struct net *net, struct ifreq *ifr)
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
 {
        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
-       void __user *useraddr = ifr->ifr_data;
        u32 ethcmd, sub_cmd;
        int rc;
        netdev_features_t old_features;
 
-       if (!dev || !netif_device_present(dev))
+       if (!dev)
                return -ENODEV;
 
        if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
@@ -2645,10 +2758,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
                        return -EPERM;
        }
 
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               rc = -ENODEV;
+               goto out;
+       }
+
        if (dev->ethtool_ops->begin) {
                rc = dev->ethtool_ops->begin(dev);
-               if (rc  < 0)
-                       return rc;
+               if (rc < 0)
+                       goto out;
        }
        old_features = dev->features;
 
@@ -2867,6 +2988,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 
        if (old_features != dev->features)
                netdev_features_change(dev);
+out:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
 
        return rc;
 }
index 73e0f5b..1797a0a 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <net/sock.h>
 #include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
 #include "netlink.h"
 
 static struct genl_family ethtool_genl_family;
@@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = {
                                                          ETHTOOL_FLAGS_STATS),
 };
 
+int ethnl_ops_begin(struct net_device *dev)
+{
+       int ret;
+
+       if (!dev)
+               return -ENODEV;
+
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               ret = -ENODEV;
+               goto err;
+       }
+
+       if (dev->ethtool_ops->begin) {
+               ret = dev->ethtool_ops->begin(dev);
+               if (ret)
+                       goto err;
+       }
+
+       return 0;
+err:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+
+       return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+       if (dev->ethtool_ops->complete)
+               dev->ethtool_ops->complete(dev);
+
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+}
+
 /**
  * ethnl_parse_header_dev_get() - parse request header
  * @req_info:    structure to put results into
@@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
                return -EINVAL;
        }
 
-       if (dev && !netif_device_present(dev)) {
-               dev_put(dev);
-               NL_SET_ERR_MSG(extack, "device not present");
-               return -ENODEV;
-       }
-
        req_info->dev = dev;
        req_info->flags = flags;
        return 0;
@@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
                ops->cleanup_data(reply_data);
 
        genlmsg_end(rskb, reply_payload);
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return genlmsg_reply(rskb, info);
@@ -378,8 +410,7 @@ err_cleanup:
        if (ops->cleanup_data)
                ops->cleanup_data(reply_data);
 err_dev:
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return ret;
index 3fc395c..e8987e2 100644 (file)
@@ -247,19 +247,8 @@ struct ethnl_reply_data {
        struct net_device               *dev;
 };
 
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->begin)
-               return dev->ethtool_ops->begin(dev);
-       else
-               return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->complete)
-               dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
 
 /**
  * struct ethnl_request_ops - unified handling of GET requests
@@ -370,7 +359,7 @@ extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1];
 extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
 extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
 extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
-extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL + 1];
+extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
 extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1];
 extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
 extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
index 88215b5..dd5a45f 100644 (file)
@@ -340,8 +340,7 @@ nla_put_failure:
 out_dev:
        wpan_phy_put(phy);
 out:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return rc;
 }
index 0cf2374..277124f 100644 (file)
@@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
                        struct wpan_dev *wpan_dev = info->user_ptr[1];
 
-                       if (wpan_dev->netdev)
-                               dev_put(wpan_dev->netdev);
+                       dev_put(wpan_dev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
index c25f761..7bb9ef3 100644 (file)
@@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
                ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
                rcu_read_lock();
                dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
-               if (dev)
-                       dev_hold(dev);
+               dev_hold(dev);
                rcu_read_unlock();
                break;
        case IEEE802154_ADDR_SHORT:
@@ -129,7 +128,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
        int ret = -ENOIOCTLCMD;
        struct net_device *dev;
 
-       if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+       if (get_user_ifreq(&ifr, NULL, arg))
                return -EFAULT;
 
        ifr.ifr_name[IFNAMSIZ-1] = 0;
@@ -143,7 +142,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
        if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl)
                ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd);
 
-       if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+       if (!ret && put_user_ifreq(&ifr, arg))
                ret = -EFAULT;
        dev_put(dev);
 
index 5464818..1d816a5 100644 (file)
@@ -452,7 +452,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         * changes context in a wrong way it will be caught.
         */
        err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
-                                                BPF_CGROUP_INET4_BIND, &flags);
+                                                CGROUP_INET4_BIND, &flags);
        if (err)
                return err;
 
@@ -781,7 +781,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                sin->sin_port = inet->inet_dport;
                sin->sin_addr.s_addr = inet->inet_daddr;
                BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           BPF_CGROUP_INET4_GETPEERNAME,
+                                           CGROUP_INET4_GETPEERNAME,
                                            NULL);
        } else {
                __be32 addr = inet->inet_rcv_saddr;
@@ -790,7 +790,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
                sin->sin_port = inet->inet_sport;
                sin->sin_addr.s_addr = addr;
                BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           BPF_CGROUP_INET4_GETSOCKNAME,
+                                           CGROUP_INET4_GETSOCKNAME,
                                            NULL);
        }
        memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCGIFNETMASK:
        case SIOCGIFDSTADDR:
        case SIOCGIFPFLAGS:
-               if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+               if (get_user_ifreq(&ifr, NULL, p))
                        return -EFAULT;
                err = devinet_ioctl(net, cmd, &ifr);
-               if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+               if (!err && put_user_ifreq(&ifr, p))
                        err = -EFAULT;
                break;
 
@@ -966,7 +966,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCSIFDSTADDR:
        case SIOCSIFPFLAGS:
        case SIOCSIFFLAGS:
-               if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+               if (get_user_ifreq(&ifr, NULL, p))
                        return -EFAULT;
                err = devinet_ioctl(net, cmd, &ifr);
                break;
index 9e41eff..0dcee9d 100644 (file)
@@ -10,6 +10,9 @@
 #include <net/tcp.h>
 #include <net/bpf_sk_storage.h>
 
+/* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
 static u32 optional_ops[] = {
        offsetof(struct tcp_congestion_ops, init),
        offsetof(struct tcp_congestion_ops, release),
@@ -163,6 +166,19 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+static u32 prog_ops_moff(const struct bpf_prog *prog)
+{
+       const struct btf_member *m;
+       const struct btf_type *t;
+       u32 midx;
+
+       midx = prog->expected_attach_type;
+       t = bpf_tcp_congestion_ops.type;
+       m = &btf_type_member(t)[midx];
+
+       return btf_member_bit_offset(t, m) / 8;
+}
+
 static const struct bpf_func_proto *
 bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
                          const struct bpf_prog *prog)
@@ -174,6 +190,28 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
                return &bpf_sk_storage_get_proto;
        case BPF_FUNC_sk_storage_delete:
                return &bpf_sk_storage_delete_proto;
+       case BPF_FUNC_setsockopt:
+               /* Does not allow release() to call setsockopt.
+                * release() is called when the current bpf-tcp-cc
+                * is retiring.  It is not allowed to call
+                * setsockopt() to make further changes which
+                * may potentially allocate new resources.
+                */
+               if (prog_ops_moff(prog) !=
+                   offsetof(struct tcp_congestion_ops, release))
+                       return &bpf_sk_setsockopt_proto;
+               return NULL;
+       case BPF_FUNC_getsockopt:
+               /* Since get/setsockopt is usually expected to
+                * be available together, disable getsockopt for
+                * release also to avoid usage surprise.
+                * The bpf-tcp-cc already has a more powerful way
+                * to read tcp_sock from the PTR_TO_BTF_ID.
+                */
+               if (prog_ops_moff(prog) !=
+                   offsetof(struct tcp_congestion_ops, release))
+                       return &bpf_sk_getsockopt_proto;
+               return NULL;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -286,9 +324,6 @@ static void bpf_tcp_ca_unreg(void *kdata)
        tcp_unregister_congestion_control(kdata);
 }
 
-/* Avoid sparse warning.  It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-
 struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .verifier_ops = &bpf_tcp_ca_verifier_ops,
        .reg = bpf_tcp_ca_reg,
index 73721a4..f446898 100644 (file)
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
 
 static struct in_ifaddr *inet_alloc_ifa(void)
 {
-       return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+       return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
 }
 
 static void inet_rcu_free_ifa(struct rcu_head *head)
@@ -1243,7 +1243,7 @@ out:
        return ret;
 }
 
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
 {
        struct in_device *in_dev = __in_dev_get_rtnl(dev);
        const struct in_ifaddr *ifa;
@@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
 };
 
 static int inet_validate_link_af(const struct net_device *dev,
-                                const struct nlattr *nla)
+                                const struct nlattr *nla,
+                                struct netlink_ext_ack *extack)
 {
        struct nlattr *a, *tb[IFLA_INET_MAX+1];
        int err, rem;
@@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev,
                return -EAFNOSUPPORT;
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
-                                         inet_af_policy, NULL);
+                                         inet_af_policy, extack);
        if (err < 0)
                return err;
 
@@ -2424,11 +2425,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
        int *valp = ctl->data;
        int val = *valp;
        loff_t pos = *ppos;
-       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+       struct net *net = ctl->extra2;
+       int ret;
 
-       if (write && *valp != val) {
-               struct net *net = ctl->extra2;
+       if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
+
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
+       if (write && *valp != val) {
                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
                        if (!rtnl_trylock()) {
                                /* Restore the original values before restarting */
@@ -2762,8 +2767,6 @@ void __init devinet_init(void)
                INIT_HLIST_HEAD(&inet_addr_lst[i]);
 
        register_pernet_subsys(&devinet_ops);
-
-       register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);
 
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
index a09e36c..851f542 100644 (file)
@@ -97,7 +97,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 
 static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 {
-       struct esp_output_extra *extra = esp_tmp_extra(tmp);
        struct crypto_aead *aead = x->data;
        int extralen = 0;
        u8 *iv;
@@ -105,9 +104,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
        struct scatterlist *sg;
 
        if (x->props.flags & XFRM_STATE_ESN)
-               extralen += sizeof(*extra);
+               extralen += sizeof(struct esp_output_extra);
 
-       extra = esp_tmp_extra(tmp);
        iv = esp_tmp_iv(aead, tmp, extralen);
        req = esp_tmp_req(aead, iv);
 
index 4c0c33e..b42c429 100644 (file)
@@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 
 void fib_nh_common_release(struct fib_nh_common *nhc)
 {
-       if (nhc->nhc_dev)
-               dev_put(nhc->nhc_dev);
-
+       dev_put(nhc->nhc_dev);
        lwtstate_put(nhc->nhc_lwtstate);
        rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
        rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -260,7 +258,7 @@ EXPORT_SYMBOL_GPL(free_fib_info);
 void fib_release_info(struct fib_info *fi)
 {
        spin_lock_bh(&fib_info_lock);
-       if (fi && --fi->fib_treeref == 0) {
+       if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
                hlist_del(&fi->fib_hash);
                if (fi->fib_prefsrc)
                        hlist_del(&fi->fib_lhash);
@@ -1373,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
                if (!cfg->fc_mx) {
                        fi = fib_find_info_nh(net, cfg);
                        if (fi) {
-                               fi->fib_treeref++;
+                               refcount_inc(&fi->fib_treeref);
                                return fi;
                        }
                }
@@ -1547,11 +1545,11 @@ link_it:
        if (ofi) {
                fi->fib_dead = 1;
                free_fib_info(fi);
-               ofi->fib_treeref++;
+               refcount_inc(&ofi->fib_treeref);
                return ofi;
        }
 
-       fi->fib_treeref++;
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock_bh(&fib_info_lock);
        hlist_add_head(&fi->fib_hash,
index 25cf387..8060524 100644 (file)
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
 {
        fn_alias_kmem = kmem_cache_create("ip_fib_alias",
                                          sizeof(struct fib_alias),
-                                         0, SLAB_PANIC, NULL);
+                                         0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 
        trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
                                           LEAF_SIZE,
-                                          0, SLAB_PANIC, NULL);
+                                          0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
 }
 
 struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
index e5f69b0..8fcbc62 100644 (file)
@@ -230,8 +230,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
                                       struct list_head *head,
                                       struct sk_buff *skb)
 {
+       const struct net_offload __rcu **offloads;
        u8 proto = fou_from_sock(sk)->protocol;
-       const struct net_offload **offloads;
        const struct net_offload *ops;
        struct sk_buff *pp = NULL;
 
@@ -263,10 +263,10 @@ out_unlock:
 static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
                            int nhoff)
 {
-       const struct net_offload *ops;
+       const struct net_offload __rcu **offloads;
        u8 proto = fou_from_sock(sk)->protocol;
+       const struct net_offload *ops;
        int err = -ENOSYS;
-       const struct net_offload **offloads;
 
        rcu_read_lock();
        offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
@@ -311,7 +311,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
                                       struct list_head *head,
                                       struct sk_buff *skb)
 {
-       const struct net_offload **offloads;
+       const struct net_offload __rcu **offloads;
        const struct net_offload *ops;
        struct sk_buff *pp = NULL;
        struct sk_buff *p;
@@ -457,8 +457,8 @@ out:
 
 static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 {
-       const struct net_offload **offloads;
        struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+       const struct net_offload __rcu **offloads;
        const struct net_offload *ops;
        unsigned int guehlen = 0;
        u8 proto;
index c695d29..8b30cad 100644 (file)
@@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
                                         sizeof(struct in6_addr))
                                goto send_mal_query;
                        dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
-                       if (dev)
-                               dev_hold(dev);
+                       dev_hold(dev);
                        break;
 #endif
                default:
index 00576ba..d2e2b3d 100644 (file)
@@ -2240,7 +2240,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
                        iml->sfmode, psf->sl_count, psf->sl_addr, 0);
        RCU_INIT_POINTER(iml->sflist, NULL);
        /* decrease mem now to avoid the memleak warning */
-       atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+       atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
        kfree_rcu(psf, rcu);
        return err;
 }
@@ -2389,7 +2389,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -2400,7 +2401,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
                        /* decrease mem now to avoid the memleak warning */
-                       atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                rcu_assign_pointer(pmc->sflist, newpsl);
@@ -2475,19 +2477,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                goto done;
        }
        if (msf->imsf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
-                                                          GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     msf->imsf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
                }
                newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
-               memcpy(newpsl->sl_addr, msf->imsf_slist,
-                       msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+               memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+                      flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
                err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
                        msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
-                       sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl,
+                                    struct_size(newpsl, sl_addr,
+                                                newpsl->sl_max));
                        goto done;
                }
        } else {
@@ -2500,7 +2505,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
                        psl->sl_count, psl->sl_addr, 0);
                /* decrease mem now to avoid the memleak warning */
-               atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
@@ -2558,14 +2564,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
                count = psl->sl_count;
        }
        copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
-       len = copycount * sizeof(psl->sl_addr[0]);
+       len = flex_array_size(psl, sl_addr, copycount);
        msf->imsf_numsrc = count;
        if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
            copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
                return -EFAULT;
        }
        if (len &&
-           copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+           copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
                return -EFAULT;
        return 0;
 done:
@@ -2720,6 +2726,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
                rv = 1;
        } else if (im) {
                if (src_addr) {
+                       spin_lock_bh(&im->lock);
                        for (psf = im->sources; psf; psf = psf->sf_next) {
                                if (psf->sf_inaddr == src_addr)
                                        break;
@@ -2730,6 +2737,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
                                        im->sfcount[MCAST_EXCLUDE];
                        else
                                rv = im->sfcount[MCAST_EXCLUDE] != 0;
+                       spin_unlock_bh(&im->lock);
                } else
                        rv = 1; /* unspecified source; tentatively allow */
        }
index 754013f..f25d02a 100644 (file)
@@ -534,7 +534,8 @@ out:
                                   atomic_read(&newsk->sk_rmem_alloc));
                mem_cgroup_sk_alloc(newsk);
                if (newsk->sk_memcg && amt)
-                       mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+                       mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
+                                               GFP_KERNEL | __GFP_NOFAIL);
 
                release_sock(newsk);
        }
index 95419b7..177d26d 100644 (file)
@@ -925,7 +925,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
        .ndo_stop               = ipgre_close,
 #endif
        .ndo_start_xmit         = ipgre_xmit,
-       .ndo_do_ioctl           = ip_tunnel_ioctl,
+       .ndo_siocdevprivate     = ip_tunnel_siocdevprivate,
        .ndo_change_mtu         = ip_tunnel_change_mtu,
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_get_iflink         = ip_tunnel_get_iflink,
index 8d8a8da..9bca57e 100644 (file)
@@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        } else if (rt->rt_type == RTN_BROADCAST)
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
-       /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
@@ -446,8 +437,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
 {
        BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
                     offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
-       memcpy(&iph->saddr, &fl4->saddr,
-              sizeof(fl4->saddr) + sizeof(fl4->daddr));
+
+       iph->saddr = fl4->saddr;
+       iph->daddr = fl4->daddr;
 }
 
 /* Note: skb->sk can be different from sk, in case of tunnels */
@@ -614,18 +606,6 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
 }
 EXPORT_SYMBOL(ip_fraglist_init);
 
-static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
-                                    struct ip_fraglist_iter *iter)
-{
-       struct sk_buff *to = iter->frag;
-
-       /* Copy the flags to each fragment. */
-       IPCB(to)->flags = IPCB(skb)->flags;
-
-       if (iter->offset == 0)
-               ip_options_fragment(to);
-}
-
 void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
 {
        unsigned int hlen = iter->hlen;
@@ -671,7 +651,7 @@ void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
 EXPORT_SYMBOL(ip_frag_init);
 
 static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
-                        bool first_frag, struct ip_frag_state *state)
+                        bool first_frag)
 {
        /* Copy the flags to each fragment. */
        IPCB(to)->flags = IPCB(from)->flags;
@@ -846,11 +826,14 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                /* Everything is OK. Generate! */
                ip_fraglist_init(skb, iph, hlen, &iter);
 
+               if (iter.frag)
+                       ip_options_fragment(iter.frag);
+
                for (;;) {
                        /* Prepare header of the next frame,
                         * before previous one went down. */
                        if (iter.frag) {
-                               ip_fraglist_ipcb_prepare(skb, &iter);
+                               IPCB(iter.frag)->flags = IPCB(skb)->flags;
                                ip_fraglist_prepare(skb, &iter);
                        }
 
@@ -905,7 +888,7 @@ slow_path:
                        err = PTR_ERR(skb2);
                        goto fail;
                }
-               ip_frag_ipcb(skb, skb2, first_frag, &state);
+               ip_frag_ipcb(skb, skb2, first_frag);
 
                /*
                 *      Put this fragment into the sending queue.
index ec60367..b297bb2 100644 (file)
@@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
                              struct sockaddr_storage *group,
                              struct sockaddr_storage *list)
 {
-       int msize = IP_MSFILTER_SIZE(numsrc);
        struct ip_msfilter *msf;
        struct sockaddr_in *psin;
        int err, i;
 
-       msf = kmalloc(msize, GFP_KERNEL);
+       msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
        if (!msf)
                return -ENOBUFS;
 
@@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
 
                if (psin->sin_family != AF_INET)
                        goto Eaddrnotavail;
-               msf->imsf_slist[i] = psin->sin_addr.s_addr;
+               msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
        }
        err = ip_mc_msfilter(sk, msf, ifindex);
        kfree(msf);
@@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
                goto out_free_gsf;
 
        err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
-                                gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+                                gsf->gf_fmode, &gsf->gf_group,
+                                gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return err;
@@ -800,7 +800,7 @@ out_free_gsf:
 static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        unsigned int n;
        void *p;
@@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        p = kmalloc(optlen + 4, GFP_KERNEL);
        if (!p)
                return -ENOMEM;
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
 
        err = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
@@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_gsf;
 
        err = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_gsf;
 
        /* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
                goto out_free_gsf;
        err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
-                                &gf32->gf_group, gf32->gf_slist);
+                                &gf32->gf_group, gf32->gf_slist_flex);
 out_free_gsf:
        kfree(p);
        return err;
@@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname)
 static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                return -EFAULT;
 
        num = gsf.gf_numsrc;
-       err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
        if (err)
                return err;
        if (gsf.gf_numsrc < num)
@@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
 static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
        num = gf.gf_numsrc = gf32.gf_numsrc;
        gf.gf_group = gf32.gf_group;
 
-       err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
        if (err)
                return err;
        if (gf.gf_numsrc < num)
index be75b40..fe9101d 100644 (file)
@@ -958,19 +958,20 @@ done:
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
 
-int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                            void __user *data, int cmd)
 {
        struct ip_tunnel_parm p;
        int err;
 
-       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+       if (copy_from_user(&p, data, sizeof(p)))
                return -EFAULT;
        err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
-       if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+       if (!err && copy_to_user(data, &p, sizeof(p)))
                return -EFAULT;
        return err;
 }
-EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
 
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
 {
index eb560ee..efe25a0 100644 (file)
@@ -405,7 +405,7 @@ static const struct net_device_ops vti_netdev_ops = {
        .ndo_init       = vti_tunnel_init,
        .ndo_uninit     = ip_tunnel_uninit,
        .ndo_start_xmit = vti_tunnel_xmit,
-       .ndo_do_ioctl   = ip_tunnel_ioctl,
+       .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
        .ndo_change_mtu = ip_tunnel_change_mtu,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip_tunnel_get_iflink,
index 266c655..3aa78cc 100644 (file)
@@ -347,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = {
        .ndo_init       = ipip_tunnel_init,
        .ndo_uninit     = ip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
-       .ndo_do_ioctl   = ip_tunnel_ioctl,
+       .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
        .ndo_change_mtu = ip_tunnel_change_mtu,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip_tunnel_get_iflink,
index 6922612..3de7841 100644 (file)
@@ -18,15 +18,12 @@ MODULE_DESCRIPTION("arptables filter table");
 #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
                           (1 << NF_ARP_FORWARD))
 
-static int __net_init arptable_filter_table_init(struct net *net);
-
 static const struct xt_table packet_filter = {
        .name           = "filter",
        .valid_hooks    = FILTER_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_ARP,
        .priority       = NF_IP_PRI_FILTER,
-       .table_init     = arptable_filter_table_init,
 };
 
 /* The work comes in here from netfilter.c */
@@ -39,7 +36,7 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
 
-static int __net_init arptable_filter_table_init(struct net *net)
+static int arptable_filter_table_init(struct net *net)
 {
        struct arpt_replace *repl;
        int err;
@@ -69,30 +66,32 @@ static struct pernet_operations arptable_filter_net_ops = {
 
 static int __init arptable_filter_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&packet_filter,
+                                      arptable_filter_table_init);
+
+       if (ret < 0)
+               return ret;
 
        arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
-       if (IS_ERR(arpfilter_ops))
+       if (IS_ERR(arpfilter_ops)) {
+               xt_unregister_template(&packet_filter);
                return PTR_ERR(arpfilter_ops);
+       }
 
        ret = register_pernet_subsys(&arptable_filter_net_ops);
        if (ret < 0) {
+               xt_unregister_template(&packet_filter);
                kfree(arpfilter_ops);
                return ret;
        }
 
-       ret = arptable_filter_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&arptable_filter_net_ops);
-               kfree(arpfilter_ops);
-       }
-
        return ret;
 }
 
 static void __exit arptable_filter_fini(void)
 {
        unregister_pernet_subsys(&arptable_filter_net_ops);
+       xt_unregister_template(&packet_filter);
        kfree(arpfilter_ops);
 }
 
index 8f7ca67..8fd1aba 100644 (file)
@@ -66,11 +66,22 @@ struct clusterip_net {
        /* lock protects the configs list */
        spinlock_t lock;
 
+       bool clusterip_deprecated_warning;
 #ifdef CONFIG_PROC_FS
        struct proc_dir_entry *procdir;
        /* mutex protects the config->pde*/
        struct mutex mutex;
 #endif
+       unsigned int hook_users;
+};
+
+static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
+
+static const struct nf_hook_ops cip_arp_ops = {
+       .hook = clusterip_arp_mangle,
+       .pf = NFPROTO_ARP,
+       .hooknum = NF_ARP_OUT,
+       .priority = -1
 };
 
 static unsigned int clusterip_net_id __read_mostly;
@@ -458,6 +469,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int clusterip_tg_check(const struct xt_tgchk_param *par)
 {
        struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+       struct clusterip_net *cn = clusterip_pernet(par->net);
        const struct ipt_entry *e = par->entryinfo;
        struct clusterip_config *config;
        int ret, i;
@@ -467,6 +479,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                return -EOPNOTSUPP;
        }
 
+       if (cn->hook_users == UINT_MAX)
+               return -EOVERFLOW;
+
        if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
            cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
            cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
@@ -517,10 +532,23 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
                return ret;
        }
 
-       if (!par->net->xt.clusterip_deprecated_warning) {
+       if (cn->hook_users == 0) {
+               ret = nf_register_net_hook(par->net, &cip_arp_ops);
+
+               if (ret < 0) {
+                       clusterip_config_entry_put(config);
+                       clusterip_config_put(config);
+                       nf_ct_netns_put(par->net, par->family);
+                       return ret;
+               }
+       }
+
+       cn->hook_users++;
+
+       if (!cn->clusterip_deprecated_warning) {
                pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
                        "use xt_cluster instead\n");
-               par->net->xt.clusterip_deprecated_warning = true;
+               cn->clusterip_deprecated_warning = true;
        }
 
        cipinfo->config = config;
@@ -531,6 +559,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 {
        const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+       struct clusterip_net *cn = clusterip_pernet(par->net);
 
        /* if no more entries are referencing the config, remove it
         * from the list and destroy the proc entry */
@@ -539,6 +568,10 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
        clusterip_config_put(cipinfo->config);
 
        nf_ct_netns_put(par->net, par->family);
+       cn->hook_users--;
+
+       if (cn->hook_users == 0)
+               nf_unregister_net_hook(par->net, &cip_arp_ops);
 }
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -602,9 +635,8 @@ static void arp_print(struct arp_payload *payload)
 #endif
 
 static unsigned int
-arp_mangle(void *priv,
-          struct sk_buff *skb,
-          const struct nf_hook_state *state)
+clusterip_arp_mangle(void *priv, struct sk_buff *skb,
+                    const struct nf_hook_state *state)
 {
        struct arphdr *arp = arp_hdr(skb);
        struct arp_payload *payload;
@@ -654,13 +686,6 @@ arp_mangle(void *priv,
        return NF_ACCEPT;
 }
 
-static const struct nf_hook_ops cip_arp_ops = {
-       .hook = arp_mangle,
-       .pf = NFPROTO_ARP,
-       .hooknum = NF_ARP_OUT,
-       .priority = -1
-};
-
 /***********************************************************************
  * PROC DIR HANDLING
  ***********************************************************************/
@@ -817,20 +842,14 @@ static const struct proc_ops clusterip_proc_ops = {
 static int clusterip_net_init(struct net *net)
 {
        struct clusterip_net *cn = clusterip_pernet(net);
-       int ret;
 
        INIT_LIST_HEAD(&cn->configs);
 
        spin_lock_init(&cn->lock);
 
-       ret = nf_register_net_hook(net, &cip_arp_ops);
-       if (ret < 0)
-               return ret;
-
 #ifdef CONFIG_PROC_FS
        cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
        if (!cn->procdir) {
-               nf_unregister_net_hook(net, &cip_arp_ops);
                pr_err("Unable to proc dir entry\n");
                return -ENOMEM;
        }
@@ -850,7 +869,6 @@ static void clusterip_net_exit(struct net *net)
        cn->procdir = NULL;
        mutex_unlock(&cn->mutex);
 #endif
-       nf_unregister_net_hook(net, &cip_arp_ops);
 }
 
 static struct pernet_operations clusterip_net_ops = {
index 8272df7..0eb0e2a 100644 (file)
@@ -19,7 +19,6 @@ MODULE_DESCRIPTION("iptables filter table");
 #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
                            (1 << NF_INET_FORWARD) | \
                            (1 << NF_INET_LOCAL_OUT))
-static int __net_init iptable_filter_table_init(struct net *net);
 
 static const struct xt_table packet_filter = {
        .name           = "filter",
@@ -27,7 +26,6 @@ static const struct xt_table packet_filter = {
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV4,
        .priority       = NF_IP_PRI_FILTER,
-       .table_init     = iptable_filter_table_init,
 };
 
 static unsigned int
@@ -43,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
 static bool forward __read_mostly = true;
 module_param(forward, bool, 0000);
 
-static int __net_init iptable_filter_table_init(struct net *net)
+static int iptable_filter_table_init(struct net *net)
 {
        struct ipt_replace *repl;
        int err;
@@ -62,7 +60,7 @@ static int __net_init iptable_filter_table_init(struct net *net)
 
 static int __net_init iptable_filter_net_init(struct net *net)
 {
-       if (net == &init_net || !forward)
+       if (!forward)
                return iptable_filter_table_init(net);
 
        return 0;
@@ -86,22 +84,32 @@ static struct pernet_operations iptable_filter_net_ops = {
 
 static int __init iptable_filter_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&packet_filter,
+                                      iptable_filter_table_init);
+
+       if (ret < 0)
+               return ret;
 
        filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
-       if (IS_ERR(filter_ops))
+       if (IS_ERR(filter_ops)) {
+               xt_unregister_template(&packet_filter);
                return PTR_ERR(filter_ops);
+       }
 
        ret = register_pernet_subsys(&iptable_filter_net_ops);
-       if (ret < 0)
+       if (ret < 0) {
+               xt_unregister_template(&packet_filter);
                kfree(filter_ops);
+               return ret;
+       }
 
-       return ret;
+       return 0;
 }
 
 static void __exit iptable_filter_fini(void)
 {
        unregister_pernet_subsys(&iptable_filter_net_ops);
+       xt_unregister_template(&packet_filter);
        kfree(filter_ops);
 }
 
index 2abc383..40417a3 100644 (file)
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables mangle table");
                            (1 << NF_INET_LOCAL_OUT) | \
                            (1 << NF_INET_POST_ROUTING))
 
-static int __net_init iptable_mangle_table_init(struct net *net);
-
 static const struct xt_table packet_mangler = {
        .name           = "mangle",
        .valid_hooks    = MANGLE_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV4,
        .priority       = NF_IP_PRI_MANGLE,
-       .table_init     = iptable_mangle_table_init,
 };
 
 static unsigned int
@@ -83,7 +80,7 @@ iptable_mangle_hook(void *priv,
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init iptable_mangle_table_init(struct net *net)
+static int iptable_mangle_table_init(struct net *net)
 {
        struct ipt_replace *repl;
        int ret;
@@ -113,32 +110,32 @@ static struct pernet_operations iptable_mangle_net_ops = {
 
 static int __init iptable_mangle_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&packet_mangler,
+                                      iptable_mangle_table_init);
+       if (ret < 0)
+               return ret;
 
        mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
        if (IS_ERR(mangle_ops)) {
+               xt_unregister_template(&packet_mangler);
                ret = PTR_ERR(mangle_ops);
                return ret;
        }
 
        ret = register_pernet_subsys(&iptable_mangle_net_ops);
        if (ret < 0) {
+               xt_unregister_template(&packet_mangler);
                kfree(mangle_ops);
                return ret;
        }
 
-       ret = iptable_mangle_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&iptable_mangle_net_ops);
-               kfree(mangle_ops);
-       }
-
        return ret;
 }
 
 static void __exit iptable_mangle_fini(void)
 {
        unregister_pernet_subsys(&iptable_mangle_net_ops);
+       xt_unregister_template(&packet_mangler);
        kfree(mangle_ops);
 }
 
index a991384..45d7e07 100644 (file)
@@ -17,8 +17,6 @@ struct iptable_nat_pernet {
        struct nf_hook_ops *nf_nat_ops;
 };
 
-static int __net_init iptable_nat_table_init(struct net *net);
-
 static unsigned int iptable_nat_net_id __read_mostly;
 
 static const struct xt_table nf_nat_ipv4_table = {
@@ -29,7 +27,6 @@ static const struct xt_table nf_nat_ipv4_table = {
                          (1 << NF_INET_LOCAL_IN),
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV4,
-       .table_init     = iptable_nat_table_init,
 };
 
 static unsigned int iptable_nat_do_chain(void *priv,
@@ -113,7 +110,7 @@ static void ipt_nat_unregister_lookups(struct net *net)
        kfree(ops);
 }
 
-static int __net_init iptable_nat_table_init(struct net *net)
+static int iptable_nat_table_init(struct net *net)
 {
        struct ipt_replace *repl;
        int ret;
@@ -155,20 +152,25 @@ static struct pernet_operations iptable_nat_net_ops = {
 
 static int __init iptable_nat_init(void)
 {
-       int ret = register_pernet_subsys(&iptable_nat_net_ops);
+       int ret = xt_register_template(&nf_nat_ipv4_table,
+                                      iptable_nat_table_init);
+
+       if (ret < 0)
+               return ret;
 
-       if (ret)
+       ret = register_pernet_subsys(&iptable_nat_net_ops);
+       if (ret < 0) {
+               xt_unregister_template(&nf_nat_ipv4_table);
                return ret;
+       }
 
-       ret = iptable_nat_table_init(&init_net);
-       if (ret)
-               unregister_pernet_subsys(&iptable_nat_net_ops);
        return ret;
 }
 
 static void __exit iptable_nat_exit(void)
 {
        unregister_pernet_subsys(&iptable_nat_net_ops);
+       xt_unregister_template(&nf_nat_ipv4_table);
 }
 
 module_init(iptable_nat_init);
index ceef397..b88e0f3 100644 (file)
@@ -12,8 +12,6 @@
 
 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
-static int __net_init iptable_raw_table_init(struct net *net);
-
 static bool raw_before_defrag __read_mostly;
 MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
 module_param(raw_before_defrag, bool, 0000);
@@ -24,7 +22,6 @@ static const struct xt_table packet_raw = {
        .me = THIS_MODULE,
        .af = NFPROTO_IPV4,
        .priority = NF_IP_PRI_RAW,
-       .table_init = iptable_raw_table_init,
 };
 
 static const struct xt_table packet_raw_before_defrag = {
@@ -33,7 +30,6 @@ static const struct xt_table packet_raw_before_defrag = {
        .me = THIS_MODULE,
        .af = NFPROTO_IPV4,
        .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
-       .table_init = iptable_raw_table_init,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -89,22 +85,24 @@ static int __init iptable_raw_init(void)
                pr_info("Enabling raw table before defrag\n");
        }
 
+       ret = xt_register_template(table,
+                                  iptable_raw_table_init);
+       if (ret < 0)
+               return ret;
+
        rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
-       if (IS_ERR(rawtable_ops))
+       if (IS_ERR(rawtable_ops)) {
+               xt_unregister_template(table);
                return PTR_ERR(rawtable_ops);
+       }
 
        ret = register_pernet_subsys(&iptable_raw_net_ops);
        if (ret < 0) {
+               xt_unregister_template(table);
                kfree(rawtable_ops);
                return ret;
        }
 
-       ret = iptable_raw_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&iptable_raw_net_ops);
-               kfree(rawtable_ops);
-       }
-
        return ret;
 }
 
@@ -112,6 +110,7 @@ static void __exit iptable_raw_fini(void)
 {
        unregister_pernet_subsys(&iptable_raw_net_ops);
        kfree(rawtable_ops);
+       xt_unregister_template(&packet_raw);
 }
 
 module_init(iptable_raw_init);
index 77973f5..f519162 100644 (file)
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
                                (1 << NF_INET_FORWARD) | \
                                (1 << NF_INET_LOCAL_OUT)
 
-static int __net_init iptable_security_table_init(struct net *net);
-
 static const struct xt_table security_table = {
        .name           = "security",
        .valid_hooks    = SECURITY_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV4,
        .priority       = NF_IP_PRI_SECURITY,
-       .table_init     = iptable_security_table_init,
 };
 
 static unsigned int
@@ -45,7 +42,7 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
-static int __net_init iptable_security_table_init(struct net *net)
+static int iptable_security_table_init(struct net *net)
 {
        struct ipt_replace *repl;
        int ret;
@@ -75,24 +72,25 @@ static struct pernet_operations iptable_security_net_ops = {
 
 static int __init iptable_security_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&security_table,
+                                      iptable_security_table_init);
+
+       if (ret < 0)
+               return ret;
 
        sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
-       if (IS_ERR(sectbl_ops))
+       if (IS_ERR(sectbl_ops)) {
+               xt_unregister_template(&security_table);
                return PTR_ERR(sectbl_ops);
+       }
 
        ret = register_pernet_subsys(&iptable_security_net_ops);
        if (ret < 0) {
+               xt_unregister_template(&security_table);
                kfree(sectbl_ops);
                return ret;
        }
 
-       ret = iptable_security_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&iptable_security_net_ops);
-               kfree(sectbl_ops);
-       }
-
        return ret;
 }
 
@@ -100,6 +98,7 @@ static void __exit iptable_security_fini(void)
 {
        unregister_pernet_subsys(&iptable_security_net_ops);
        kfree(sectbl_ops);
+       xt_unregister_template(&security_table);
 }
 
 module_init(iptable_security_init);
index a6f20ee..d6899ab 100644 (file)
@@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
        struct rt_cache_stat *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+               seq_puts(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd   in_martian_dst in_martian_src out_hit  out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }
 
-       seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
-                  " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+       seq_printf(seq, "%08x %08x %08x    %08x   %08x    %08x %08x       "
+                       "%08x       %08x %08x     %08x    %08x %08x   "
+                       "%08x     %08x        %08x        %08x\n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   0, /* st->in_hit */
                   st->in_slow_tot,
@@ -586,18 +587,25 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
        }
 }
 
-static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
+static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
 {
-       struct fib_nh_exception *fnhe, *oldest;
+       struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
+       struct fib_nh_exception *fnhe, *oldest = NULL;
 
-       oldest = rcu_dereference(hash->chain);
-       for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
-            fnhe = rcu_dereference(fnhe->fnhe_next)) {
-               if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+       for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
+               fnhe = rcu_dereference_protected(*fnhe_p,
+                                                lockdep_is_held(&fnhe_lock));
+               if (!fnhe)
+                       break;
+               if (!oldest ||
+                   time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
                        oldest = fnhe;
+                       oldest_p = fnhe_p;
+               }
        }
        fnhe_flush_routes(oldest);
-       return oldest;
+       *oldest_p = oldest->fnhe_next;
+       kfree_rcu(oldest, rcu);
 }
 
 static u32 fnhe_hashfun(__be32 daddr)
@@ -676,16 +684,21 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
                if (rt)
                        fill_route_from_fnhe(rt, fnhe);
        } else {
-               if (depth > FNHE_RECLAIM_DEPTH)
-                       fnhe = fnhe_oldest(hash);
-               else {
-                       fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
-                       if (!fnhe)
-                               goto out_unlock;
-
-                       fnhe->fnhe_next = hash->chain;
-                       rcu_assign_pointer(hash->chain, fnhe);
+               /* Randomize max depth to avoid some side channels attacks. */
+               int max_depth = FNHE_RECLAIM_DEPTH +
+                               prandom_u32_max(FNHE_RECLAIM_DEPTH);
+
+               while (depth > max_depth) {
+                       fnhe_remove_oldest(hash);
+                       depth--;
                }
+
+               fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+               if (!fnhe)
+                       goto out_unlock;
+
+               fnhe->fnhe_next = hash->chain;
+
                fnhe->fnhe_genid = genid;
                fnhe->fnhe_daddr = daddr;
                fnhe->fnhe_gw = gw;
@@ -693,6 +706,8 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
                fnhe->fnhe_mtu_locked = lock;
                fnhe->fnhe_expires = max(1UL, expires);
 
+               rcu_assign_pointer(hash->chain, fnhe);
+
                /* Exception created; mark the cached routes for the nexthop
                 * stale, so anyone caching it rechecks if this exception
                 * applies to them.
@@ -1299,26 +1314,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 
 INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
-       const struct rtable *rt = (const struct rtable *)dst;
-       unsigned int mtu = rt->rt_pmtu;
-
-       if (!mtu || time_after_eq(jiffies, rt->dst.expires))
-               mtu = dst_metric_raw(dst, RTAX_MTU);
-
-       if (mtu)
-               goto out;
-
-       mtu = READ_ONCE(dst->dev->mtu);
-
-       if (unlikely(ip_mtu_locked(dst))) {
-               if (rt->rt_uses_gateway && mtu > 576)
-                       mtu = 576;
-       }
-
-out:
-       mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
-
-       return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+       return ip_dst_mtu_maybe_forward(dst, false);
 }
 EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
 
@@ -2831,8 +2827,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                new->output = dst_discard_out;
 
                new->dev = net->loopback_dev;
-               if (new->dev)
-                       dev_hold(new->dev);
+               dev_hold(new->dev);
 
                rt->rt_is_input = ort->rt_is_input;
                rt->rt_iif = ort->rt_iif;
@@ -3170,7 +3165,7 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
                udph = skb_put_zero(skb, sizeof(struct udphdr));
                udph->source = sport;
                udph->dest = dport;
-               udph->len = sizeof(struct udphdr);
+               udph->len = htons(sizeof(struct udphdr));
                udph->check = 0;
                break;
        }
index 8cb4404..e8b48df 100644 (file)
@@ -3338,6 +3338,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
        } else {
                tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
                        SOCK_MIN_RCVBUF / 2 : val;
+               tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
        }
        return 0;
 }
@@ -4512,7 +4513,9 @@ void __init tcp_init(void)
        tcp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("tcp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
-                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+                                 SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+                                 SLAB_ACCOUNT,
+                                 NULL);
 
        /* Size and allocate the main established and bind bucket
         * hash tables.
index 25fa4c0..59412d6 100644 (file)
@@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net)
 {
        struct tcp_fastopen_context *ctxt;
 
-       spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
-
-       ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
-                               lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
-       rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
-       spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+       ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);
 
        if (ctxt)
                call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
@@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
                ctx->num = 1;
        }
 
-       spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
        if (sk) {
                q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
-               octx = rcu_dereference_protected(q->ctx,
-                       lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
-               rcu_assign_pointer(q->ctx, ctx);
+               octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
        } else {
-               octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
-                       lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
-               rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+               octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
        }
-       spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
 
        if (octx)
                call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
@@ -379,8 +368,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
                return NULL;
        }
 
-       if (syn_data &&
-           tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
+       if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
                goto fastopen;
 
        if (foc->len == 0) {
index 149ceb5..3f7bd7a 100644 (file)
@@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 #define FLAG_UPDATE_TS_RECENT  0x4000 /* tcp_replace_ts_recent() */
 #define FLAG_NO_CHALLENGE_ACK  0x8000 /* do not call tcp_send_challenge_ack()  */
 #define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
+#define FLAG_DSACK_TLP         0x20000 /* DSACK for tail loss probe */
 
 #define FLAG_ACKED             (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP           (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -454,11 +455,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
  */
 
 /* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+                            unsigned int skbtruesize)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        /* Optimize this! */
-       int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+       int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
        int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
 
        while (tp->rcv_ssthresh <= window) {
@@ -471,7 +473,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
        return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+       u32 truesize = skb->truesize;
+
+       if (adjust && !skb_headlen(skb)) {
+               truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+               /* paranoid check, some drivers might be buggy */
+               if (unlikely((int)truesize < (int)skb->len))
+                       truesize = skb->truesize;
+       }
+       return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+                           bool adjust)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        int room;
@@ -480,15 +502,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 
        /* Check #1 */
        if (room > 0 && !tcp_under_memory_pressure(sk)) {
+               unsigned int truesize = truesize_adjust(adjust, skb);
                int incr;
 
                /* Check #2. Increase window, if skb with such overhead
                 * will fit to rcvbuf in future.
                 */
-               if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+               if (tcp_win_from_space(sk, truesize) <= skb->len)
                        incr = 2 * tp->advmss;
                else
-                       incr = __tcp_grow_window(sk, skb);
+                       incr = __tcp_grow_window(sk, skb, truesize);
 
                if (incr) {
                        incr = max_t(int, incr, 2 * skb->len);
@@ -782,7 +805,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
        tcp_ecn_check_ce(sk, skb);
 
        if (skb->len >= 128)
-               tcp_grow_window(sk, skb);
+               tcp_grow_window(sk, skb, true);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -969,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
                return 0;
        if (seq_len > tp->mss_cache)
                dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+       else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
+               state->flag |= FLAG_DSACK_TLP;
 
        tp->dsack_dups += dup_segs;
        /* Skip the DSACK if dup segs weren't retransmitted by sender */
@@ -976,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
                return 0;
 
        tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
-       tp->rack.dsack_seen = 1;
+       /* We increase the RACK ordering window in rounds where we receive
+        * DSACKs that may have been due to reordering causing RACK to trigger
+        * a spurious fast recovery. Thus RACK ignores DSACKs that happen
+        * without having seen reordering, or that match TLP probes (TLP
+        * is timer-driven, not triggered by RACK).
+        */
+       if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
+               tp->rack.dsack_seen = 1;
 
        state->flag |= FLAG_DSACKING_ACK;
        /* A spurious retransmission is delivered */
@@ -3628,7 +3660,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
        if (!tp->tlp_retrans) {
                /* TLP of new data has been acknowledged */
                tp->tlp_high_seq = 0;
-       } else if (flag & FLAG_DSACKING_ACK) {
+       } else if (flag & FLAG_DSACK_TLP) {
                /* This DSACK means original and TLP probe arrived; no loss */
                tp->tlp_high_seq = 0;
        } else if (after(ack, tp->tlp_high_seq)) {
@@ -4769,7 +4801,7 @@ coalesce_done:
                 * and trigger fast retransmit.
                 */
                if (tcp_is_sack(tp))
-                       tcp_grow_window(sk, skb);
+                       tcp_grow_window(sk, skb, true);
                kfree_skb_partial(skb, fragstolen);
                skb = NULL;
                goto add_sack;
@@ -4857,7 +4889,7 @@ end:
                 * and trigger fast retransmit.
                 */
                if (tcp_is_sack(tp))
-                       tcp_grow_window(sk, skb);
+                       tcp_grow_window(sk, skb, false);
                skb_condense(skb);
                skb_set_owner_r(skb, sk);
        }
@@ -5383,7 +5415,7 @@ static void tcp_new_space(struct sock *sk)
                tp->snd_cwnd_stamp = tcp_jiffies32;
        }
 
-       sk->sk_write_space(sk);
+       INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
 }
 
 static void tcp_check_space(struct sock *sk)
index a692626..2e62e0d 100644 (file)
@@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-/*
- * Get next listener socket follow cur.  If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+       unsigned short family = seq_file_family(seq);
+
+       /* AF_UNSPEC is used as a match all */
+       return ((family == AF_UNSPEC || family == sk->sk_family) &&
+               net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non empty bucket (starting from st->bucket)
+ * and return the first sk from it.
  */
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct inet_listen_hashbucket *ilb;
-       struct hlist_nulls_node *node;
-       struct sock *sk = cur;
 
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
+       st->offset = 0;
+       for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+               struct inet_listen_hashbucket *ilb2;
+               struct inet_connection_sock *icsk;
+               struct sock *sk;
 
-       if (!sk) {
-get_head:
-               ilb = &tcp_hashinfo.listening_hash[st->bucket];
-               spin_lock(&ilb->lock);
-               sk = sk_nulls_head(&ilb->nulls_head);
-               st->offset = 0;
-               goto get_sk;
+               ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+               if (hlist_empty(&ilb2->head))
+                       continue;
+
+               spin_lock(&ilb2->lock);
+               inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+                       sk = (struct sock *)icsk;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
+               }
+               spin_unlock(&ilb2->lock);
        }
-       ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+       return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+       struct tcp_iter_state *st = seq->private;
+       struct inet_listen_hashbucket *ilb2;
+       struct inet_connection_sock *icsk;
+       struct sock *sk = cur;
+
        ++st->num;
        ++st->offset;
 
-       sk = sk_nulls_next(sk);
-get_sk:
-       sk_nulls_for_each_from(sk, node) {
-               if (!net_eq(sock_net(sk), net))
-                       continue;
-               if (afinfo->family == AF_UNSPEC ||
-                   sk->sk_family == afinfo->family)
+       icsk = inet_csk(sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
-       spin_unlock(&ilb->lock);
-       st->offset = 0;
-       if (++st->bucket < INET_LHTABLE_SIZE)
-               goto get_head;
-       return NULL;
+
+       ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+       spin_unlock(&ilb2->lock);
+       ++st->bucket;
+       return listening_get_first(seq);
 }
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
@@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 
        st->bucket = 0;
        st->offset = 0;
-       rc = listening_get_next(seq, NULL);
+       rc = listening_get_first(seq);
 
        while (rc && *pos) {
                rc = listening_get_next(seq, rc);
@@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       void *rc = NULL;
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        st->offset = 0;
        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
@@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
 
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-                       if ((afinfo->family != AF_UNSPEC &&
-                            sk->sk_family != afinfo->family) ||
-                           !net_eq(sock_net(sk), net)) {
-                               continue;
-                       }
-                       rc = sk;
-                       goto out;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
                }
                spin_unlock_bh(lock);
        }
-out:
-       return rc;
+
+       return NULL;
 }
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct sock *sk = cur;
        struct hlist_nulls_node *node;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        ++st->num;
        ++st->offset;
@@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
        sk = sk_nulls_next(sk);
 
        sk_nulls_for_each_from(sk, node) {
-               if ((afinfo->family == AF_UNSPEC ||
-                    sk->sk_family == afinfo->family) &&
-                   net_eq(sock_net(sk), net))
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
 
@@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 static void *tcp_seek_last_pos(struct seq_file *seq)
 {
        struct tcp_iter_state *st = seq->private;
+       int bucket = st->bucket;
        int offset = st->offset;
        int orig_num = st->num;
        void *rc = NULL;
 
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
-               if (st->bucket >= INET_LHTABLE_SIZE)
+               if (st->bucket > tcp_hashinfo.lhash2_mask)
                        break;
                st->state = TCP_SEQ_STATE_LISTENING;
-               rc = listening_get_next(seq, NULL);
-               while (offset-- && rc)
+               rc = listening_get_first(seq);
+               while (offset-- && rc && bucket == st->bucket)
                        rc = listening_get_next(seq, rc);
                if (rc)
                        break;
@@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        break;
                rc = established_get_first(seq);
-               while (offset-- && rc)
+               while (offset-- && rc && bucket == st->bucket)
                        rc = established_get_next(seq, rc);
        }
 
@@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
-                       spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+                       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
                break;
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
@@ -2687,6 +2687,15 @@ out:
 }
 
 #ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+       struct tcp_iter_state state;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__tcp {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct sock_common *, sk_common);
@@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+                                     unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_tcp_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+                                                struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct inet_connection_sock *icsk;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       icsk = inet_csk(start_sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+       return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+                                                  struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct hlist_nulls_node *node;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       sk = sk_nulls_next(start_sk);
+       sk_nulls_for_each_from(sk, node) {
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+       return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       /* The st->bucket is done.  Directly advance to the next
+        * bucket instead of having the tcp_seek_last_pos() to skip
+        * one by one in the current bucket and eventually find out
+        * it has to advance to the next bucket.
+        */
+       if (iter->st_bucket_done) {
+               st->offset = 0;
+               st->bucket++;
+               if (st->state == TCP_SEQ_STATE_LISTENING &&
+                   st->bucket > tcp_hashinfo.lhash2_mask) {
+                       st->state = TCP_SEQ_STATE_ESTABLISHED;
+                       st->bucket = 0;
+               }
+       }
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+       iter->st_bucket_done = false;
+
+       sk = tcp_seek_last_pos(seq);
+       if (!sk)
+               return NULL; /* Done */
+
+       if (st->state == TCP_SEQ_STATE_LISTENING)
+               expected = bpf_iter_tcp_listening_batch(seq, sk);
+       else
+               expected = bpf_iter_tcp_established_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       /* bpf iter does not support lseek, so it always
+        * continue from where it was stop()-ped.
+        */
+       if (*pos)
+               return bpf_iter_tcp_batch(seq);
+
+       return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk) {
+               /* Keeping st->num consistent in tcp_iter_state.
+                * bpf_iter_tcp does not use st->num.
+                * meta.seq_num is used instead.
+                */
+               st->num++;
+               /* Move st->offset to the next sk in the bucket such that
+                * the future start() will resume at st->offset in
+                * st->bucket.  See tcp_seek_last_pos().
+                */
+               st->offset++;
+               sock_put(iter->batch[iter->cur_sk++]);
+       }
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_tcp_batch(seq);
+
+       ++*pos;
+       /* Keeping st->last_pos consistent in tcp_iter_state.
+        * bpf iter does not do lseek, so st->last_pos always equals to *pos.
+        */
+       st->last_pos = *pos;
+       return sk;
+}
+
 static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
+       bool slow;
        uid_t uid;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       if (sk_fullsock(sk))
+               slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        if (sk->sk_state == TCP_TIME_WAIT) {
                uid = 0;
        } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
@@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return tcp_prog_seq_show(prog, &meta, v, uid);
+       ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+       if (sk_fullsock(sk))
+               unlock_sock_fast(sk, slow);
+       return ret;
+
 }
 
 static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_tcp_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -2743,17 +2947,34 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
                        (void)tcp_prog_seq_show(prog, &meta, v, 0);
        }
 
-       tcp_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk) {
+               bpf_iter_tcp_put_batch(iter);
+               iter->st_bucket_done = false;
+       }
 }
 
 static const struct seq_operations bpf_iter_tcp_seq_ops = {
        .show           = bpf_iter_tcp_seq_show,
-       .start          = tcp_seq_start,
-       .next           = tcp_seq_next,
+       .start          = bpf_iter_tcp_seq_start,
+       .next           = bpf_iter_tcp_seq_next,
        .stop           = bpf_iter_tcp_seq_stop,
 };
+#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+       const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+       /* Iterated from bpf_iter.  Let the bpf prog to filter instead. */
+       if (seq->op == &bpf_iter_tcp_seq_ops)
+               return AF_UNSPEC;
 #endif
 
+       /* Iterated from proc fs */
+       afinfo = PDE_DATA(file_inode(seq->file));
+       return afinfo->family;
+}
+
 static const struct seq_operations tcp4_seq_ops = {
        .show           = tcp4_seq_show,
        .start          = tcp_seq_start,
@@ -2964,7 +3185,6 @@ static int __net_init tcp_sk_init(struct net *net)
        net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
        net->ipv4.sysctl_tcp_comp_sack_nr = 44;
        net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
-       spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
        net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
        atomic_set(&net->ipv4.tfo_active_disable_times, 0);
 
@@ -3003,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
                     struct sock_common *sk_common, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
 static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
 {
-       struct tcp_iter_state *st = priv_data;
-       struct tcp_seq_afinfo *afinfo;
-       int ret;
+       struct bpf_tcp_iter_state *iter = priv_data;
+       int err;
 
-       afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
-       if (!afinfo)
-               return -ENOMEM;
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
 
-       afinfo->family = AF_UNSPEC;
-       st->bpf_seq_afinfo = afinfo;
-       ret = bpf_iter_init_seq_net(priv_data, aux);
-       if (ret)
-               kfree(afinfo);
-       return ret;
+       err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
 }
 
 static void bpf_iter_fini_tcp(void *priv_data)
 {
-       struct tcp_iter_state *st = priv_data;
+       struct bpf_tcp_iter_state *iter = priv_data;
 
-       kfree(st->bpf_seq_afinfo);
        bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
 }
 
 static const struct bpf_iter_seq_info tcp_seq_info = {
        .seq_ops                = &bpf_iter_tcp_seq_ops,
        .init_seq_private       = bpf_iter_init_tcp,
        .fini_seq_private       = bpf_iter_fini_tcp,
-       .seq_priv_size          = sizeof(struct tcp_iter_state),
+       .seq_priv_size          = sizeof(struct bpf_tcp_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+                           const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg tcp_reg_info = {
        .target                 = "tcp",
        .ctx_arg_info_size      = 1,
@@ -3043,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = {
                { offsetof(struct bpf_iter__tcp, sk_common),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_tcp_get_func_proto,
        .seq_info               = &tcp_seq_info,
 };
 
index 29553fc..6d72f3e 100644 (file)
@@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
        sk_memory_allocated_add(sk, amt);
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
-               mem_cgroup_charge_skmem(sk->sk_memcg, amt);
+               mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+                                       gfp_memcg_charge() | __GFP_NOFAIL);
 }
 
 /* Send a FIN. The caller locks the socket for us.
index 6f1b4ac..fd113f6 100644 (file)
@@ -172,7 +172,8 @@ void tcp_rack_reo_timeout(struct sock *sk)
 
 /* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
  *
- * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * If a DSACK is received that seems like it may have been due to reordering
+ * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
  * by srtt), since there is possibility that spurious retransmission was
  * due to reordering delay longer than reo_wnd.
  *
index 1a742b7..8851c94 100644 (file)
@@ -1143,7 +1143,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                rcu_read_unlock();
        }
 
-       if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
+       if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
                err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
                                            (struct sockaddr *)usin, &ipc.addr);
                if (err)
index 9f5a5cd..7a1d5f4 100644 (file)
@@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
 static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
 {
        *prot        = *base;
-       prot->unhash = sock_map_unhash;
        prot->close  = sock_map_close;
        prot->recvmsg = udp_bpf_recvmsg;
 }
index 1380a6b..86d32a1 100644 (file)
@@ -152,8 +152,8 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
                                       netdev_features_t features,
                                       bool is_ipv6)
 {
+       const struct net_offload __rcu **offloads;
        __be16 protocol = skb->protocol;
-       const struct net_offload **offloads;
        const struct net_offload *ops;
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
index 747f56e..e504204 100644 (file)
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
 
          If unsure, say N.
 
+config IPV6_IOAM6_LWTUNNEL
+       bool "IPv6: IOAM Pre-allocated Trace insertion support"
+       depends on IPV6
+       select LWTUNNEL
+       help
+         Support for the inline insertion of IOAM Pre-allocated
+         Trace Header (only on locally generated packets), using
+         the lightweight tunnels mechanism.
+
+         If unsure, say N.
+
 endif # IPV6
index cf7b47b..1bc7e14 100644 (file)
@@ -10,7 +10,7 @@ ipv6-objs :=  af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
                route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
                raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
                exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-               udp_offload.o seg6.o fib6_notifier.o rpl.o
+               udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
 
 ipv6-offload :=        ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
 ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
 ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
 
 ipv6-objs += $(ipv6-y)
 
index 3bf685f..17756f3 100644 (file)
@@ -89,6 +89,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/export.h>
+#include <linux/ioam6.h>
 
 #define        INFINITY_LIFE_TIME      0xFFFFFFFF
 
@@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .addr_gen_mode          = IN6_ADDR_GEN_MODE_EUI64,
        .disable_policy         = 0,
        .rpl_seg_enabled        = 0,
+       .ioam6_enabled          = 0,
+       .ioam6_id               = IOAM6_DEFAULT_IF_ID,
+       .ioam6_id_wide          = IOAM6_DEFAULT_IF_ID_WIDE,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .addr_gen_mode          = IN6_ADDR_GEN_MODE_EUI64,
        .disable_policy         = 0,
        .rpl_seg_enabled        = 0,
+       .ioam6_enabled          = 0,
+       .ioam6_id               = IOAM6_DEFAULT_IF_ID,
+       .ioam6_id_wide          = IOAM6_DEFAULT_IF_ID_WIDE,
 };
 
 /* Check if link is ready: is it up and is a valid qdisc available */
@@ -387,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
                ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
 
        ndev->cnf.mtu6 = dev->mtu;
+       ndev->ra_mtu = 0;
        ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
        if (!ndev->nd_parms) {
                kfree(ndev);
@@ -694,8 +702,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 errout:
        if (in6_dev)
                in6_dev_put(in6_dev);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 
@@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
                        goto out;
        }
 
-       ifa = kzalloc(sizeof(*ifa), gfp_flags);
+       ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
        if (!ifa) {
                err = -ENOBUFS;
                goto out;
@@ -3843,6 +3850,7 @@ restart:
        }
 
        idev->tstamp = jiffies;
+       idev->ra_mtu = 0;
 
        /* Last: Shot the device (if unregistered) */
        if (unregister) {
@@ -5211,8 +5219,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
                .netnsid = -1,
                .type = type,
        };
-       struct net *net = sock_net(skb->sk);
-       struct net *tgt_net = net;
+       struct net *tgt_net = sock_net(skb->sk);
        int idx, s_idx, s_ip_idx;
        int h, s_h;
        struct net_device *dev;
@@ -5351,7 +5358,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
 static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                             struct netlink_ext_ack *extack)
 {
-       struct net *net = sock_net(in_skb->sk);
+       struct net *tgt_net = sock_net(in_skb->sk);
        struct inet6_fill_args fillargs = {
                .portid = NETLINK_CB(in_skb).portid,
                .seq = nlh->nlmsg_seq,
@@ -5359,7 +5366,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                .flags = 0,
                .netnsid = -1,
        };
-       struct net *tgt_net = net;
        struct ifaddrmsg *ifm;
        struct nlattr *tb[IFA_MAX+1];
        struct in6_addr *addr = NULL, *peer;
@@ -5412,8 +5418,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 errout_ifa:
        in6_ifa_put(ifa);
 errout:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        if (fillargs.netnsid >= 0)
                put_net(tgt_net);
 
@@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
        array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
        array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+       array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+       array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+       array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5537,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void)
             + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
             + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
             + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+            + nla_total_size(4) /* IFLA_INET6_RA_MTU */
             + 0;
 }
 
@@ -5645,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
        if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
                goto nla_put_failure;
 
+       if (idev->ra_mtu &&
+           nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+               goto nla_put_failure;
+
        return 0;
 
 nla_put_failure:
@@ -5761,6 +5774,9 @@ update_lft:
 static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
        [IFLA_INET6_ADDR_GEN_MODE]      = { .type = NLA_U8 },
        [IFLA_INET6_TOKEN]              = { .len = sizeof(struct in6_addr) },
+       [IFLA_INET6_RA_MTU]             = { .type = NLA_REJECT,
+                                           .reject_message =
+                                               "IFLA_INET6_RA_MTU can not be set" },
 };
 
 static int check_addr_gen_mode(int mode)
@@ -5784,7 +5800,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
 }
 
 static int inet6_validate_link_af(const struct net_device *dev,
-                                 const struct nlattr *nla)
+                                 const struct nlattr *nla,
+                                 struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[IFLA_INET6_MAX + 1];
        struct inet6_dev *idev = NULL;
@@ -5797,7 +5814,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
        }
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
-                                         inet6_af_policy, NULL);
+                                         inet6_af_policy, extack);
        if (err)
                return err;
 
@@ -6540,6 +6557,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
 
 static int minus_one = -1;
 static const int two_five_five = 255;
+static u32 ioam6_if_id_max = U16_MAX;
 
 static const struct ctl_table addrconf_sysctl[] = {
        {
@@ -6932,6 +6950,31 @@ static const struct ctl_table addrconf_sysctl[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "ioam6_enabled",
+               .data           = &ipv6_devconf.ioam6_enabled,
+               .maxlen         = sizeof(u8),
+               .mode           = 0644,
+               .proc_handler   = proc_dou8vec_minmax,
+               .extra1         = (void *)SYSCTL_ZERO,
+               .extra2         = (void *)SYSCTL_ONE,
+       },
+       {
+               .procname       = "ioam6_id",
+               .data           = &ipv6_devconf.ioam6_id,
+               .maxlen         = sizeof(u32),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra1         = (void *)SYSCTL_ZERO,
+               .extra2         = (void *)&ioam6_if_id_max,
+       },
+       {
+               .procname       = "ioam6_id_wide",
+               .data           = &ipv6_devconf.ioam6_id_wide,
+               .maxlen         = sizeof(u32),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec,
+       },
        {
                /* sentinel */
        }
index 2389ff7..b5878bb 100644 (file)
@@ -62,6 +62,7 @@
 #include <net/rpl.h>
 #include <net/compat.h>
 #include <net/xfrm.h>
+#include <net/ioam6.h>
 
 #include <linux/uaccess.h>
 #include <linux/mroute6.h>
@@ -454,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         * changes context in a wrong way it will be caught.
         */
        err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
-                                                BPF_CGROUP_INET6_BIND, &flags);
+                                                CGROUP_INET6_BIND, &flags);
        if (err)
                return err;
 
@@ -531,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
                if (np->sndflow)
                        sin->sin6_flowinfo = np->flow_label;
                BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           BPF_CGROUP_INET6_GETPEERNAME,
+                                           CGROUP_INET6_GETPEERNAME,
                                            NULL);
        } else {
                if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
                        sin->sin6_addr = sk->sk_v6_rcv_saddr;
                sin->sin6_port = inet->inet_sport;
                BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-                                           BPF_CGROUP_INET6_GETSOCKNAME,
+                                           CGROUP_INET6_GETSOCKNAME,
                                            NULL);
        }
        sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
        net->ipv6.sysctl.fib_notify_on_flag_change = 0;
        atomic_set(&net->ipv6.fib6_sernum, 1);
 
+       net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+       net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
        err = ipv6_init_mibs(net);
        if (err)
                return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
        if (err)
                goto rpl_fail;
 
+       err = ioam6_init();
+       if (err)
+               goto ioam6_fail;
+
        err = igmp6_late_init();
        if (err)
                goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
        igmp6_late_cleanup();
 #endif
 igmp6_late_err:
+       ioam6_exit();
+ioam6_fail:
        rpl_exit();
 rpl_fail:
        seg6_exit();
index 26882e1..3a871a0 100644 (file)
 #include <net/seg6_hmac.h>
 #endif
 #include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
 
 #include <linux/uaccess.h>
 
-/*
- *     Parsing tlv encoded headers.
- *
- *     Parsing function "func" returns true, if parsing succeed
- *     and false, if it failed.
- *     It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
-       int     type;
-       bool    (*func)(struct sk_buff *skb, int offset);
-};
-
 /*********************
   Generic functions
  *********************/
@@ -109,16 +99,23 @@ drop:
        return false;
 }
 
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
                          struct sk_buff *skb,
                          int max_count)
 {
        int len = (skb_transport_header(skb)[1] + 1) << 3;
        const unsigned char *nh = skb_network_header(skb);
        int off = skb_network_header_len(skb);
-       const struct tlvtype_proc *curr;
        bool disallow_unknowns = false;
        int tlv_count = 0;
        int padlen = 0;
@@ -173,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
                        if (tlv_count > max_count)
                                goto bad;
 
-                       for (curr = procs; curr->type >= 0; curr++) {
-                               if (curr->type == nh[off]) {
-                                       /* type specific length/alignment
-                                          checks will be performed in the
-                                          func(). */
-                                       if (curr->func(skb, off) == false)
+                       if (hopbyhop) {
+                               switch (nh[off]) {
+                               case IPV6_TLV_ROUTERALERT:
+                                       if (!ipv6_hop_ra(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_IOAM:
+                                       if (!ipv6_hop_ioam(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_JUMBO:
+                                       if (!ipv6_hop_jumbo(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_CALIPSO:
+                                       if (!ipv6_hop_calipso(skb, off))
+                                               return false;
+                                       break;
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
+                                               return false;
+                                       break;
+                               }
+                       } else {
+                               switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+                               case IPV6_TLV_HAO:
+                                       if (!ipv6_dest_hao(skb, off))
+                                               return false;
+                                       break;
+#endif
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
                                                return false;
                                        break;
                                }
                        }
-                       if (curr->type < 0 &&
-                           !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
-                               return false;
-
                        padlen = 0;
                }
                off += optlen;
@@ -264,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 }
 #endif
 
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-       {
-               .type   = IPV6_TLV_HAO,
-               .func   = ipv6_dest_hao,
-       },
-#endif
-       {-1,                    NULL}
-};
-
 static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
        struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -304,8 +316,7 @@ fail_and_free:
        dstbuf = opt->dst1;
 #endif
 
-       if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
-                         net->ipv6.sysctl.max_dst_opts_cnt)) {
+       if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -928,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
        return false;
 }
 
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+       struct ioam6_trace_hdr *trace;
+       struct ioam6_namespace *ns;
+       struct ioam6_hdr *hdr;
+
+       /* Bad alignment (must be 4n-aligned) */
+       if (optoff & 3)
+               goto drop;
+
+       /* Ignore if IOAM is not enabled on ingress */
+       if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+               goto ignore;
+
+       /* Truncated Option header */
+       hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+       if (hdr->opt_len < 2)
+               goto drop;
+
+       switch (hdr->type) {
+       case IOAM6_TYPE_PREALLOC:
+               /* Truncated Pre-allocated Trace header */
+               if (hdr->opt_len < 2 + sizeof(*trace))
+                       goto drop;
+
+               /* Malformed Pre-allocated Trace header */
+               trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+               if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+                       goto drop;
+
+               /* Ignore if the IOAM namespace is unknown */
+               ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+               if (!ns)
+                       goto ignore;
+
+               if (!skb_valid_dst(skb))
+                       ip6_route_input(skb);
+
+               ioam6_fill_trace_data(skb, ns, trace);
+               break;
+       default:
+               break;
+       }
+
+ignore:
+       return true;
+
+drop:
+       kfree_skb(skb);
+       return false;
+}
+
 /* Jumbo payload */
 
 static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -994,22 +1059,6 @@ drop:
        return false;
 }
 
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
-       {
-               .type   = IPV6_TLV_ROUTERALERT,
-               .func   = ipv6_hop_ra,
-       },
-       {
-               .type   = IPV6_TLV_JUMBO,
-               .func   = ipv6_hop_jumbo,
-       },
-       {
-               .type   = IPV6_TLV_CALIPSO,
-               .func   = ipv6_hop_calipso,
-       },
-       { -1, }
-};
-
 int ipv6_parse_hopopts(struct sk_buff *skb)
 {
        struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1035,8 +1084,7 @@ fail_and_free:
                goto fail_and_free;
 
        opt->flags |= IP6SKB_HOPBYHOP;
-       if (ip6_parse_tlv(tlvprochopopt_lst, skb,
-                         net->ipv6.sysctl.max_hbh_opts_cnt)) {
+       if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
                opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644 (file)
index 0000000..5e89610
--- /dev/null
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ *  IPv6 IOAM implementation
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+       kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+       kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+       struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+       if (ns)
+               ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+       struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+       if (sc)
+               ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+       const struct ioam6_namespace *ns = obj;
+
+       return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+       const struct ioam6_schema *sc = obj;
+
+       return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+       .key_len                = sizeof(__be16),
+       .key_offset             = offsetof(struct ioam6_namespace, id),
+       .head_offset            = offsetof(struct ioam6_namespace, head),
+       .automatic_shrinking    = true,
+       .obj_cmpfn              = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+       .key_len                = sizeof(u32),
+       .key_offset             = offsetof(struct ioam6_schema, id),
+       .head_offset            = offsetof(struct ioam6_schema, head),
+       .automatic_shrinking    = true,
+       .obj_cmpfn              = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+       [IOAM6_ATTR_NS_ID]      = { .type = NLA_U16 },
+       [IOAM6_ATTR_NS_DATA]    = { .type = NLA_U32 },
+       [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+       [IOAM6_ATTR_NS_ID]      = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+       [IOAM6_ATTR_SC_ID]      = { .type = NLA_U32 },
+       [IOAM6_ATTR_SC_DATA]    = { .type = NLA_BINARY,
+                                   .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+       [IOAM6_ATTR_SC_ID]      = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+       [IOAM6_ATTR_NS_ID]      = { .type = NLA_U16 },
+       [IOAM6_ATTR_SC_ID]      = { .type = NLA_U32 },
+       [IOAM6_ATTR_SC_NONE]    = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+       struct ioam6_pernet_data *nsdata;
+       struct ioam6_namespace *ns;
+       u64 data64;
+       u32 data32;
+       __be16 id;
+       int err;
+
+       if (!info->attrs[IOAM6_ATTR_NS_ID])
+               return -EINVAL;
+
+       id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+       nsdata = ioam6_pernet(genl_info_net(info));
+
+       mutex_lock(&nsdata->lock);
+
+       ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+       if (ns) {
+               err = -EEXIST;
+               goto out_unlock;
+       }
+
+       ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+       if (!ns) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       ns->id = id;
+
+       if (!info->attrs[IOAM6_ATTR_NS_DATA])
+               data32 = IOAM6_U32_UNAVAILABLE;
+       else
+               data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+       if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+               data64 = IOAM6_U64_UNAVAILABLE;
+       else
+               data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+       ns->data = cpu_to_be32(data32);
+       ns->data_wide = cpu_to_be64(data64);
+
+       err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+                                           rht_ns_params);
+       if (err)
+               kfree(ns);
+
+out_unlock:
+       mutex_unlock(&nsdata->lock);
+       return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+       struct ioam6_pernet_data *nsdata;
+       struct ioam6_namespace *ns;
+       struct ioam6_schema *sc;
+       __be16 id;
+       int err;
+
+       if (!info->attrs[IOAM6_ATTR_NS_ID])
+               return -EINVAL;
+
+       id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+       nsdata = ioam6_pernet(genl_info_net(info));
+
+       mutex_lock(&nsdata->lock);
+
+       ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+       if (!ns) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
+       sc = rcu_dereference_protected(ns->schema,
+                                      lockdep_is_held(&nsdata->lock));
+
+       err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+                                    rht_ns_params);
+       if (err)
+               goto out_unlock;
+
+       if (sc)
+               rcu_assign_pointer(sc->ns, NULL);
+
+       ioam6_ns_release(ns);
+
+out_unlock:
+       mutex_unlock(&nsdata->lock);
+       return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+                                      u32 portid,
+                                      u32 seq,
+                                      u32 flags,
+                                      struct sk_buff *skb,
+                                      u8 cmd)
+{
+       struct ioam6_schema *sc;
+       u64 data64;
+       u32 data32;
+       void *hdr;
+
+       hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+       if (!hdr)
+               return -ENOMEM;
+
+       data32 = be32_to_cpu(ns->data);
+       data64 = be64_to_cpu(ns->data_wide);
+
+       if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+           (data32 != IOAM6_U32_UNAVAILABLE &&
+            nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+           (data64 != IOAM6_U64_UNAVAILABLE &&
+            nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+                              data64, IOAM6_ATTR_PAD)))
+               goto nla_put_failure;
+
+       rcu_read_lock();
+
+       sc = rcu_dereference(ns->schema);
+       if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+               rcu_read_unlock();
+               goto nla_put_failure;
+       }
+
+       rcu_read_unlock();
+
+       genlmsg_end(skb, hdr);
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(skb, hdr);
+       return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+       struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+       struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+       if (!iter) {
+               iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+               if (!iter)
+                       return -ENOMEM;
+
+               cb->args[0] = (long)iter;
+       }
+
+       rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+       return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+       struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+       rhashtable_walk_exit(iter);
+       kfree(iter);
+
+       return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct rhashtable_iter *iter;
+       struct ioam6_namespace *ns;
+       int err;
+
+       iter = (struct rhashtable_iter *)cb->args[0];
+       rhashtable_walk_start(iter);
+
+       for (;;) {
+               ns = rhashtable_walk_next(iter);
+
+               if (IS_ERR(ns)) {
+                       if (PTR_ERR(ns) == -EAGAIN)
+                               continue;
+                       err = PTR_ERR(ns);
+                       goto done;
+               } else if (!ns) {
+                       break;
+               }
+
+               err = __ioam6_genl_dumpns_element(ns,
+                                                 NETLINK_CB(cb->skb).portid,
+                                                 cb->nlh->nlmsg_seq,
+                                                 NLM_F_MULTI,
+                                                 skb,
+                                                 IOAM6_CMD_DUMP_NAMESPACES);
+               if (err)
+                       goto done;
+       }
+
+       err = skb->len;
+
+done:
+       rhashtable_walk_stop(iter);
+       return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+       struct ioam6_pernet_data *nsdata;
+       int len, len_aligned, err;
+       struct ioam6_schema *sc;
+       u32 id;
+
+       if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+               return -EINVAL;
+
+       id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+       nsdata = ioam6_pernet(genl_info_net(info));
+
+       mutex_lock(&nsdata->lock);
+
+       sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+       if (sc) {
+               err = -EEXIST;
+               goto out_unlock;
+       }
+
+       len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+       len_aligned = ALIGN(len, 4);
+
+       sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+       if (!sc) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       sc->id = id;
+       sc->len = len_aligned;
+       sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+       nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+       err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+                                           rht_sc_params);
+       if (err)
+               goto free_sc;
+
+out_unlock:
+       mutex_unlock(&nsdata->lock);
+       return err;
+free_sc:
+       kfree(sc);
+       goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+       struct ioam6_pernet_data *nsdata;
+       struct ioam6_namespace *ns;
+       struct ioam6_schema *sc;
+       int err;
+       u32 id;
+
+       if (!info->attrs[IOAM6_ATTR_SC_ID])
+               return -EINVAL;
+
+       id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+       nsdata = ioam6_pernet(genl_info_net(info));
+
+       mutex_lock(&nsdata->lock);
+
+       sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+       if (!sc) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
+       ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+       err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+                                    rht_sc_params);
+       if (err)
+               goto out_unlock;
+
+       if (ns)
+               rcu_assign_pointer(ns->schema, NULL);
+
+       ioam6_sc_release(sc);
+
+out_unlock:
+       mutex_unlock(&nsdata->lock);
+       return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+                                      u32 portid, u32 seq, u32 flags,
+                                      struct sk_buff *skb, u8 cmd)
+{
+       struct ioam6_namespace *ns;
+       void *hdr;
+
+       hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+       if (!hdr)
+               return -ENOMEM;
+
+       if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+           nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+               goto nla_put_failure;
+
+       rcu_read_lock();
+
+       ns = rcu_dereference(sc->ns);
+       if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+               rcu_read_unlock();
+               goto nla_put_failure;
+       }
+
+       rcu_read_unlock();
+
+       genlmsg_end(skb, hdr);
+       return 0;
+
+nla_put_failure:
+       genlmsg_cancel(skb, hdr);
+       return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+       struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+       struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+       if (!iter) {
+               iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+               if (!iter)
+                       return -ENOMEM;
+
+               cb->args[0] = (long)iter;
+       }
+
+       rhashtable_walk_enter(&nsdata->schemas, iter);
+
+       return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+       struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+       rhashtable_walk_exit(iter);
+       kfree(iter);
+
+       return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct rhashtable_iter *iter;
+       struct ioam6_schema *sc;
+       int err;
+
+       iter = (struct rhashtable_iter *)cb->args[0];
+       rhashtable_walk_start(iter);
+
+       for (;;) {
+               sc = rhashtable_walk_next(iter);
+
+               if (IS_ERR(sc)) {
+                       if (PTR_ERR(sc) == -EAGAIN)
+                               continue;
+                       err = PTR_ERR(sc);
+                       goto done;
+               } else if (!sc) {
+                       break;
+               }
+
+               err = __ioam6_genl_dumpsc_element(sc,
+                                                 NETLINK_CB(cb->skb).portid,
+                                                 cb->nlh->nlmsg_seq,
+                                                 NLM_F_MULTI,
+                                                 skb,
+                                                 IOAM6_CMD_DUMP_SCHEMAS);
+               if (err)
+                       goto done;
+       }
+
+       err = skb->len;
+
+done:
+       rhashtable_walk_stop(iter);
+       return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+       struct ioam6_namespace *ns, *ns_ref;
+       struct ioam6_schema *sc, *sc_ref;
+       struct ioam6_pernet_data *nsdata;
+       __be16 ns_id;
+       u32 sc_id;
+       int err;
+
+       if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+           (!info->attrs[IOAM6_ATTR_SC_ID] &&
+            !info->attrs[IOAM6_ATTR_SC_NONE]))
+               return -EINVAL;
+
+       ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+       nsdata = ioam6_pernet(genl_info_net(info));
+
+       mutex_lock(&nsdata->lock);
+
+       ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+       if (!ns) {
+               err = -ENOENT;
+               goto out_unlock;
+       }
+
+       if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+               sc = NULL;
+       } else {
+               sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+               sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+                                           rht_sc_params);
+               if (!sc) {
+                       err = -ENOENT;
+                       goto out_unlock;
+               }
+       }
+
+       sc_ref = rcu_dereference_protected(ns->schema,
+                                          lockdep_is_held(&nsdata->lock));
+       if (sc_ref)
+               rcu_assign_pointer(sc_ref->ns, NULL);
+       rcu_assign_pointer(ns->schema, sc);
+
+       if (sc) {
+               ns_ref = rcu_dereference_protected(sc->ns,
+                                                  lockdep_is_held(&nsdata->lock));
+               if (ns_ref)
+                       rcu_assign_pointer(ns_ref->schema, NULL);
+               rcu_assign_pointer(sc->ns, ns);
+       }
+
+       err = 0;
+
+out_unlock:
+       mutex_unlock(&nsdata->lock);
+       return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+       {
+               .cmd    = IOAM6_CMD_ADD_NAMESPACE,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit   = ioam6_genl_addns,
+               .flags  = GENL_ADMIN_PERM,
+               .policy = ioam6_genl_policy_addns,
+               .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+       },
+       {
+               .cmd    = IOAM6_CMD_DEL_NAMESPACE,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit   = ioam6_genl_delns,
+               .flags  = GENL_ADMIN_PERM,
+               .policy = ioam6_genl_policy_delns,
+               .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+       },
+       {
+               .cmd    = IOAM6_CMD_DUMP_NAMESPACES,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .start  = ioam6_genl_dumpns_start,
+               .dumpit = ioam6_genl_dumpns,
+               .done   = ioam6_genl_dumpns_done,
+               .flags  = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd    = IOAM6_CMD_ADD_SCHEMA,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit   = ioam6_genl_addsc,
+               .flags  = GENL_ADMIN_PERM,
+               .policy = ioam6_genl_policy_addsc,
+               .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+       },
+       {
+               .cmd    = IOAM6_CMD_DEL_SCHEMA,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit   = ioam6_genl_delsc,
+               .flags  = GENL_ADMIN_PERM,
+               .policy = ioam6_genl_policy_delsc,
+               .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+       },
+       {
+               .cmd    = IOAM6_CMD_DUMP_SCHEMAS,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .start  = ioam6_genl_dumpsc_start,
+               .dumpit = ioam6_genl_dumpsc,
+               .done   = ioam6_genl_dumpsc_done,
+               .flags  = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd    = IOAM6_CMD_NS_SET_SCHEMA,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit   = ioam6_genl_ns_set_schema,
+               .flags  = GENL_ADMIN_PERM,
+               .policy = ioam6_genl_policy_ns_sc,
+               .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+       },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+       .name           = IOAM6_GENL_NAME,
+       .version        = IOAM6_GENL_VERSION,
+       .netnsok        = true,
+       .parallel_ops   = true,
+       .ops            = ioam6_genl_ops,
+       .n_ops          = ARRAY_SIZE(ioam6_genl_ops),
+       .module         = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+       struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+       return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+                                   struct ioam6_namespace *ns,
+                                   struct ioam6_trace_hdr *trace,
+                                   struct ioam6_schema *sc,
+                                   u8 sclen)
+{
+       struct __kernel_sock_timeval ts;
+       u64 raw64;
+       u32 raw32;
+       u16 raw16;
+       u8 *data;
+       u8 byte;
+
+       data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
+
+       /* hop_lim and node_id */
+       if (trace->type.bit0) {
+               byte = ipv6_hdr(skb)->hop_limit;
+               if (skb->dev)
+                       byte--;
+
+               raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+               *(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
+               data += sizeof(__be32);
+       }
+
+       /* ingress_if_id and egress_if_id */
+       if (trace->type.bit1) {
+               if (!skb->dev)
+                       raw16 = IOAM6_U16_UNAVAILABLE;
+               else
+                       raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+               *(__be16 *)data = cpu_to_be16(raw16);
+               data += sizeof(__be16);
+
+               if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+                       raw16 = IOAM6_U16_UNAVAILABLE;
+               else
+                       raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+               *(__be16 *)data = cpu_to_be16(raw16);
+               data += sizeof(__be16);
+       }
+
+       /* timestamp seconds */
+       if (trace->type.bit2) {
+               if (!skb->dev) {
+                       *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               } else {
+                       if (!skb->tstamp)
+                               __net_timestamp(skb);
+
+                       skb_get_new_timestamp(skb, &ts);
+                       *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+               }
+               data += sizeof(__be32);
+       }
+
+       /* timestamp subseconds */
+       if (trace->type.bit3) {
+               if (!skb->dev) {
+                       *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               } else {
+                       if (!skb->tstamp)
+                               __net_timestamp(skb);
+
+                       if (!trace->type.bit2)
+                               skb_get_new_timestamp(skb, &ts);
+
+                       *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+               }
+               data += sizeof(__be32);
+       }
+
+       /* transit delay */
+       if (trace->type.bit4) {
+               *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               data += sizeof(__be32);
+       }
+
+       /* namespace data */
+       if (trace->type.bit5) {
+               *(__be32 *)data = ns->data;
+               data += sizeof(__be32);
+       }
+
+       /* queue depth */
+       if (trace->type.bit6) {
+               *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               data += sizeof(__be32);
+       }
+
+       /* checksum complement */
+       if (trace->type.bit7) {
+               *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               data += sizeof(__be32);
+       }
+
+       /* hop_lim and node_id (wide) */
+       if (trace->type.bit8) {
+               byte = ipv6_hdr(skb)->hop_limit;
+               if (skb->dev)
+                       byte--;
+
+               raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+               *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
+               data += sizeof(__be64);
+       }
+
+       /* ingress_if_id and egress_if_id (wide) */
+       if (trace->type.bit9) {
+               if (!skb->dev)
+                       raw32 = IOAM6_U32_UNAVAILABLE;
+               else
+                       raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+               *(__be32 *)data = cpu_to_be32(raw32);
+               data += sizeof(__be32);
+
+               if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+                       raw32 = IOAM6_U32_UNAVAILABLE;
+               else
+                       raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+               *(__be32 *)data = cpu_to_be32(raw32);
+               data += sizeof(__be32);
+       }
+
+       /* namespace data (wide) */
+       if (trace->type.bit10) {
+               *(__be64 *)data = ns->data_wide;
+               data += sizeof(__be64);
+       }
+
+       /* buffer occupancy */
+       if (trace->type.bit11) {
+               *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+               data += sizeof(__be32);
+       }
+
+       /* opaque state snapshot */
+       if (trace->type.bit22) {
+               if (!sc) {
+                       *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8);
+               } else {
+                       *(__be32 *)data = sc->hdr;
+                       data += sizeof(__be32);
+
+                       memcpy(data, sc->data, sc->len);
+               }
+       }
+}
+
+/* called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+                          struct ioam6_namespace *ns,
+                          struct ioam6_trace_hdr *trace)
+{
+       struct ioam6_schema *sc;
+       u8 sclen = 0;
+
+       /* Skip if Overflow flag is set OR
+        * if an unknown type (bit 12-21) is set
+        */
+       if (trace->overflow ||
+           trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+           trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+           trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+           trace->type.bit21) {
+               return;
+       }
+
+       /* NodeLen does not include Opaque State Snapshot length. We need to
+        * take it into account if the corresponding bit is set (bit 22) and
+        * if the current IOAM namespace has an active schema attached to it
+        */
+       sc = rcu_dereference(ns->schema);
+       if (trace->type.bit22) {
+               sclen = sizeof_field(struct ioam6_schema, hdr) / 4;
+
+               if (sc)
+                       sclen += sc->len / 4;
+       }
+
+       /* If there is no space remaining, we set the Overflow flag and we
+        * skip without filling the trace
+        */
+       if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+               trace->overflow = 1;
+               return;
+       }
+
+       __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+       trace->remlen -= trace->nodelen + sclen;
+}
+
+static int __net_init ioam6_net_init(struct net *net)
+{
+       struct ioam6_pernet_data *nsdata;
+       int err = -ENOMEM;
+
+       nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+       if (!nsdata)
+               goto out;
+
+       mutex_init(&nsdata->lock);
+       net->ipv6.ioam6_data = nsdata;
+
+       err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+       if (err)
+               goto free_nsdata;
+
+       err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+       if (err)
+               goto free_rht_ns;
+
+out:
+       return err;
+free_rht_ns:
+       rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+       kfree(nsdata);
+       net->ipv6.ioam6_data = NULL;
+       goto out;
+}
+
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+       struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+       rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL);
+       rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL);
+
+       kfree(nsdata);
+}
+
+static struct pernet_operations ioam6_net_ops = {
+       .init = ioam6_net_init,
+       .exit = ioam6_net_exit,
+};
+
+int __init ioam6_init(void)
+{
+       int err = register_pernet_subsys(&ioam6_net_ops);
+       if (err)
+               goto out;
+
+       err = genl_register_family(&ioam6_genl_family);
+       if (err)
+               goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+       err = ioam6_iptunnel_init();
+       if (err)
+               goto out_unregister_genl;
+#endif
+
+       pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out:
+       return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+       genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+       unregister_pernet_subsys(&ioam6_net_ops);
+       goto out;
+}
+
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+       ioam6_iptunnel_exit();
+#endif
+       genl_unregister_family(&ioam6_genl_family);
+       unregister_pernet_subsys(&ioam6_net_ops);
+}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644 (file)
index 0000000..f9ee045
--- /dev/null
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ *  IPv6 IOAM Lightweight Tunnel implementation
+ *
+ *  Author:
+ *  Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+
+struct ioam6_lwt_encap {
+       struct ipv6_hopopt_hdr  eh;
+       u8                      pad[2]; /* 2-octet padding for 4n-alignment */
+       struct ioam6_hdr        ioamh;
+       struct ioam6_trace_hdr  traceh;
+} __packed;
+
+struct ioam6_lwt {
+       struct ioam6_lwt_encap  tuninfo;
+};
+
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+       return (struct ioam6_lwt *)lwt->data;
+}
+
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+       return &ioam6_lwt_state(lwt)->tuninfo;
+}
+
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+       return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+       [IOAM6_IPTUNNEL_TRACE]  = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+                              struct ioam6_trace_hdr *trace)
+{
+       struct ioam6_trace_hdr *data;
+       struct nlattr *nla;
+       int len;
+
+       len = sizeof(*trace);
+
+       nla = nla_reserve(skb, attrtype, len);
+       if (!nla)
+               return -EMSGSIZE;
+
+       data = nla_data(nla);
+       memcpy(data, trace, len);
+
+       return 0;
+}
+
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+       u32 fields;
+
+       if (!trace->type_be32 || !trace->remlen ||
+           trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+               return false;
+
+       trace->nodelen = 0;
+       fields = be32_to_cpu(trace->type_be32);
+
+       trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+                               * (sizeof(__be32) / 4);
+       trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+                               * (sizeof(__be64) / 4);
+
+       return true;
+}
+
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+                            unsigned int family, const void *cfg,
+                            struct lwtunnel_state **ts,
+                            struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+       struct ioam6_lwt_encap *tuninfo;
+       struct ioam6_trace_hdr *trace;
+       struct lwtunnel_state *s;
+       int len_aligned;
+       int len, err;
+
+       if (family != AF_INET6)
+               return -EINVAL;
+
+       err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+                              ioam6_iptunnel_policy, extack);
+       if (err < 0)
+               return err;
+
+       if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+               NL_SET_ERR_MSG(extack, "missing trace");
+               return -EINVAL;
+       }
+
+       trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+       if (!ioam6_validate_trace_hdr(trace)) {
+               NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+                                   "invalid trace validation");
+               return -EINVAL;
+       }
+
+       len = sizeof(*tuninfo) + trace->remlen * 4;
+       len_aligned = ALIGN(len, 8);
+
+       s = lwtunnel_state_alloc(len_aligned);
+       if (!s)
+               return -ENOMEM;
+
+       tuninfo = ioam6_lwt_info(s);
+       tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+       tuninfo->pad[0] = IPV6_TLV_PADN;
+       tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+       tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+       tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+                                       + trace->remlen * 4;
+
+       memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+       len = len_aligned - len;
+       if (len == 1) {
+               tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+       } else if (len > 0) {
+               tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+               tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+       }
+
+       s->type = LWTUNNEL_ENCAP_IOAM6;
+       s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+       *ts = s;
+
+       return 0;
+}
+
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+       struct ioam6_trace_hdr *trace;
+       struct ipv6hdr *oldhdr, *hdr;
+       struct ioam6_namespace *ns;
+       int hdrlen, err;
+
+       hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+
+       err = skb_cow_head(skb, hdrlen + skb->mac_len);
+       if (unlikely(err))
+               return err;
+
+       oldhdr = ipv6_hdr(skb);
+       skb_pull(skb, sizeof(*oldhdr));
+       skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+       skb_push(skb, sizeof(*oldhdr) + hdrlen);
+       skb_reset_network_header(skb);
+       skb_mac_header_rebuild(skb);
+
+       hdr = ipv6_hdr(skb);
+       memmove(hdr, oldhdr, sizeof(*oldhdr));
+       tuninfo->eh.nexthdr = hdr->nexthdr;
+
+       skb_set_transport_header(skb, sizeof(*hdr));
+       skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+       memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+       hdr->nexthdr = NEXTHDR_HOP;
+       hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+       trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+                                          + sizeof(struct ipv6_hopopt_hdr) + 2
+                                          + sizeof(struct ioam6_hdr));
+
+       ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+       if (ns)
+               ioam6_fill_trace_data(skb, ns, trace);
+
+       return 0;
+}
+
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+       int err = -EINVAL;
+
+       if (skb->protocol != htons(ETH_P_IPV6))
+               goto drop;
+
+       /* Only for packets we send and
+        * that do not contain a Hop-by-Hop yet
+        */
+       if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+               goto out;
+
+       err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+       if (unlikely(err))
+               goto drop;
+
+       err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+       if (unlikely(err))
+               goto drop;
+
+out:
+       return lwt->orig_output(net, sk, skb);
+
+drop:
+       kfree_skb(skb);
+       return err;
+}
+
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+                                struct lwtunnel_state *lwtstate)
+{
+       struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+       if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+               return -EMSGSIZE;
+
+       return 0;
+}
+
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+       struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+       return nla_total_size(sizeof(*trace));
+}
+
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+       struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+       struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+       return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+       .build_state    = ioam6_build_state,
+       .output         = ioam6_output,
+       .fill_encap     = ioam6_fill_encap_info,
+       .get_encap_size = ioam6_encap_nlsize,
+       .cmp_encap      = ioam6_encap_cmp,
+       .owner          = THIS_MODULE,
+};
+
+int __init ioam6_iptunnel_init(void)
+{
+       return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+
+void ioam6_iptunnel_exit(void)
+{
+       lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
index ef75c9b..1bec5b2 100644 (file)
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
        int ret = -ENOMEM;
 
        fib6_node_kmem = kmem_cache_create("fib6_nodes",
-                                          sizeof(struct fib6_node),
-                                          0, SLAB_HWCACHE_ALIGN,
+                                          sizeof(struct fib6_node), 0,
+                                          SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
                                           NULL);
        if (!fib6_node_kmem)
                goto out;
index 7a5e90e..7baf41d 100644 (file)
@@ -1246,8 +1246,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
        memcpy(u->name, p->name, sizeof(u->name));
 }
 
-static int ip6gre_tunnel_ioctl(struct net_device *dev,
-       struct ifreq *ifr, int cmd)
+static int ip6gre_tunnel_siocdevprivate(struct net_device *dev,
+                                       struct ifreq *ifr, void __user *data,
+                                       int cmd)
 {
        int err = 0;
        struct ip6_tnl_parm2 p;
@@ -1261,7 +1262,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == ign->fb_tunnel_dev) {
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+                       if (copy_from_user(&p, data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
@@ -1272,7 +1273,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
                }
                memset(&p, 0, sizeof(p));
                ip6gre_tnl_parm_to_user(&p, &t->parms);
-               if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+               if (copy_to_user(data, &p, sizeof(p)))
                        err = -EFAULT;
                break;
 
@@ -1283,7 +1284,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
                        goto done;
 
                err = -EFAULT;
-               if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+               if (copy_from_user(&p, data, sizeof(p)))
                        goto done;
 
                err = -EINVAL;
@@ -1320,7 +1321,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
 
                        memset(&p, 0, sizeof(p));
                        ip6gre_tnl_parm_to_user(&p, &t->parms);
-                       if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+                       if (copy_to_user(data, &p, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
@@ -1333,7 +1334,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
 
                if (dev == ign->fb_tunnel_dev) {
                        err = -EFAULT;
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+                       if (copy_from_user(&p, data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        ip6gre_tnl_parm_from_user(&p1, &p);
@@ -1400,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
        .ndo_init               = ip6gre_tunnel_init,
        .ndo_uninit             = ip6gre_tunnel_uninit,
        .ndo_start_xmit         = ip6gre_tunnel_xmit,
-       .ndo_do_ioctl           = ip6gre_tunnel_ioctl,
+       .ndo_siocdevprivate     = ip6gre_tunnel_siocdevprivate,
        .ndo_change_mtu         = ip6_tnl_change_mtu,
        .ndo_get_stats64        = dev_get_tstats64,
        .ndo_get_iflink         = ip6_tnl_get_iflink,
index 8e6ca9a..12f985f 100644 (file)
@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 {
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
-       int delta = hh_len - skb_headroom(skb);
-       const struct in6_addr *nexthop;
+       const struct in6_addr *daddr, *nexthop;
+       struct ipv6hdr *hdr;
        struct neighbour *neigh;
        int ret;
 
        /* Be paranoid, rather than too clever. */
-       if (unlikely(delta > 0) && dev->header_ops) {
-               /* pskb_expand_head() might crash, if skb is shared */
-               if (skb_shared(skb)) {
-                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                       if (likely(nskb)) {
-                               if (skb->sk)
-                                       skb_set_owner_w(nskb, skb->sk);
-                               consume_skb(skb);
-                       } else {
-                               kfree_skb(skb);
-                       }
-                       skb = nskb;
-               }
-               if (skb &&
-                   pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-                       kfree_skb(skb);
-                       skb = NULL;
-               }
+       if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+               skb = skb_expand_head(skb, hh_len);
                if (!skb) {
-                       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }
 
-       if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
-               struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+       hdr = ipv6_hdr(skb);
+       daddr = &hdr->daddr;
+       if (ipv6_addr_is_multicast(daddr)) {
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
-                    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
-                                        &ipv6_hdr(skb)->saddr))) {
+                    ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
                        /* Do not check for IFF_ALLMULTI; multicast routing
@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);
 
-                       if (ipv6_hdr(skb)->hop_limit == 0) {
+                       if (hdr->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                }
 
                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
-               if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
-                   IPV6_ADDR_SCOPE_NODELOCAL &&
+               if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
 
        rcu_read_lock_bh();
-       nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
-       neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+       nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+       neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
-               neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+               neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
        rcu_read_unlock_bh();
 
-       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
 }
@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
+       struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        int hlimit = -1;
        u32 mtu;
 
-       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;
 
-       if (unlikely(skb_headroom(skb) < head_room)) {
-               struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
-               if (!skb2) {
-                       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                                     IPSTATS_MIB_OUTDISCARDS);
-                       kfree_skb(skb);
+       if (unlikely(head_room > skb_headroom(skb))) {
+               skb = skb_expand_head(skb, head_room);
+               if (!skb) {
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOBUFS;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (opt) {
@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 
        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
-               IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                             IPSTATS_MIB_OUT, skb->len);
+               IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
-                              net, (struct sock *)sk, skb, NULL, dst->dev,
+                              net, (struct sock *)sk, skb, NULL, dev,
                               dst_output);
        }
 
-       skb->dev = dst->dev;
+       skb->dev = dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 
-       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
 }
@@ -608,7 +584,7 @@ int ip6_forward(struct sk_buff *skb)
                }
        }
 
-       mtu = ip6_dst_mtu_forward(dst);
+       mtu = ip6_dst_mtu_maybe_forward(dst, true);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;
 
index 322698d..20a67ef 100644 (file)
@@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
 }
 
 /**
- * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
- *   @ifr: parameters passed from userspace
+ *   @ifr: unused
+ *   @data: parameters passed from userspace
  *   @cmd: command to be performed
  *
  * Description:
@@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
  **/
 
 static int
-ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                      void __user *data, int cmd)
 {
        int err = 0;
        struct ip6_tnl_parm p;
@@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == ip6n->fb_tnl_dev) {
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+                       if (copy_from_user(&p, data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
@@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        memset(&p, 0, sizeof(p));
                }
                ip6_tnl_parm_to_user(&p, &t->parms);
-               if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
+               if (copy_to_user(data, &p, sizeof(p)))
                        err = -EFAULT;
-               }
                break;
        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
@@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        break;
                err = -EFAULT;
-               if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+               if (copy_from_user(&p, data, sizeof(p)))
                        break;
                err = -EINVAL;
                if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
@@ -1669,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (!IS_ERR(t)) {
                        err = 0;
                        ip6_tnl_parm_to_user(&p, &t->parms);
-                       if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+                       if (copy_to_user(data, &p, sizeof(p)))
                                err = -EFAULT;
 
                } else {
@@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
                if (dev == ip6n->fb_tnl_dev) {
                        err = -EFAULT;
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+                       if (copy_from_user(&p, data, sizeof(p)))
                                break;
                        err = -ENOENT;
                        ip6_tnl_parm_from_user(&p1, &p);
@@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
        .ndo_init       = ip6_tnl_dev_init,
        .ndo_uninit     = ip6_tnl_dev_uninit,
        .ndo_start_xmit = ip6_tnl_start_xmit,
-       .ndo_do_ioctl   = ip6_tnl_ioctl,
+       .ndo_siocdevprivate = ip6_tnl_siocdevprivate,
        .ndo_change_mtu = ip6_tnl_change_mtu,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip6_tnl_get_iflink,
index 2d048e2..1d8e3ff 100644 (file)
@@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
 }
 
 /**
- * vti6_ioctl - configure vti6 tunnels from userspace
+ * vti6_siocdevprivate - configure vti6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
- *   @ifr: parameters passed from userspace
+ *   @ifr: unused
+ *   @data: parameters passed from userspace
  *   @cmd: command to be performed
  *
  * Description:
- *   vti6_ioctl() is used for managing vti6 tunnels
+ *   vti6_siocdevprivate() is used for managing vti6 tunnels
  *   from userspace.
  *
  *   The possible commands are the following:
@@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
  *   %-ENODEV if attempting to change or delete a nonexisting device
  **/
 static int
-vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
 {
        int err = 0;
        struct ip6_tnl_parm2 p;
@@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        switch (cmd) {
        case SIOCGETTUNNEL:
                if (dev == ip6n->fb_tnl_dev) {
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+                       if (copy_from_user(&p, data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
@@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (!t)
                        t = netdev_priv(dev);
                vti6_parm_to_user(&p, &t->parms);
-               if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+               if (copy_to_user(data, &p, sizeof(p)))
                        err = -EFAULT;
                break;
        case SIOCADDTUNNEL:
@@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        break;
                err = -EFAULT;
-               if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+               if (copy_from_user(&p, data, sizeof(p)))
                        break;
                err = -EINVAL;
                if (p.proto != IPPROTO_IPV6  && p.proto != 0)
@@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (t) {
                        err = 0;
                        vti6_parm_to_user(&p, &t->parms);
-                       if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+                       if (copy_to_user(data, &p, sizeof(p)))
                                err = -EFAULT;
 
                } else
@@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
                if (dev == ip6n->fb_tnl_dev) {
                        err = -EFAULT;
-                       if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+                       if (copy_from_user(&p, data, sizeof(p)))
                                break;
                        err = -ENOENT;
                        vti6_parm_from_user(&p1, &p);
@@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = {
        .ndo_init       = vti6_dev_init,
        .ndo_uninit     = vti6_dev_uninit,
        .ndo_start_xmit = vti6_tnl_xmit,
-       .ndo_do_ioctl   = vti6_ioctl,
+       .ndo_siocdevprivate = vti6_siocdevprivate,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip6_tnl_get_iflink,
 };
index 06b0d2c..36ed9ef 100644 (file)
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = mrt->vif_table[reg_vif_num].dev;
-       if (reg_dev)
-               dev_hold(reg_dev);
+       dev_hold(reg_dev);
        read_unlock(&mrt_lock);
 
        if (!reg_dev)
index a6804a7..e4bdb09 100644 (file)
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
                goto out_free_gsf;
 
-       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
 static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        void *p;
        int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (!p)
                return -ENOMEM;
 
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
        ret = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
                goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_p;
 
        ret = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_p;
 
        ret = ip6_mc_msfilter(sk, &(struct group_filter){
                        .gf_interface = gf32->gf_interface,
                        .gf_group = gf32->gf_group,
                        .gf_fmode = gf32->gf_fmode,
-                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
 
 out_free_p:
        kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
        num = gsf.gf_numsrc;
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
        if (!err) {
                if (num > gsf.gf_numsrc)
                        num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
 static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
 
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
        release_sock(sk);
        if (err)
                return err;
index 54ec163..cd951fa 100644 (file)
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
                if (psl) {
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
-                       atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                goto done;
        }
        if (gsf->gf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
-                                                         GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     gsf->gf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                                     newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
                        mutex_unlock(&idev->mc_lock);
-                       sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+                                                            newpsl->sl_max));
                        goto done;
                }
                mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
        if (psl) {
                ip6_mc_del_src(idev, group, pmc->sfmode,
                               psl->sl_count, psl->sl_addr, 0);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else {
                ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
                err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
                                     psl->sl_count, psl->sl_addr, 0);
                RCU_INIT_POINTER(iml->sflist, NULL);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        }
 
index c467c64..4b09852 100644 (file)
@@ -1391,12 +1391,6 @@ skip_defrtr:
                }
        }
 
-       /*
-        *      Send a notify if RA changed managed/otherconf flags or timer settings
-        */
-       if (send_ifinfo_notify)
-               inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
-
 skip_linkparms:
 
        /*
@@ -1496,6 +1490,11 @@ skip_routeinfo:
                memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
                mtu = ntohl(n);
 
+               if (in6_dev->ra_mtu != mtu) {
+                       in6_dev->ra_mtu = mtu;
+                       send_ifinfo_notify = true;
+               }
+
                if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
                        ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
                } else if (in6_dev->cnf.mtu6 != mtu) {
@@ -1519,6 +1518,12 @@ skip_routeinfo:
                ND_PRINTK(2, warn, "RA: invalid RA options\n");
        }
 out:
+       /* Send a notify if RA changed managed/otherconf flags or
+        * timer settings or ra_mtu value
+        */
+       if (send_ifinfo_notify)
+               inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+
        fib6_info_release(rt);
        if (neigh)
                neigh_release(neigh);
index bb784ea..727ee80 100644 (file)
@@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table");
                            (1 << NF_INET_FORWARD) | \
                            (1 << NF_INET_LOCAL_OUT))
 
-static int __net_init ip6table_filter_table_init(struct net *net);
-
 static const struct xt_table packet_filter = {
        .name           = "filter",
        .valid_hooks    = FILTER_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV6,
        .priority       = NF_IP6_PRI_FILTER,
-       .table_init     = ip6table_filter_table_init,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
 static bool forward = true;
 module_param(forward, bool, 0000);
 
-static int __net_init ip6table_filter_table_init(struct net *net)
+static int ip6table_filter_table_init(struct net *net)
 {
        struct ip6t_replace *repl;
        int err;
@@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
 
 static int __net_init ip6table_filter_net_init(struct net *net)
 {
-       if (net == &init_net || !forward)
+       if (!forward)
                return ip6table_filter_table_init(net);
 
        return 0;
@@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = {
 
 static int __init ip6table_filter_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&packet_filter,
+                                       ip6table_filter_table_init);
+
+       if (ret < 0)
+               return ret;
 
        filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
-       if (IS_ERR(filter_ops))
+       if (IS_ERR(filter_ops)) {
+               xt_unregister_template(&packet_filter);
                return PTR_ERR(filter_ops);
+       }
 
        ret = register_pernet_subsys(&ip6table_filter_net_ops);
-       if (ret < 0)
+       if (ret < 0) {
+               xt_unregister_template(&packet_filter);
                kfree(filter_ops);
+               return ret;
+       }
 
        return ret;
 }
@@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void)
 static void __exit ip6table_filter_fini(void)
 {
        unregister_pernet_subsys(&ip6table_filter_net_ops);
+       xt_unregister_template(&packet_filter);
        kfree(filter_ops);
 }
 
index c76cffd..9b518ce 100644 (file)
@@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table");
                            (1 << NF_INET_LOCAL_OUT) | \
                            (1 << NF_INET_POST_ROUTING))
 
-static int __net_init ip6table_mangle_table_init(struct net *net);
-
 static const struct xt_table packet_mangler = {
        .name           = "mangle",
        .valid_hooks    = MANGLE_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV6,
        .priority       = NF_IP6_PRI_MANGLE,
-       .table_init     = ip6table_mangle_table_init,
 };
 
 static unsigned int
@@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init ip6table_mangle_table_init(struct net *net)
+static int ip6table_mangle_table_init(struct net *net)
 {
        struct ip6t_replace *repl;
        int ret;
@@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = {
 
 static int __init ip6table_mangle_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&packet_mangler,
+                                      ip6table_mangle_table_init);
+
+       if (ret < 0)
+               return ret;
 
        mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
-       if (IS_ERR(mangle_ops))
+       if (IS_ERR(mangle_ops)) {
+               xt_unregister_template(&packet_mangler);
                return PTR_ERR(mangle_ops);
+       }
 
        ret = register_pernet_subsys(&ip6table_mangle_net_ops);
        if (ret < 0) {
+               xt_unregister_template(&packet_mangler);
                kfree(mangle_ops);
                return ret;
        }
 
-       ret = ip6table_mangle_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&ip6table_mangle_net_ops);
-               kfree(mangle_ops);
-       }
        return ret;
 }
 
 static void __exit ip6table_mangle_fini(void)
 {
        unregister_pernet_subsys(&ip6table_mangle_net_ops);
+       xt_unregister_template(&packet_mangler);
        kfree(mangle_ops);
 }
 
index b029225..921c172 100644 (file)
@@ -19,8 +19,6 @@ struct ip6table_nat_pernet {
        struct nf_hook_ops *nf_nat_ops;
 };
 
-static int __net_init ip6table_nat_table_init(struct net *net);
-
 static unsigned int ip6table_nat_net_id __read_mostly;
 
 static const struct xt_table nf_nat_ipv6_table = {
@@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = {
                          (1 << NF_INET_LOCAL_IN),
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV6,
-       .table_init     = ip6table_nat_table_init,
 };
 
 static unsigned int ip6table_nat_do_chain(void *priv,
@@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
        kfree(ops);
 }
 
-static int __net_init ip6table_nat_table_init(struct net *net)
+static int ip6table_nat_table_init(struct net *net)
 {
        struct ip6t_replace *repl;
        int ret;
@@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = {
 
 static int __init ip6table_nat_init(void)
 {
-       int ret = register_pernet_subsys(&ip6table_nat_net_ops);
+       int ret = xt_register_template(&nf_nat_ipv6_table,
+                                      ip6table_nat_table_init);
 
-       if (ret)
+       if (ret < 0)
                return ret;
 
-       ret = ip6table_nat_table_init(&init_net);
+       ret = register_pernet_subsys(&ip6table_nat_net_ops);
        if (ret)
-               unregister_pernet_subsys(&ip6table_nat_net_ops);
+               xt_unregister_template(&nf_nat_ipv6_table);
+
        return ret;
 }
 
 static void __exit ip6table_nat_exit(void)
 {
        unregister_pernet_subsys(&ip6table_nat_net_ops);
+       xt_unregister_template(&nf_nat_ipv6_table);
 }
 
 module_init(ip6table_nat_init);
index f63c106..4f2a04a 100644 (file)
@@ -11,8 +11,6 @@
 
 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
 
-static int __net_init ip6table_raw_table_init(struct net *net);
-
 static bool raw_before_defrag __read_mostly;
 MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
 module_param(raw_before_defrag, bool, 0000);
@@ -23,7 +21,6 @@ static const struct xt_table packet_raw = {
        .me = THIS_MODULE,
        .af = NFPROTO_IPV6,
        .priority = NF_IP6_PRI_RAW,
-       .table_init = ip6table_raw_table_init,
 };
 
 static const struct xt_table packet_raw_before_defrag = {
@@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = {
        .me = THIS_MODULE,
        .af = NFPROTO_IPV6,
        .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
-       .table_init = ip6table_raw_table_init,
 };
 
 /* The work comes in here from netfilter.c. */
@@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
-static int __net_init ip6table_raw_table_init(struct net *net)
+static int ip6table_raw_table_init(struct net *net)
 {
        struct ip6t_replace *repl;
        const struct xt_table *table = &packet_raw;
@@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = {
 
 static int __init ip6table_raw_init(void)
 {
-       int ret;
        const struct xt_table *table = &packet_raw;
+       int ret;
 
        if (raw_before_defrag) {
                table = &packet_raw_before_defrag;
-
                pr_info("Enabling raw table before defrag\n");
        }
 
+       ret = xt_register_template(table, ip6table_raw_table_init);
+       if (ret < 0)
+               return ret;
+
        /* Register hooks */
        rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
-       if (IS_ERR(rawtable_ops))
+       if (IS_ERR(rawtable_ops)) {
+               xt_unregister_template(table);
                return PTR_ERR(rawtable_ops);
+       }
 
        ret = register_pernet_subsys(&ip6table_raw_net_ops);
        if (ret < 0) {
                kfree(rawtable_ops);
+               xt_unregister_template(table);
                return ret;
        }
 
-       ret = ip6table_raw_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&ip6table_raw_net_ops);
-               kfree(rawtable_ops);
-       }
        return ret;
 }
 
 static void __exit ip6table_raw_fini(void)
 {
        unregister_pernet_subsys(&ip6table_raw_net_ops);
+       xt_unregister_template(&packet_raw);
        kfree(rawtable_ops);
 }
 
index 8dc335c..9316740 100644 (file)
@@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
                                (1 << NF_INET_FORWARD) | \
                                (1 << NF_INET_LOCAL_OUT)
 
-static int __net_init ip6table_security_table_init(struct net *net);
-
 static const struct xt_table security_table = {
        .name           = "security",
        .valid_hooks    = SECURITY_VALID_HOOKS,
        .me             = THIS_MODULE,
        .af             = NFPROTO_IPV6,
        .priority       = NF_IP6_PRI_SECURITY,
-       .table_init     = ip6table_security_table_init,
 };
 
 static unsigned int
@@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
-static int __net_init ip6table_security_table_init(struct net *net)
+static int ip6table_security_table_init(struct net *net)
 {
        struct ip6t_replace *repl;
        int ret;
@@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = {
 
 static int __init ip6table_security_init(void)
 {
-       int ret;
+       int ret = xt_register_template(&security_table,
+                                      ip6table_security_table_init);
+
+       if (ret < 0)
+               return ret;
 
        sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
-       if (IS_ERR(sectbl_ops))
+       if (IS_ERR(sectbl_ops)) {
+               xt_unregister_template(&security_table);
                return PTR_ERR(sectbl_ops);
+       }
 
        ret = register_pernet_subsys(&ip6table_security_net_ops);
        if (ret < 0) {
                kfree(sectbl_ops);
+               xt_unregister_template(&security_table);
                return ret;
        }
 
-       ret = ip6table_security_table_init(&init_net);
-       if (ret) {
-               unregister_pernet_subsys(&ip6table_security_net_ops);
-               kfree(sectbl_ops);
-       }
        return ret;
 }
 
 static void __exit ip6table_security_fini(void)
 {
        unregister_pernet_subsys(&ip6table_security_net_ops);
+       xt_unregister_template(&security_table);
        kfree(sectbl_ops);
 }
 
index c5e8ecb..dbc2240 100644 (file)
@@ -1657,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
        struct in6_addr *src_key = NULL;
        struct rt6_exception *rt6_ex;
        struct fib6_nh *nh = res->nh;
+       int max_depth;
        int err = 0;
 
        spin_lock_bh(&rt6_exception_lock);
@@ -1711,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
        bucket->depth++;
        net->ipv6.rt6_stats->fib_rt_cache++;
 
-       if (bucket->depth > FIB6_MAX_DEPTH)
+       /* Randomize max depth to avoid some side channels attacks. */
+       max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
+       while (bucket->depth > max_depth)
                rt6_exception_remove_oldest(bucket);
 
 out:
@@ -3209,25 +3212,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 
 INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
 {
-       struct inet6_dev *idev;
-       unsigned int mtu;
-
-       mtu = dst_metric_raw(dst, RTAX_MTU);
-       if (mtu)
-               goto out;
-
-       mtu = IPV6_MIN_MTU;
-
-       rcu_read_lock();
-       idev = __in6_dev_get(dst->dev);
-       if (idev)
-               mtu = idev->cnf.mtu6;
-       rcu_read_unlock();
-
-out:
-       mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
-       return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+       return ip6_dst_mtu_maybe_forward(dst, false);
 }
 EXPORT_INDIRECT_CALLABLE(ip6_mtu);
 
@@ -3652,8 +3637,7 @@ out:
        if (err) {
                lwtstate_put(fib6_nh->fib_nh_lws);
                fib6_nh->fib_nh_lws = NULL;
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
        }
 
        return err;
@@ -6646,7 +6630,7 @@ int __init ip6_route_init(void)
        ret = -ENOMEM;
        ip6_dst_ops_template.kmem_cachep =
                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
-                                 SLAB_HWCACHE_ALIGN, NULL);
+                                 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!ip6_dst_ops_template.kmem_cachep)
                goto out;
 
index 897fa59..1bf5f5a 100644 (file)
@@ -26,6 +26,7 @@
 #ifdef CONFIG_IPV6_SEG6_HMAC
 #include <net/seg6_hmac.h>
 #endif
+#include <linux/netfilter.h>
 
 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
 {
@@ -295,11 +296,19 @@ static int seg6_do_srh(struct sk_buff *skb)
 
        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+       nf_reset_ct(skb);
 
        return 0;
 }
 
-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+                          struct sk_buff *skb)
 {
        struct dst_entry *orig_dst = skb_dst(skb);
        struct dst_entry *dst = NULL;
@@ -337,10 +346,41 @@ static int seg6_input(struct sk_buff *skb)
        if (unlikely(err))
                return err;
 
-       return dst_input(skb);
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+                              dev_net(skb->dev), NULL, skb, NULL,
+                              skb_dst(skb)->dev, seg6_input_finish);
+
+       return seg6_input_finish(dev_net(skb->dev), NULL, skb);
 }
 
-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input_nf(struct sk_buff *skb)
+{
+       struct net_device *dev = skb_dst(skb)->dev;
+       struct net *net = dev_net(skb->dev);
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
+                              skb, NULL, dev, seg6_input_core);
+       case htons(ETH_P_IPV6):
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
+                              skb, NULL, dev, seg6_input_core);
+       }
+
+       return -EINVAL;
+}
+
+static int seg6_input(struct sk_buff *skb)
+{
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return seg6_input_nf(skb);
+
+       return seg6_input_core(dev_net(skb->dev), NULL, skb);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+                           struct sk_buff *skb)
 {
        struct dst_entry *orig_dst = skb_dst(skb);
        struct dst_entry *dst = NULL;
@@ -387,12 +427,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
        if (unlikely(err))
                goto drop;
 
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+                              NULL, skb_dst(skb)->dev, dst_output);
+
        return dst_output(net, sk, skb);
 drop:
        kfree_skb(skb);
        return err;
 }
 
+static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       struct net_device *dev = skb_dst(skb)->dev;
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+                              NULL, dev, seg6_output_core);
+       case htons(ETH_P_IPV6):
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+                              NULL, dev, seg6_output_core);
+       }
+
+       return -EINVAL;
+}
+
+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return seg6_output_nf(net, sk, skb);
+
+       return seg6_output_core(net, sk, skb);
+}
+
 static int seg6_build_state(struct net *net, struct nlattr *nla,
                            unsigned int family, const void *cfg,
                            struct lwtunnel_state **ts,
index 60bf3b8..2dc40b3 100644 (file)
@@ -30,6 +30,7 @@
 #include <net/seg6_local.h>
 #include <linux/etherdevice.h>
 #include <linux/bpf.h>
+#include <linux/netfilter.h>
 
 #define SEG6_F_ATTR(i)         BIT(i)
 
@@ -413,12 +414,33 @@ drop:
        return -EINVAL;
 }
 
+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+                                      struct sk_buff *skb)
+{
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct in6_addr *nhaddr = NULL;
+       struct seg6_local_lwt *slwt;
+
+       slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+
+       /* The inner packet is not associated to any local interface,
+        * so we do not call netif_rx().
+        *
+        * If slwt->nh6 is set to ::, then lookup the nexthop for the
+        * inner packet's DA. Otherwise, use the specified nexthop.
+        */
+       if (!ipv6_addr_any(&slwt->nh6))
+               nhaddr = &slwt->nh6;
+
+       seg6_lookup_nexthop(skb, nhaddr, 0);
+
+       return dst_input(skb);
+}
+
 /* decapsulate and forward to specified nexthop */
 static int input_action_end_dx6(struct sk_buff *skb,
                                struct seg6_local_lwt *slwt)
 {
-       struct in6_addr *nhaddr = NULL;
-
        /* this function accepts IPv6 encapsulated packets, with either
         * an SRH with SL=0, or no SRH.
         */
@@ -429,40 +451,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                goto drop;
 
-       /* The inner packet is not associated to any local interface,
-        * so we do not call netif_rx().
-        *
-        * If slwt->nh6 is set to ::, then lookup the nexthop for the
-        * inner packet's DA. Otherwise, use the specified nexthop.
-        */
-
-       if (!ipv6_addr_any(&slwt->nh6))
-               nhaddr = &slwt->nh6;
-
        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+       nf_reset_ct(skb);
 
-       seg6_lookup_nexthop(skb, nhaddr, 0);
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+                              dev_net(skb->dev), NULL, skb, NULL,
+                              skb_dst(skb)->dev, input_action_end_dx6_finish);
 
-       return dst_input(skb);
+       return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
 drop:
        kfree_skb(skb);
        return -EINVAL;
 }
 
-static int input_action_end_dx4(struct sk_buff *skb,
-                               struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+                                      struct sk_buff *skb)
 {
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct seg6_local_lwt *slwt;
        struct iphdr *iph;
        __be32 nhaddr;
        int err;
 
-       if (!decap_and_validate(skb, IPPROTO_IPIP))
-               goto drop;
-
-       if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-               goto drop;
-
-       skb->protocol = htons(ETH_P_IP);
+       slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 
        iph = ip_hdr(skb);
 
@@ -470,14 +482,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
 
        skb_dst_drop(skb);
 
-       skb_set_transport_header(skb, sizeof(struct iphdr));
-
        err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
-       if (err)
-               goto drop;
+       if (err) {
+               kfree_skb(skb);
+               return -EINVAL;
+       }
 
        return dst_input(skb);
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+                               struct seg6_local_lwt *slwt)
+{
+       if (!decap_and_validate(skb, IPPROTO_IPIP))
+               goto drop;
+
+       if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+               goto drop;
+
+       skb->protocol = htons(ETH_P_IP);
+       skb_set_transport_header(skb, sizeof(struct iphdr));
+       nf_reset_ct(skb);
+
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+                              dev_net(skb->dev), NULL, skb, NULL,
+                              skb_dst(skb)->dev, input_action_end_dx4_finish);
 
+       return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
 drop:
        kfree_skb(skb);
        return -EINVAL;
@@ -645,6 +677,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
        skb_dst_drop(skb);
 
        skb_set_transport_header(skb, hdrlen);
+       nf_reset_ct(skb);
 
        return end_dt_vrf_rcv(skb, family, vrf);
 
@@ -1078,7 +1111,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
        u64_stats_update_end(&pcounters->syncp);
 }
 
-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+                                struct sk_buff *skb)
 {
        struct dst_entry *orig_dst = skb_dst(skb);
        struct seg6_action_desc *desc;
@@ -1086,11 +1120,6 @@ static int seg6_local_input(struct sk_buff *skb)
        unsigned int len = skb->len;
        int rc;
 
-       if (skb->protocol != htons(ETH_P_IPV6)) {
-               kfree_skb(skb);
-               return -EINVAL;
-       }
-
        slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
        desc = slwt->desc;
 
@@ -1104,6 +1133,21 @@ static int seg6_local_input(struct sk_buff *skb)
        return rc;
 }
 
+static int seg6_local_input(struct sk_buff *skb)
+{
+       if (skb->protocol != htons(ETH_P_IPV6)) {
+               kfree_skb(skb);
+               return -EINVAL;
+       }
+
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+                              dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+                              seg6_local_input_core);
+
+       return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
+}
+
 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
        [SEG6_LOCAL_ACTION]     = { .type = NLA_U32 },
        [SEG6_LOCAL_SRH]        = { .type = NLA_BINARY },
index df5bea8..ef0c7a7 100644 (file)
@@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 
 }
 
-static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
 {
-       struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_prl kprl, *kp;
        struct ip_tunnel_prl_entry *prl;
@@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
         * we try harder to allocate.
         */
        kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
-               kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+               kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
                NULL;
 
        rcu_read_lock();
@@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
                 * For root users, retry allocating enough memory for
                 * the answer.
                 */
-               kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+               kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+                                             __GFP_NOWARN);
                if (!kp) {
                        ret = -ENOMEM;
                        goto out;
@@ -453,8 +453,8 @@ out:
        return err;
 }
 
-static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
-               int cmd)
+static int ipip6_tunnel_prl_ctl(struct net_device *dev,
+                               struct ip_tunnel_prl __user *data, int cmd)
 {
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_prl prl;
@@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
                return -EINVAL;
 
-       if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+       if (copy_from_user(&prl, data, sizeof(prl)))
                return -EFAULT;
 
        switch (cmd) {
@@ -1197,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
 }
 
 static int
-ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
 {
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_6rd ip6rd;
        struct ip_tunnel_parm p;
 
        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
-               if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+               if (copy_from_user(&p, data, sizeof(p)))
                        return -EFAULT;
                t = ipip6_tunnel_locate(t->net, &p, 0);
        }
@@ -1215,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
        ip6rd.relay_prefix = t->ip6rd.relay_prefix;
        ip6rd.prefixlen = t->ip6rd.prefixlen;
        ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
-       if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+       if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
                return -EFAULT;
        return 0;
 }
 
 static int
-ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
+                   int cmd)
 {
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_6rd ip6rd;
@@ -1229,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
        if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
-       if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+       if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
                return -EFAULT;
 
        if (cmd != SIOCDEL6RD) {
@@ -1368,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 }
 
 static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+                           void __user *data, int cmd)
 {
        switch (cmd) {
        case SIOCGETTUNNEL:
        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
        case SIOCDELTUNNEL:
-               return ip_tunnel_ioctl(dev, ifr, cmd);
+               return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
        case SIOCGETPRL:
-               return ipip6_tunnel_get_prl(dev, ifr);
+               return ipip6_tunnel_get_prl(dev, data);
        case SIOCADDPRL:
        case SIOCDELPRL:
        case SIOCCHGPRL:
-               return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
+               return ipip6_tunnel_prl_ctl(dev, data, cmd);
 #ifdef CONFIG_IPV6_SIT_6RD
        case SIOCGET6RD:
-               return ipip6_tunnel_get6rd(dev, ifr);
+               return ipip6_tunnel_get6rd(dev, data);
        case SIOCADD6RD:
        case SIOCCHG6RD:
        case SIOCDEL6RD:
-               return ipip6_tunnel_6rdctl(dev, ifr, cmd);
+               return ipip6_tunnel_6rdctl(dev, data, cmd);
 #endif
        default:
                return -EINVAL;
@@ -1399,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
        .ndo_init       = ipip6_tunnel_init,
        .ndo_uninit     = ipip6_tunnel_uninit,
        .ndo_start_xmit = sit_tunnel_xmit,
-       .ndo_do_ioctl   = ipip6_tunnel_ioctl,
+       .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip_tunnel_get_iflink,
        .ndo_tunnel_ctl = ipip6_tunnel_ctl,
index d7cf26f..d53dd14 100644 (file)
@@ -21,6 +21,7 @@
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
+#include <linux/ioam6.h>
 
 static int two = 2;
 static int three = 3;
@@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 static u32 rt6_multipath_hash_fields_all_mask =
        FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
 
 static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
                                          void *buffer, size_t *lenp, loff_t *ppos)
@@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = &two,
        },
+       {
+               .procname       = "ioam6_id",
+               .data           = &init_net.ipv6.sysctl.ioam6_id,
+               .maxlen         = sizeof(u32),
+               .mode           = 0644,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra2         = &ioam6_id_max,
+       },
+       {
+               .procname       = "ioam6_id_wide",
+               .data           = &init_net.ipv6.sysctl.ioam6_id_wide,
+               .maxlen         = sizeof(u64),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_minmax,
+               .extra2         = &ioam6_id_wide_max,
+       },
        { }
 };
 
index c5e15e9..ea53847 100644 (file)
@@ -1475,7 +1475,7 @@ do_udp_sendmsg:
                fl6.saddr = np->saddr;
        fl6.fl6_sport = inet->inet_sport;
 
-       if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
+       if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
                err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
                                           (struct sockaddr *)sin6, &fl6.saddr);
                if (err)
index 44453b3..18316ee 100644 (file)
@@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                        if (err == 0) {
                                atomic_dec(&iucv->skbs_in_xmit);
                                skb_unlink(skb, &iucv->send_skb_q);
-                               kfree_skb(skb);
+                               consume_skb(skb);
                        }
 
                        /* this error should never happen since the     */
@@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
                        }
                }
 
-               kfree_skb(skb);
+               consume_skb(skb);
                if (iucv->transport == AF_IUCV_TRANS_HIPER) {
                        atomic_inc(&iucv->msg_recv);
                        if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
@@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
        spin_unlock_irqrestore(&list->lock, flags);
 
        if (this) {
-               kfree_skb(this);
+               consume_skb(this);
                /* wake up any process waiting for sending */
                iucv_sock_wake_msglim(sk);
        }
@@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
        sk->sk_state = IUCV_CONNECTED;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        sk->sk_state = IUCV_DISCONN;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
        struct iucv_sock *iucv = iucv_sk(sk);
 
        /* other end of connection closed */
-       if (!iucv)
-               goto out;
+       if (!iucv) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        if (sk->sk_state == IUCV_CONNECTED) {
                sk->sk_state = IUCV_DISCONN;
                sk->sk_state_change(sk);
        }
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
        case (AF_IUCV_FLAG_WIN):
                err = afiucv_hs_callback_win(sk, skb);
                if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
-                       kfree_skb(skb);
+                       consume_skb(skb);
                        break;
                }
                fallthrough;    /* and receive non-zero length data */
@@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = {
        .func = afiucv_hs_rcv,
 };
 
-static int afiucv_iucv_init(void)
-{
-       return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
-       pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
 static int __init afiucv_init(void)
 {
        int err;
 
-       if (MACHINE_IS_VM) {
+       if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
                cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
                if (unlikely(err)) {
                        WARN_ON(err);
@@ -2284,11 +2276,7 @@ static int __init afiucv_init(void)
                        goto out;
                }
 
-               pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
-               if (!pr_iucv) {
-                       printk(KERN_WARNING "iucv_if lookup failed\n");
-                       memset(&iucv_userid, 0, sizeof(iucv_userid));
-               }
+               pr_iucv = &iucv_if;
        } else {
                memset(&iucv_userid, 0, sizeof(iucv_userid));
                pr_iucv = NULL;
@@ -2302,7 +2290,7 @@ static int __init afiucv_init(void)
                goto out_proto;
 
        if (pr_iucv) {
-               err = afiucv_iucv_init();
+               err = pr_iucv->iucv_register(&af_iucv_handler, 0);
                if (err)
                        goto out_sock;
        }
@@ -2316,23 +2304,19 @@ static int __init afiucv_init(void)
 
 out_notifier:
        if (pr_iucv)
-               afiucv_iucv_exit();
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 out_sock:
        sock_unregister(PF_IUCV);
 out_proto:
        proto_unregister(&iucv_proto);
 out:
-       if (pr_iucv)
-               symbol_put(iucv_if);
        return err;
 }
 
 static void __exit afiucv_exit(void)
 {
-       if (pr_iucv) {
-               afiucv_iucv_exit();
-               symbol_put(iucv_if);
-       }
+       if (pr_iucv)
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 
        unregister_netdevice_notifier(&afiucv_netdev_notifier);
        dev_remove_pack(&iucv_packet_type);
index e6795d5..f3343a8 100644 (file)
@@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
  */
 static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       int cc;
 
-       reg0 = command;
-       reg1 = (unsigned long)parm;
        asm volatile(
-               "       .long 0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
-               :  "m" (*parm) : "cc");
-       return ccode;
+               "       lgr     0,%[reg0]\n"
+               "       lgr     1,%[reg1]\n"
+               "       .long   0xb2f01000\n"
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               : [cc] "=&d" (cc), "+m" (*parm)
+               : [reg0] "d" ((unsigned long)command),
+                 [reg1] "d" ((unsigned long)parm)
+               : "cc", "0", "1");
+       return cc;
 }
 
 static inline int iucv_call_b2f0(int command, union iucv_param *parm)
@@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
  */
 static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       unsigned long reg1 = (unsigned long)param;
+       int cc;
 
-       reg0 = IUCV_QUERY;
-       reg1 = (unsigned long) param;
        asm volatile (
+               "       lghi    0,%[cmd]\n"
+               "       lgr     1,%[reg1]\n"
                "       .long   0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               "       lgr     %[reg1],1\n"
+               : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+               : [cmd] "K" (IUCV_QUERY)
+               : "cc", "0", "1");
        *max_pathid = reg1;
-       return ccode;
+       return cc;
 }
 
 static int iucv_query_maxconn(void)
@@ -500,14 +502,14 @@ static void iucv_setmask_mp(void)
 {
        int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        for_each_online_cpu(cpu)
                /* Enable all cpus with a declared buffer. */
                if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
                    !cpumask_test_cpu(cpu, &iucv_irq_cpumask))
                        smp_call_function_single(cpu, iucv_allow_cpu,
                                                 NULL, 1);
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /**
@@ -540,7 +542,7 @@ static int iucv_enable(void)
        size_t alloc_size;
        int cpu, rc;
 
-       get_online_cpus();
+       cpus_read_lock();
        rc = -ENOMEM;
        alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
        iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -553,12 +555,12 @@ static int iucv_enable(void)
        if (cpumask_empty(&iucv_buffer_cpumask))
                /* No cpu could declare an iucv buffer. */
                goto out;
-       put_online_cpus();
+       cpus_read_unlock();
        return 0;
 out:
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
        return rc;
 }
 
@@ -571,11 +573,11 @@ out:
  */
 static void iucv_disable(void)
 {
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu(iucv_retrieve_cpu, NULL, 1);
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 static int iucv_cpu_dead(unsigned int cpu)
@@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this,
        if (cpumask_empty(&iucv_irq_cpumask))
                return NOTIFY_DONE;
 
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
        preempt_disable();
        for (i = 0; i < iucv_max_pathid; i++) {
@@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this,
                        iucv_sever_pathid(i, NULL);
        }
        preempt_enable();
-       put_online_cpus();
+       cpus_read_unlock();
        iucv_disable();
        return NOTIFY_DONE;
 }
index ac5cadd..3086f4a 100644 (file)
@@ -224,8 +224,7 @@ static int llc_ui_release(struct socket *sock)
        } else {
                release_sock(sk);
        }
-       if (llc->dev)
-               dev_put(llc->dev);
+       dev_put(llc->dev);
        sock_put(sk);
        llc_sk_free(sk);
 out:
@@ -363,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
        } else
                llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
                                           addr->sllc_mac);
-       if (llc->dev)
-               dev_hold(llc->dev);
+       dev_hold(llc->dev);
        rcu_read_unlock();
        if (!llc->dev)
                goto out;
index 4e6f11e..d69b31c 100644 (file)
@@ -828,9 +828,11 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
        return ret;
 }
 
-static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
-                                   const u8 *resp, size_t resp_len,
-                                   const struct ieee80211_csa_settings *csa)
+static int
+ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
+                        const u8 *resp, size_t resp_len,
+                        const struct ieee80211_csa_settings *csa,
+                        const struct ieee80211_color_change_settings *cca)
 {
        struct probe_resp *new, *old;
 
@@ -850,6 +852,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
                memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp,
                       csa->n_counter_offsets_presp *
                       sizeof(new->cntdwn_counter_offsets[0]));
+       else if (cca)
+               new->cntdwn_counter_offsets[0] = cca->counter_offset_presp;
 
        rcu_assign_pointer(sdata->u.ap.probe_resp, new);
        if (old)
@@ -955,7 +959,8 @@ static int ieee80211_set_ftm_responder_params(
 
 static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
                                   struct cfg80211_beacon_data *params,
-                                  const struct ieee80211_csa_settings *csa)
+                                  const struct ieee80211_csa_settings *csa,
+                                  const struct ieee80211_color_change_settings *cca)
 {
        struct beacon_data *new, *old;
        int new_head_len, new_tail_len;
@@ -1004,6 +1009,9 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
                memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon,
                       csa->n_counter_offsets_beacon *
                       sizeof(new->cntdwn_counter_offsets[0]));
+       } else if (cca) {
+               new->cntdwn_current_counter = cca->count;
+               new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon;
        }
 
        /* copy in head */
@@ -1020,7 +1028,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
                        memcpy(new->tail, old->tail, new_tail_len);
 
        err = ieee80211_set_probe_resp(sdata, params->probe_resp,
-                                      params->probe_resp_len, csa);
+                                      params->probe_resp_len, csa, cca);
        if (err < 0) {
                kfree(new);
                return err;
@@ -1175,7 +1183,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
        if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
                sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate;
 
-       err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
+       err = ieee80211_assign_beacon(sdata, &params->beacon, NULL, NULL);
        if (err < 0)
                goto error;
        changed |= err;
@@ -1230,17 +1238,17 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
        sdata = IEEE80211_DEV_TO_SUB_IF(dev);
        sdata_assert_lock(sdata);
 
-       /* don't allow changing the beacon while CSA is in place - offset
+       /* don't allow changing the beacon while a countdown is in place - offset
         * of channel switch counter may change
         */
-       if (sdata->vif.csa_active)
+       if (sdata->vif.csa_active || sdata->vif.color_change_active)
                return -EBUSY;
 
        old = sdata_dereference(sdata->u.ap.beacon, sdata);
        if (!old)
                return -ENOENT;
 
-       err = ieee80211_assign_beacon(sdata, params, NULL);
+       err = ieee80211_assign_beacon(sdata, params, NULL, NULL);
        if (err < 0)
                return err;
        ieee80211_bss_info_change_notify(sdata, err);
@@ -3156,7 +3164,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_AP:
                err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
-                                             NULL);
+                                             NULL, NULL);
                kfree(sdata->u.ap.next_beacon);
                sdata->u.ap.next_beacon = NULL;
 
@@ -3322,7 +3330,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
                csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
                csa.count = params->count;
 
-               err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa);
+               err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL);
                if (err < 0) {
                        kfree(sdata->u.ap.next_beacon);
                        return err;
@@ -3411,6 +3419,15 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
        return 0;
 }
 
+static void ieee80211_color_change_abort(struct ieee80211_sub_if_data  *sdata)
+{
+       sdata->vif.color_change_active = false;
+       kfree(sdata->u.ap.next_beacon);
+       sdata->u.ap.next_beacon = NULL;
+
+       cfg80211_color_change_aborted_notify(sdata->dev);
+}
+
 static int
 __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
                           struct cfg80211_csa_settings *params)
@@ -3479,6 +3496,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
                goto out;
        }
 
+       /* if there is a color change in progress, abort it */
+       if (sdata->vif.color_change_active)
+               ieee80211_color_change_abort(sdata);
+
        err = ieee80211_set_csa_beacon(sdata, params, &changed);
        if (err) {
                ieee80211_vif_unreserve_chanctx(sdata);
@@ -4130,6 +4151,196 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
        return local->ops->set_sar_specs(&local->hw, sar);
 }
 
+static int
+ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+                                       u32 *changed)
+{
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP: {
+               int ret;
+
+               ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+                                             NULL, NULL);
+               kfree(sdata->u.ap.next_beacon);
+               sdata->u.ap.next_beacon = NULL;
+
+               if (ret < 0)
+                       return ret;
+
+               *changed |= ret;
+               break;
+       }
+       default:
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int
+ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+                                 struct cfg80211_color_change_settings *params,
+                                 u32 *changed)
+{
+       struct ieee80211_color_change_settings color_change = {};
+       int err;
+
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP:
+               sdata->u.ap.next_beacon =
+                       cfg80211_beacon_dup(&params->beacon_next);
+               if (!sdata->u.ap.next_beacon)
+                       return -ENOMEM;
+
+               if (params->count <= 1)
+                       break;
+
+               color_change.counter_offset_beacon =
+                       params->counter_offset_beacon;
+               color_change.counter_offset_presp =
+                       params->counter_offset_presp;
+               color_change.count = params->count;
+
+               err = ieee80211_assign_beacon(sdata, &params->beacon_color_change,
+                                             NULL, &color_change);
+               if (err < 0) {
+                       kfree(sdata->u.ap.next_beacon);
+                       return err;
+               }
+               *changed |= err;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static void
+ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
+                                        u8 color, int enable, u32 changed)
+{
+       sdata->vif.bss_conf.he_bss_color.color = color;
+       sdata->vif.bss_conf.he_bss_color.enabled = enable;
+       changed |= BSS_CHANGED_HE_BSS_COLOR;
+
+       ieee80211_bss_info_change_notify(sdata, changed);
+}
+
+static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
+{
+       struct ieee80211_local *local = sdata->local;
+       u32 changed = 0;
+       int err;
+
+       sdata_assert_lock(sdata);
+       lockdep_assert_held(&local->mtx);
+
+       sdata->vif.color_change_active = false;
+
+       err = ieee80211_set_after_color_change_beacon(sdata, &changed);
+       if (err) {
+               cfg80211_color_change_aborted_notify(sdata->dev);
+               return err;
+       }
+
+       ieee80211_color_change_bss_config_notify(sdata,
+                                                sdata->vif.color_change_color,
+                                                1, changed);
+       cfg80211_color_change_notify(sdata->dev);
+
+       return 0;
+}
+
+void ieee80211_color_change_finalize_work(struct work_struct *work)
+{
+       struct ieee80211_sub_if_data *sdata =
+               container_of(work, struct ieee80211_sub_if_data,
+                            color_change_finalize_work);
+       struct ieee80211_local *local = sdata->local;
+
+       sdata_lock(sdata);
+       mutex_lock(&local->mtx);
+
+       /* AP might have been stopped while waiting for the lock. */
+       if (!sdata->vif.color_change_active)
+               goto unlock;
+
+       if (!ieee80211_sdata_running(sdata))
+               goto unlock;
+
+       ieee80211_color_change_finalize(sdata);
+
+unlock:
+       mutex_unlock(&local->mtx);
+       sdata_unlock(sdata);
+}
+
+void ieee80211_color_change_finish(struct ieee80211_vif *vif)
+{
+       struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+       ieee80211_queue_work(&sdata->local->hw,
+                            &sdata->color_change_finalize_work);
+}
+EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
+
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+                                      u64 color_bitmap)
+{
+       struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+       if (sdata->vif.color_change_active || sdata->vif.csa_active)
+               return;
+
+       cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+}
+EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
+
+static int
+ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
+                      struct cfg80211_color_change_settings *params)
+{
+       struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+       struct ieee80211_local *local = sdata->local;
+       u32 changed = 0;
+       int err;
+
+       sdata_assert_lock(sdata);
+
+       mutex_lock(&local->mtx);
+
+       /* don't allow another color change if one is already active or if csa
+        * is active
+        */
+       if (sdata->vif.color_change_active || sdata->vif.csa_active) {
+               err = -EBUSY;
+               goto out;
+       }
+
+       err = ieee80211_set_color_change_beacon(sdata, params, &changed);
+       if (err)
+               goto out;
+
+       sdata->vif.color_change_active = true;
+       sdata->vif.color_change_color = params->color;
+
+       cfg80211_color_change_started_notify(sdata->dev, params->count);
+
+       if (changed)
+               ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed);
+       else
+               /* if the beacon didn't change, we can finalize immediately */
+               ieee80211_color_change_finalize(sdata);
+
+out:
+       mutex_unlock(&local->mtx);
+
+       return err;
+}
+
 const struct cfg80211_ops mac80211_config_ops = {
        .add_virtual_intf = ieee80211_add_iface,
        .del_virtual_intf = ieee80211_del_iface,
@@ -4233,4 +4444,5 @@ const struct cfg80211_ops mac80211_config_ops = {
        .set_tid_config = ieee80211_set_tid_config,
        .reset_tid_config = ieee80211_reset_tid_config,
        .set_sar_specs = ieee80211_set_sar_specs,
+       .color_change = ieee80211_color_change,
 };
index bcb7cc0..cd3731c 100644 (file)
@@ -1447,4 +1447,40 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
        trace_drv_return_void(local);
 }
 
+static inline void drv_add_twt_setup(struct ieee80211_local *local,
+                                    struct ieee80211_sub_if_data *sdata,
+                                    struct ieee80211_sta *sta,
+                                    struct ieee80211_twt_setup *twt)
+{
+       struct ieee80211_twt_params *twt_agrt;
+
+       might_sleep();
+
+       if (!check_sdata_in_driver(sdata))
+               return;
+
+       twt_agrt = (void *)twt->params;
+
+       trace_drv_add_twt_setup(local, sta, twt, twt_agrt);
+       local->ops->add_twt_setup(&local->hw, sta, twt);
+       trace_drv_return_void(local);
+}
+
+static inline void drv_twt_teardown_request(struct ieee80211_local *local,
+                                           struct ieee80211_sub_if_data *sdata,
+                                           struct ieee80211_sta *sta,
+                                           u8 flowid)
+{
+       might_sleep();
+       if (!check_sdata_in_driver(sdata))
+               return;
+
+       if (!local->ops->twt_teardown_request)
+               return;
+
+       trace_drv_twt_teardown_request(local, sta, flowid);
+       local->ops->twt_teardown_request(&local->hw, sta, flowid);
+       trace_drv_return_void(local);
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
index a7ac53a..5d6ca4c 100644 (file)
@@ -489,7 +489,6 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
        const struct cfg80211_bss_ies *ies;
        u16 capability = WLAN_CAPABILITY_IBSS;
        u64 tsf;
-       int ret = 0;
 
        sdata_assert_lock(sdata);
 
@@ -501,10 +500,8 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
                                ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
                                IEEE80211_PRIVACY(ifibss->privacy));
 
-       if (WARN_ON(!cbss)) {
-               ret = -EINVAL;
-               goto out;
-       }
+       if (WARN_ON(!cbss))
+               return -EINVAL;
 
        rcu_read_lock();
        ies = rcu_dereference(cbss->ies);
@@ -520,18 +517,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
                                           sdata->vif.bss_conf.basic_rates,
                                           capability, tsf, &ifibss->chandef,
                                           NULL, csa_settings);
-       if (!presp) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!presp)
+               return -ENOMEM;
 
        rcu_assign_pointer(ifibss->presp, presp);
        if (old_presp)
                kfree_rcu(old_presp, rcu_head);
 
        return BSS_CHANGED_BEACON;
- out:
-       return ret;
 }
 
 int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
index 30ce6d2..159af6c 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/leds.h>
 #include <linux/idr.h>
 #include <linux/rhashtable.h>
+#include <linux/rbtree.h>
 #include <net/ieee80211_radiotap.h>
 #include <net/cfg80211.h>
 #include <net/mac80211.h>
@@ -244,6 +245,12 @@ struct ieee80211_csa_settings {
        u8 count;
 };
 
+struct ieee80211_color_change_settings {
+       u16 counter_offset_beacon;
+       u16 counter_offset_presp;
+       u8 count;
+};
+
 struct beacon_data {
        u8 *head, *tail;
        int head_len, tail_len;
@@ -923,6 +930,8 @@ struct ieee80211_sub_if_data {
        bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
        struct cfg80211_chan_def csa_chandef;
 
+       struct work_struct color_change_finalize_work;
+
        struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
        struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
 
@@ -937,6 +946,7 @@ struct ieee80211_sub_if_data {
 
        struct work_struct work;
        struct sk_buff_head skb_queue;
+       struct sk_buff_head status_queue;
 
        u8 needed_rx_chains;
        enum ieee80211_smps_mode smps_mode;
@@ -1524,6 +1534,7 @@ struct ieee802_11_elems {
        const struct ieee80211_he_spr *he_spr;
        const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
        const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
+       const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
        const u8 *uora_element;
        const u8 *mesh_id;
        const u8 *peering;
@@ -1574,6 +1585,8 @@ struct ieee802_11_elems {
        u8 perr_len;
        u8 country_elem_len;
        u8 bssid_index_len;
+       u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT];
+       u8 tx_pwr_env_num;
 
        /* whether a parse error occurred while retrieving these elements */
        bool parse_error;
@@ -1887,6 +1900,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work);
 int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
                             struct cfg80211_csa_settings *params);
 
+/* color change handling */
+void ieee80211_color_change_finalize_work(struct work_struct *work);
+
 /* interface handling */
 #define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
                                         NETIF_F_HW_CSUM | NETIF_F_SG | \
@@ -2068,6 +2084,11 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
 
 /* S1G */
 void ieee80211_s1g_sta_rate_init(struct sta_info *sta);
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb);
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+                                struct sk_buff *skb);
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+                                    struct sk_buff *skb);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
index 1e5e9fc..62c9559 100644 (file)
@@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
        sdata_unlock(sdata);
 
        cancel_work_sync(&sdata->csa_finalize_work);
+       cancel_work_sync(&sdata->color_change_finalize_work);
 
        cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
 
@@ -551,6 +552,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
                 */
                ieee80211_free_keys(sdata, true);
                skb_queue_purge(&sdata->skb_queue);
+               skb_queue_purge(&sdata->status_queue);
        }
 
        spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
@@ -983,6 +985,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
        }
 
        skb_queue_head_init(&sdata->skb_queue);
+       skb_queue_head_init(&sdata->status_queue);
        INIT_WORK(&sdata->work, ieee80211_iface_work);
 
        return 0;
@@ -1381,6 +1384,16 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
                        WARN_ON(1);
                        break;
                }
+       } else if (ieee80211_is_action(mgmt->frame_control) &&
+                  mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+               switch (mgmt->u.action.u.s1g.action_code) {
+               case WLAN_S1G_TWT_TEARDOWN:
+               case WLAN_S1G_TWT_SETUP:
+                       ieee80211_s1g_rx_twt_action(sdata, skb);
+                       break;
+               default:
+                       break;
+               }
        } else if (ieee80211_is_ext(mgmt->frame_control)) {
                if (sdata->vif.type == NL80211_IFTYPE_STATION)
                        ieee80211_sta_rx_queued_ext(sdata, skb);
@@ -1436,6 +1449,24 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local,
        }
 }
 
+static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata,
+                                          struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+       if (ieee80211_is_action(mgmt->frame_control) &&
+           mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+               switch (mgmt->u.action.u.s1g.action_code) {
+               case WLAN_S1G_TWT_TEARDOWN:
+               case WLAN_S1G_TWT_SETUP:
+                       ieee80211_s1g_status_twt_action(sdata, skb);
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
 static void ieee80211_iface_work(struct work_struct *work)
 {
        struct ieee80211_sub_if_data *sdata =
@@ -1465,6 +1496,16 @@ static void ieee80211_iface_work(struct work_struct *work)
                kcov_remote_stop();
        }
 
+       /* process status queue */
+       while ((skb = skb_dequeue(&sdata->status_queue))) {
+               kcov_remote_start_common(skb_get_kcov_handle(skb));
+
+               ieee80211_iface_process_status(sdata, skb);
+               kfree_skb(skb);
+
+               kcov_remote_stop();
+       }
+
        /* then other type-dependent work */
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_STATION:
@@ -1528,9 +1569,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
        }
 
        skb_queue_head_init(&sdata->skb_queue);
+       skb_queue_head_init(&sdata->status_queue);
        INIT_WORK(&sdata->work, ieee80211_iface_work);
        INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
        INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+       INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work);
        INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
        INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
 
@@ -2001,9 +2044,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
                netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
 
-               /* MTU range: 256 - 2304 */
+               /* MTU range is normally 256 - 2304, where the upper limit is
+                * the maximum MSDU size. Monitor interfaces send and receive
+                * MPDU and A-MSDU frames which may be much larger so we do
+                * not impose an upper limit in that case.
+                */
                ndev->min_mtu = 256;
-               ndev->max_mtu = local->hw.max_mtu;
+               if (type == NL80211_IFTYPE_MONITOR)
+                       ndev->max_mtu = 0;
+               else
+                       ndev->max_mtu = local->hw.max_mtu;
 
                ret = cfg80211_register_netdevice(ndev);
                if (ret) {
index fcae76d..45fb517 100644 (file)
@@ -1020,7 +1020,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
                        iftd = &sband->iftype_data[i];
 
-                       supp_he = supp_he || (iftd && iftd->he_cap.has_he);
+                       supp_he = supp_he || iftd->he_cap.has_he;
                }
 
                /* HT, VHT, HE require QoS, thus >= 4 queues */
index 2563473..99ed68f 100644 (file)
@@ -359,7 +359,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 
        put_unaligned_le32(it_present_val, it_present);
 
-       pos = (void *)(it_present + 1);
+       /* This references through an offset into it_optional[] rather
+        * than via it_present otherwise later uses of pos will cause
+        * the compiler to think we have walked past the end of the
+        * struct member.
+        */
+       pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
 
        /* the order of the following fields is important */
 
@@ -372,7 +377,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                        ieee80211_calculate_rx_timestamp(local, status,
                                                         mpdulen, 0),
                        pos);
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT));
                pos += 8;
        }
 
@@ -396,7 +401,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                *pos = 0;
        } else {
                int shift = 0;
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
                if (status->bw == RATE_INFO_BW_10)
                        shift = 1;
                else if (status->bw == RATE_INFO_BW_5)
@@ -433,7 +438,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
            !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
                *pos = status->signal;
                rthdr->it_present |=
-                       cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
+                       cpu_to_le32(BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL));
                pos++;
        }
 
@@ -459,7 +464,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
        if (status->encoding == RX_ENC_HT) {
                unsigned int stbc;
 
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
                *pos++ = local->hw.radiotap_mcs_details;
                *pos = 0;
                if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
@@ -483,7 +488,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                while ((pos - (u8 *)rthdr) & 3)
                        pos++;
                rthdr->it_present |=
-                       cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
+                       cpu_to_le32(BIT(IEEE80211_RADIOTAP_AMPDU_STATUS));
                put_unaligned_le32(status->ampdu_reference, pos);
                pos += 4;
                if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
@@ -510,7 +515,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
        if (status->encoding == RX_ENC_VHT) {
                u16 known = local->hw.radiotap_vht_details;
 
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
                put_unaligned_le16(known, pos);
                pos += 2;
                /* flags */
@@ -554,7 +559,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
 
                rthdr->it_present |=
-                       cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP);
+                       cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP));
 
                /* ensure 8 byte alignment */
                while ((pos - (u8 *)rthdr) & 7)
@@ -642,7 +647,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                /* ensure 2 byte alignment */
                while ((pos - (u8 *)rthdr) & 1)
                        pos++;
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
                memcpy(pos, &he, sizeof(he));
                pos += sizeof(he);
        }
@@ -652,14 +657,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                /* ensure 2 byte alignment */
                while ((pos - (u8 *)rthdr) & 1)
                        pos++;
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE_MU));
                memcpy(pos, &he_mu, sizeof(he_mu));
                pos += sizeof(he_mu);
        }
 
        if (status->flag & RX_FLAG_NO_PSDU) {
                rthdr->it_present |=
-                       cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU);
+                       cpu_to_le32(BIT(IEEE80211_RADIOTAP_ZERO_LEN_PSDU));
                *pos++ = status->zero_length_psdu_type;
        }
 
@@ -667,7 +672,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
                /* ensure 2 byte alignment */
                while ((pos - (u8 *)rthdr) & 1)
                        pos++;
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_LSIG));
                memcpy(pos, &lsig, sizeof(lsig));
                pos += sizeof(lsig);
        }
@@ -3207,6 +3212,68 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
        return RX_CONTINUE;
 }
 
+static bool
+ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data;
+       struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+       struct ieee80211_sub_if_data *sdata = rx->sdata;
+       const struct ieee80211_sta_he_cap *hecap;
+       struct ieee80211_supported_band *sband;
+
+       /* TWT actions are only supported in AP for the moment */
+       if (sdata->vif.type != NL80211_IFTYPE_AP)
+               return false;
+
+       if (!rx->local->ops->add_twt_setup)
+               return false;
+
+       sband = rx->local->hw.wiphy->bands[status->band];
+       hecap = ieee80211_get_he_iftype_cap(sband,
+                                           ieee80211_vif_type_p2p(&sdata->vif));
+       if (!hecap)
+               return false;
+
+       if (!(hecap->he_cap_elem.mac_cap_info[0] &
+             IEEE80211_HE_MAC_CAP0_TWT_RES))
+               return false;
+
+       if (!rx->sta)
+               return false;
+
+       switch (mgmt->u.action.u.s1g.action_code) {
+       case WLAN_S1G_TWT_SETUP: {
+               struct ieee80211_twt_setup *twt;
+
+               if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+                                  1 + /* action code */
+                                  sizeof(struct ieee80211_twt_setup) +
+                                  2 /* TWT req_type agrt */)
+                       break;
+
+               twt = (void *)mgmt->u.action.u.s1g.variable;
+               if (twt->element_id != WLAN_EID_S1G_TWT)
+                       break;
+
+               if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+                                  4 + /* action code + token + tlv */
+                                  twt->length)
+                       break;
+
+               return true; /* queue the frame */
+       }
+       case WLAN_S1G_TWT_TEARDOWN:
+               if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2)
+                       break;
+
+               return true; /* queue the frame */
+       default:
+               break;
+       }
+
+       return false;
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 {
@@ -3486,6 +3553,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
                    !mesh_path_sel_is_hwmp(sdata))
                        break;
                goto queue;
+       case WLAN_CATEGORY_S1G:
+               switch (mgmt->u.action.u.s1g.action_code) {
+               case WLAN_S1G_TWT_SETUP:
+               case WLAN_S1G_TWT_TEARDOWN:
+                       if (ieee80211_process_rx_twt_action(rx))
+                               goto queue;
+                       break;
+               default:
+                       break;
+               }
+               break;
        }
 
        return RX_CONTINUE;
index c33f332..7e35ab5 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/ieee80211.h>
 #include <net/mac80211.h>
 #include "ieee80211_i.h"
+#include "driver-ops.h"
 
 void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
 {
@@ -14,3 +15,182 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
        sta->rx_stats.last_rate =
                        STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G);
 }
+
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+
+       if (likely(!ieee80211_is_action(mgmt->frame_control)))
+               return false;
+
+       if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G))
+               return false;
+
+       return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP;
+}
+
+static void
+ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da,
+                            const u8 *bssid, struct ieee80211_twt_setup *twt)
+{
+       int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length;
+       struct ieee80211_local *local = sdata->local;
+       struct ieee80211_mgmt *mgmt;
+       struct sk_buff *skb;
+
+       skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
+       if (!skb)
+               return;
+
+       skb_reserve(skb, local->hw.extra_tx_headroom);
+       mgmt = skb_put_zero(skb, len);
+       mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+                                         IEEE80211_STYPE_ACTION);
+       memcpy(mgmt->da, da, ETH_ALEN);
+       memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+       memcpy(mgmt->bssid, bssid, ETH_ALEN);
+
+       mgmt->u.action.category = WLAN_CATEGORY_S1G;
+       mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP;
+       memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length);
+
+       IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+                                       IEEE80211_TX_INTFL_MLME_CONN_TX |
+                                       IEEE80211_TX_CTL_REQ_TX_STATUS;
+       ieee80211_tx_skb(sdata, skb);
+}
+
+static void
+ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata,
+                               const u8 *da, const u8 *bssid, u8 flowid)
+{
+       struct ieee80211_local *local = sdata->local;
+       struct ieee80211_mgmt *mgmt;
+       struct sk_buff *skb;
+       u8 *id;
+
+       skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+                           IEEE80211_MIN_ACTION_SIZE + 2);
+       if (!skb)
+               return;
+
+       skb_reserve(skb, local->hw.extra_tx_headroom);
+       mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2);
+       mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+                                         IEEE80211_STYPE_ACTION);
+       memcpy(mgmt->da, da, ETH_ALEN);
+       memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+       memcpy(mgmt->bssid, bssid, ETH_ALEN);
+
+       mgmt->u.action.category = WLAN_CATEGORY_S1G;
+       mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN;
+       id = (u8 *)mgmt->u.action.u.s1g.variable;
+       *id = flowid;
+
+       IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+                                       IEEE80211_TX_CTL_REQ_TX_STATUS;
+       ieee80211_tx_skb(sdata, skb);
+}
+
+static void
+ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
+                          struct sta_info *sta, struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (void *)skb->data;
+       struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+       struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
+
+       twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST);
+
+       /* broadcast TWT not supported yet */
+       if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) {
+               le16p_replace_bits(&twt_agrt->req_type,
+                                  TWT_SETUP_CMD_REJECT,
+                                  IEEE80211_TWT_REQTYPE_SETUP_CMD);
+               goto out;
+       }
+
+       drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt);
+out:
+       ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt);
+}
+
+static void
+ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata,
+                             struct sta_info *sta, struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+
+       drv_twt_teardown_request(sdata->local, sdata, &sta->sta,
+                                mgmt->u.action.u.s1g.variable[0]);
+}
+
+static void
+ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata,
+                               struct sta_info *sta, struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+       struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+       struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
+       u8 flowid = le16_get_bits(twt_agrt->req_type,
+                                 IEEE80211_TWT_REQTYPE_FLOWID);
+
+       drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flowid);
+
+       ieee80211_s1g_send_twt_teardown(sdata, mgmt->sa, sdata->vif.addr,
+                                       flowid);
+}
+
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+                                struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+       struct ieee80211_local *local = sdata->local;
+       struct sta_info *sta;
+
+       mutex_lock(&local->sta_mtx);
+
+       sta = sta_info_get_bss(sdata, mgmt->sa);
+       if (!sta)
+               goto out;
+
+       switch (mgmt->u.action.u.s1g.action_code) {
+       case WLAN_S1G_TWT_SETUP:
+               ieee80211_s1g_rx_twt_setup(sdata, sta, skb);
+               break;
+       case WLAN_S1G_TWT_TEARDOWN:
+               ieee80211_s1g_rx_twt_teardown(sdata, sta, skb);
+               break;
+       default:
+               break;
+       }
+
+out:
+       mutex_unlock(&local->sta_mtx);
+}
+
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+                                    struct sk_buff *skb)
+{
+       struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+       struct ieee80211_local *local = sdata->local;
+       struct sta_info *sta;
+
+       mutex_lock(&local->sta_mtx);
+
+       sta = sta_info_get_bss(sdata, mgmt->da);
+       if (!sta)
+               goto out;
+
+       switch (mgmt->u.action.u.s1g.action_code) {
+       case WLAN_S1G_TWT_SETUP:
+               /* process failed twt setup frames */
+               ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb);
+               break;
+       default:
+               break;
+       }
+
+out:
+       mutex_unlock(&local->sta_mtx);
+}
index a5505ee..2b5acb3 100644 (file)
@@ -543,7 +543,7 @@ static int sta_info_insert_check(struct sta_info *sta)
                return -ENETDOWN;
 
        if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) ||
-                   is_multicast_ether_addr(sta->sta.addr)))
+                   !is_valid_ether_addr(sta->sta.addr)))
                return -EINVAL;
 
        /* The RCU read lock is required by rhashtable due to
index bae321f..f6f63a0 100644 (file)
@@ -305,8 +305,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
        memset(rthdr, 0, rtap_len);
        rthdr->it_len = cpu_to_le16(rtap_len);
        rthdr->it_present =
-               cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) |
-                           (1 << IEEE80211_RADIOTAP_DATA_RETRIES));
+               cpu_to_le32(BIT(IEEE80211_RADIOTAP_TX_FLAGS) |
+                           BIT(IEEE80211_RADIOTAP_DATA_RETRIES));
        pos = (unsigned char *)(rthdr + 1);
 
        /*
@@ -331,7 +331,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
                        sband->bitrates[info->status.rates[0].idx].bitrate;
 
        if (legacy_rate) {
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
                *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
                /* padding for tx flags */
                pos += 2;
@@ -358,7 +358,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 
        if (status && status->rate &&
            (status->rate->flags & RATE_INFO_FLAGS_MCS)) {
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
                pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
                         IEEE80211_RADIOTAP_MCS_HAVE_GI |
                         IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -374,7 +374,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
                        (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
                         IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
 
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
 
                /* required alignment from rthdr */
                pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -419,7 +419,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
                   (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
                struct ieee80211_radiotap_he *he;
 
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
 
                /* required alignment from rthdr */
                pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -495,7 +495,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
        /* IEEE80211_RADIOTAP_MCS
         * IEEE80211_RADIOTAP_VHT */
        if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
                pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
                         IEEE80211_RADIOTAP_MCS_HAVE_GI |
                         IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -512,7 +512,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
                        (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
                         IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
 
-               rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+               rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
 
                /* required alignment from rthdr */
                pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -705,13 +705,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
                        /* Check to see if packet is a TDLS teardown packet */
                        if (ieee80211_is_data(hdr->frame_control) &&
                            (ieee80211_get_tdls_action(skb, hdr_size) ==
-                            WLAN_TDLS_TEARDOWN))
+                            WLAN_TDLS_TEARDOWN)) {
                                ieee80211_tdls_td_tx_handle(local, sdata, skb,
                                                            info->flags);
-                       else
+                       } else if (ieee80211_s1g_is_twt_setup(skb)) {
+                               if (!acked) {
+                                       struct sk_buff *qskb;
+
+                                       qskb = skb_clone(skb, GFP_ATOMIC);
+                                       if (qskb) {
+                                               skb_queue_tail(&sdata->status_queue,
+                                                              qskb);
+                                               ieee80211_queue_work(&local->hw,
+                                                                    &sdata->work);
+                                       }
+                               }
+                       } else {
                                ieee80211_mgd_conn_tx_status(sdata,
                                                             hdr->frame_control,
                                                             acked);
+                       }
                }
 
                rcu_read_unlock();
index f6ef153..9e8381b 100644 (file)
@@ -2825,6 +2825,73 @@ DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
        TP_ARGS(local, sdata, sta, enabled)
 );
 
+TRACE_EVENT(drv_add_twt_setup,
+       TP_PROTO(struct ieee80211_local *local,
+                struct ieee80211_sta *sta,
+                struct ieee80211_twt_setup *twt,
+                struct ieee80211_twt_params *twt_agrt),
+
+       TP_ARGS(local, sta, twt, twt_agrt),
+
+       TP_STRUCT__entry(
+               LOCAL_ENTRY
+               STA_ENTRY
+               __field(u8, dialog_token)
+               __field(u8, control)
+               __field(__le16, req_type)
+               __field(__le64, twt)
+               __field(u8, duration)
+               __field(__le16, mantissa)
+               __field(u8, channel)
+       ),
+
+       TP_fast_assign(
+               LOCAL_ASSIGN;
+               STA_ASSIGN;
+               __entry->dialog_token = twt->dialog_token;
+               __entry->control = twt->control;
+               __entry->req_type = twt_agrt->req_type;
+               __entry->twt = twt_agrt->twt;
+               __entry->duration = twt_agrt->min_twt_dur;
+               __entry->mantissa = twt_agrt->mantissa;
+               __entry->channel = twt_agrt->channel;
+       ),
+
+       TP_printk(
+               LOCAL_PR_FMT STA_PR_FMT
+               " token:%d control:0x%02x req_type:0x%04x"
+               " twt:%llu duration:%d mantissa:%d channel:%d",
+               LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token,
+               __entry->control, le16_to_cpu(__entry->req_type),
+               le64_to_cpu(__entry->twt), __entry->duration,
+               le16_to_cpu(__entry->mantissa), __entry->channel
+       )
+);
+
+TRACE_EVENT(drv_twt_teardown_request,
+       TP_PROTO(struct ieee80211_local *local,
+                struct ieee80211_sta *sta, u8 flowid),
+
+       TP_ARGS(local, sta, flowid),
+
+       TP_STRUCT__entry(
+               LOCAL_ENTRY
+               STA_ENTRY
+               __field(u8, flowid)
+       ),
+
+       TP_fast_assign(
+               LOCAL_ASSIGN;
+               STA_ASSIGN;
+               __entry->flowid = flowid;
+       ),
+
+       TP_printk(
+               LOCAL_PR_FMT STA_PR_FMT " flowid:%d",
+               LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid
+       )
+);
+
 #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
index 8509778..2d1193e 100644 (file)
@@ -3242,7 +3242,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
        if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
                return true;
 
-       if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
+       if (!ieee80211_amsdu_realloc_pad(local, skb,
+                                        sizeof(*amsdu_hdr) +
+                                        local->hw.extra_tx_headroom))
                return false;
 
        data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -4782,11 +4784,11 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
                                        struct beacon_data *beacon)
 {
+       u8 *beacon_data, count, max_count = 1;
        struct probe_resp *resp;
-       u8 *beacon_data;
        size_t beacon_data_len;
+       u16 *bcn_offsets;
        int i;
-       u8 count = beacon->cntdwn_current_counter;
 
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_AP:
@@ -4806,21 +4808,27 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
        }
 
        rcu_read_lock();
-       for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) {
-               resp = rcu_dereference(sdata->u.ap.probe_resp);
+       resp = rcu_dereference(sdata->u.ap.probe_resp);
+
+       bcn_offsets = beacon->cntdwn_counter_offsets;
+       count = beacon->cntdwn_current_counter;
+       if (sdata->vif.csa_active)
+               max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM;
 
-               if (beacon->cntdwn_counter_offsets[i]) {
-                       if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >=
-                                        beacon_data_len)) {
+       for (i = 0; i < max_count; ++i) {
+               if (bcn_offsets[i]) {
+                       if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) {
                                rcu_read_unlock();
                                return;
                        }
-
-                       beacon_data[beacon->cntdwn_counter_offsets[i]] = count;
+                       beacon_data[bcn_offsets[i]] = count;
                }
 
-               if (sdata->vif.type == NL80211_IFTYPE_AP && resp)
-                       resp->data[resp->cntdwn_counter_offsets[i]] = count;
+               if (sdata->vif.type == NL80211_IFTYPE_AP && resp) {
+                       u16 *resp_offsets = resp->cntdwn_counter_offsets;
+
+                       resp->data[resp_offsets[i]] = count;
+               }
        }
        rcu_read_unlock();
 }
@@ -5030,6 +5038,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
                        if (offs) {
                                offs->tim_offset = beacon->head_len;
                                offs->tim_length = skb->len - beacon->head_len;
+                               offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
 
                                /* for AP the csa offsets are from tail */
                                csa_off_base = skb->len;
index 05e9621..49cb96d 100644 (file)
@@ -1336,6 +1336,18 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        elems->rsnx = pos;
                        elems->rsnx_len = elen;
                        break;
+               case WLAN_EID_TX_POWER_ENVELOPE:
+                       if (elen < 1 ||
+                           elen > sizeof(struct ieee80211_tx_pwr_env))
+                               break;
+
+                       if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
+                               break;
+
+                       elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
+                       elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
+                       elems->tx_pwr_env_num++;
+                       break;
                case WLAN_EID_EXTENSION:
                        ieee80211_parse_extension_element(calc_crc ?
                                                                &crc : NULL,
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
new file mode 100644 (file)
index 0000000..2cdf3d0
--- /dev/null
@@ -0,0 +1,13 @@
+
+menuconfig MCTP
+       depends on NET
+       tristate "MCTP core protocol support"
+       help
+         Management Component Transport Protocol (MCTP) is an in-system
+         protocol for communicating between management controllers and
+         their managed devices (peripherals, host processors, etc.). The
+         protocol is defined by DMTF specification DSP0236.
+
+         This option enables core MCTP support. For communicating with other
+         devices, you'll want to enable a driver for a specific hardware
+         channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644 (file)
index 0000000..0171333
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644 (file)
index 0000000..a9526ac
--- /dev/null
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* socket implementation */
+
+static int mctp_release(struct socket *sock)
+{
+       struct sock *sk = sock->sk;
+
+       if (sk) {
+               sock->sk = NULL;
+               sk->sk_prot->close(sk, 0);
+       }
+
+       return 0;
+}
+
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+       struct sock *sk = sock->sk;
+       struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+       struct sockaddr_mctp *smctp;
+       int rc;
+
+       if (addrlen < sizeof(*smctp))
+               return -EINVAL;
+
+       if (addr->sa_family != AF_MCTP)
+               return -EAFNOSUPPORT;
+
+       if (!capable(CAP_NET_BIND_SERVICE))
+               return -EACCES;
+
+       /* it's a valid sockaddr for MCTP, cast and do protocol checks */
+       smctp = (struct sockaddr_mctp *)addr;
+
+       lock_sock(sk);
+
+       /* TODO: allow rebind */
+       if (sk_hashed(sk)) {
+               rc = -EADDRINUSE;
+               goto out_release;
+       }
+       msk->bind_net = smctp->smctp_network;
+       msk->bind_addr = smctp->smctp_addr.s_addr;
+       msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+       rc = sk->sk_prot->hash(sk);
+
+out_release:
+       release_sock(sk);
+
+       return rc;
+}
+
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+       DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+       const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+       int rc, addrlen = msg->msg_namelen;
+       struct sock *sk = sock->sk;
+       struct mctp_skb_cb *cb;
+       struct mctp_route *rt;
+       struct sk_buff *skb;
+
+       if (addr) {
+               if (addrlen < sizeof(struct sockaddr_mctp))
+                       return -EINVAL;
+               if (addr->smctp_family != AF_MCTP)
+                       return -EINVAL;
+               if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+                       return -EINVAL;
+
+       } else {
+               /* TODO: connect()ed sockets */
+               return -EDESTADDRREQ;
+       }
+
+       if (!capable(CAP_NET_RAW))
+               return -EACCES;
+
+       if (addr->smctp_network == MCTP_NET_ANY)
+               addr->smctp_network = mctp_default_net(sock_net(sk));
+
+       rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+                              addr->smctp_addr.s_addr);
+       if (!rt)
+               return -EHOSTUNREACH;
+
+       skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+                                 msg->msg_flags & MSG_DONTWAIT, &rc);
+       if (!skb)
+               return rc;
+
+       skb_reserve(skb, hlen);
+
+       /* set type as fist byte in payload */
+       *(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+       rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+       if (rc < 0) {
+               kfree_skb(skb);
+               return rc;
+       }
+
+       /* set up cb */
+       cb = __mctp_cb(skb);
+       cb->net = addr->smctp_network;
+
+       rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+                              addr->smctp_tag);
+
+       return rc ? : len;
+}
+
+static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+                       int flags)
+{
+       DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+       struct sock *sk = sock->sk;
+       struct sk_buff *skb;
+       size_t msglen;
+       u8 type;
+       int rc;
+
+       if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+               return -EOPNOTSUPP;
+
+       skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+       if (!skb)
+               return rc;
+
+       if (!skb->len) {
+               rc = 0;
+               goto out_free;
+       }
+
+       /* extract message type, remove from data */
+       type = *((u8 *)skb->data);
+       msglen = skb->len - 1;
+
+       if (len < msglen)
+               msg->msg_flags |= MSG_TRUNC;
+       else
+               len = msglen;
+
+       rc = skb_copy_datagram_msg(skb, 1, msg, len);
+       if (rc < 0)
+               goto out_free;
+
+       sock_recv_ts_and_drops(msg, sk, skb);
+
+       if (addr) {
+               struct mctp_skb_cb *cb = mctp_cb(skb);
+               /* TODO: expand mctp_skb_cb for header fields? */
+               struct mctp_hdr *hdr = mctp_hdr(skb);
+
+               addr = msg->msg_name;
+               addr->smctp_family = AF_MCTP;
+               addr->smctp_network = cb->net;
+               addr->smctp_addr.s_addr = hdr->src;
+               addr->smctp_type = type;
+               addr->smctp_tag = hdr->flags_seq_tag &
+                                       (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+               msg->msg_namelen = sizeof(*addr);
+       }
+
+       rc = len;
+
+       if (flags & MSG_TRUNC)
+               rc = msglen;
+
+out_free:
+       skb_free_datagram(sk, skb);
+       return rc;
+}
+
+static int mctp_setsockopt(struct socket *sock, int level, int optname,
+                          sockptr_t optval, unsigned int optlen)
+{
+       return -EINVAL;
+}
+
+static int mctp_getsockopt(struct socket *sock, int level, int optname,
+                          char __user *optval, int __user *optlen)
+{
+       return -EINVAL;
+}
+
+static const struct proto_ops mctp_dgram_ops = {
+       .family         = PF_MCTP,
+       .release        = mctp_release,
+       .bind           = mctp_bind,
+       .connect        = sock_no_connect,
+       .socketpair     = sock_no_socketpair,
+       .accept         = sock_no_accept,
+       .getname        = sock_no_getname,
+       .poll           = datagram_poll,
+       .ioctl          = sock_no_ioctl,
+       .gettstamp      = sock_gettstamp,
+       .listen         = sock_no_listen,
+       .shutdown       = sock_no_shutdown,
+       .setsockopt     = mctp_setsockopt,
+       .getsockopt     = mctp_getsockopt,
+       .sendmsg        = mctp_sendmsg,
+       .recvmsg        = mctp_recvmsg,
+       .mmap           = sock_no_mmap,
+       .sendpage       = sock_no_sendpage,
+};
+
+static int mctp_sk_init(struct sock *sk)
+{
+       struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+       INIT_HLIST_HEAD(&msk->keys);
+       return 0;
+}
+
+static void mctp_sk_close(struct sock *sk, long timeout)
+{
+       sk_common_release(sk);
+}
+
+static int mctp_sk_hash(struct sock *sk)
+{
+       struct net *net = sock_net(sk);
+
+       mutex_lock(&net->mctp.bind_lock);
+       sk_add_node_rcu(sk, &net->mctp.binds);
+       mutex_unlock(&net->mctp.bind_lock);
+
+       return 0;
+}
+
+static void mctp_sk_unhash(struct sock *sk)
+{
+       struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+       struct net *net = sock_net(sk);
+       struct mctp_sk_key *key;
+       struct hlist_node *tmp;
+       unsigned long flags;
+
+       /* remove from any type-based binds */
+       mutex_lock(&net->mctp.bind_lock);
+       sk_del_node_init_rcu(sk);
+       mutex_unlock(&net->mctp.bind_lock);
+
+       /* remove tag allocations */
+       spin_lock_irqsave(&net->mctp.keys_lock, flags);
+       hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+               hlist_del_rcu(&key->sklist);
+               hlist_del_rcu(&key->hlist);
+
+               spin_lock(&key->reasm_lock);
+               if (key->reasm_head)
+                       kfree_skb(key->reasm_head);
+               key->reasm_head = NULL;
+               key->reasm_dead = true;
+               spin_unlock(&key->reasm_lock);
+
+               kfree_rcu(key, rcu);
+       }
+       spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+       synchronize_rcu();
+}
+
+static struct proto mctp_proto = {
+       .name           = "MCTP",
+       .owner          = THIS_MODULE,
+       .obj_size       = sizeof(struct mctp_sock),
+       .init           = mctp_sk_init,
+       .close          = mctp_sk_close,
+       .hash           = mctp_sk_hash,
+       .unhash         = mctp_sk_unhash,
+};
+
+static int mctp_pf_create(struct net *net, struct socket *sock,
+                         int protocol, int kern)
+{
+       const struct proto_ops *ops;
+       struct proto *proto;
+       struct sock *sk;
+       int rc;
+
+       if (protocol)
+               return -EPROTONOSUPPORT;
+
+       /* only datagram sockets are supported */
+       if (sock->type != SOCK_DGRAM)
+               return -ESOCKTNOSUPPORT;
+
+       proto = &mctp_proto;
+       ops = &mctp_dgram_ops;
+
+       sock->state = SS_UNCONNECTED;
+       sock->ops = ops;
+
+       sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, proto, kern);
+       if (!sk)
+               return -ENOMEM;
+
+       sock_init_data(sock, sk);
+
+       rc = 0;
+       if (sk->sk_prot->init)
+               rc = sk->sk_prot->init(sk);
+
+       if (rc)
+               goto err_sk_put;
+
+       return 0;
+
+err_sk_put:
+       sock_orphan(sk);
+       sock_put(sk);
+       return rc;
+}
+
+static struct net_proto_family mctp_pf = {
+       .family = PF_MCTP,
+       .create = mctp_pf_create,
+       .owner = THIS_MODULE,
+};
+
+static __init int mctp_init(void)
+{
+       int rc;
+
+       /* ensure our uapi tag definitions match the header format */
+       BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
+       BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
+
+       pr_info("mctp: management component transport protocol core\n");
+
+       rc = sock_register(&mctp_pf);
+       if (rc)
+               return rc;
+
+       rc = proto_register(&mctp_proto, 0);
+       if (rc)
+               goto err_unreg_sock;
+
+       rc = mctp_routes_init();
+       if (rc)
+               goto err_unreg_proto;
+
+       rc = mctp_neigh_init();
+       if (rc)
+               goto err_unreg_proto;
+
+       mctp_device_init();
+
+       return 0;
+
+err_unreg_proto:
+       proto_unregister(&mctp_proto);
+err_unreg_sock:
+       sock_unregister(PF_MCTP);
+
+       return rc;
+}
+
+static __exit void mctp_exit(void)
+{
+       mctp_device_exit();
+       mctp_neigh_exit();
+       mctp_routes_exit();
+       proto_unregister(&mctp_proto);
+       sock_unregister(PF_MCTP);
+}
+
+module_init(mctp_init);
+module_exit(mctp_exit);
+
+MODULE_DESCRIPTION("MCTP core");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
+
+MODULE_ALIAS_NETPROTO(PF_MCTP);
diff --git a/net/mctp/device.c b/net/mctp/device.c
new file mode 100644 (file)
index 0000000..b9f38e7
--- /dev/null
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - device implementation.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_link.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+
+#include <net/addrconf.h>
+#include <net/netlink.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+struct mctp_dump_cb {
+       int h;
+       int idx;
+       size_t a_idx;
+};
+
+/* unlocked: caller must hold rcu_read_lock */
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
+{
+       return rcu_dereference(dev->mctp_ptr);
+}
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
+{
+       return rtnl_dereference(dev->mctp_ptr);
+}
+
+static void mctp_dev_destroy(struct mctp_dev *mdev)
+{
+       struct net_device *dev = mdev->dev;
+
+       dev_put(dev);
+       kfree_rcu(mdev, rcu);
+}
+
+static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
+                             struct mctp_dev *mdev, mctp_eid_t eid)
+{
+       struct ifaddrmsg *hdr;
+       struct nlmsghdr *nlh;
+
+       nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+                       RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       hdr = nlmsg_data(nlh);
+       hdr->ifa_family = AF_MCTP;
+       hdr->ifa_prefixlen = 0;
+       hdr->ifa_flags = 0;
+       hdr->ifa_scope = 0;
+       hdr->ifa_index = mdev->dev->ifindex;
+
+       if (nla_put_u8(skb, IFA_LOCAL, eid))
+               goto cancel;
+
+       if (nla_put_u8(skb, IFA_ADDRESS, eid))
+               goto cancel;
+
+       nlmsg_end(skb, nlh);
+
+       return 0;
+
+cancel:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb,
+                                 struct netlink_callback *cb)
+{
+       struct mctp_dump_cb *mcb = (void *)cb->ctx;
+       int rc = 0;
+
+       for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) {
+               rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]);
+               if (rc < 0)
+                       break;
+       }
+
+       return rc;
+}
+
+static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct mctp_dump_cb *mcb = (void *)cb->ctx;
+       struct net *net = sock_net(skb->sk);
+       struct hlist_head *head;
+       struct net_device *dev;
+       struct ifaddrmsg *hdr;
+       struct mctp_dev *mdev;
+       int ifindex;
+       int idx, rc;
+
+       hdr = nlmsg_data(cb->nlh);
+       // filter by ifindex if requested
+       ifindex = hdr->ifa_index;
+
+       rcu_read_lock();
+       for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
+               idx = 0;
+               head = &net->dev_index_head[mcb->h];
+               hlist_for_each_entry_rcu(dev, head, index_hlist) {
+                       if (idx >= mcb->idx &&
+                           (ifindex == 0 || ifindex == dev->ifindex)) {
+                               mdev = __mctp_dev_get(dev);
+                               if (mdev) {
+                                       rc = mctp_dump_dev_addrinfo(mdev,
+                                                                   skb, cb);
+                                       // Error indicates full buffer, this
+                                       // callback will get retried.
+                                       if (rc < 0)
+                                               goto out;
+                               }
+                       }
+                       idx++;
+                       // reset for next iteration
+                       mcb->a_idx = 0;
+               }
+       }
+out:
+       rcu_read_unlock();
+       mcb->idx = idx;
+
+       return skb->len;
+}
+
+static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = {
+       [IFA_ADDRESS]           = { .type = NLA_U8 },
+       [IFA_LOCAL]             = { .type = NLA_U8 },
+};
+
+static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tb[IFA_MAX + 1];
+       struct net_device *dev;
+       struct mctp_addr *addr;
+       struct mctp_dev *mdev;
+       struct ifaddrmsg *ifm;
+       unsigned long flags;
+       u8 *tmp_addrs;
+       int rc;
+
+       rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+                        extack);
+       if (rc < 0)
+               return rc;
+
+       ifm = nlmsg_data(nlh);
+
+       if (tb[IFA_LOCAL])
+               addr = nla_data(tb[IFA_LOCAL]);
+       else if (tb[IFA_ADDRESS])
+               addr = nla_data(tb[IFA_ADDRESS]);
+       else
+               return -EINVAL;
+
+       /* find device */
+       dev = __dev_get_by_index(net, ifm->ifa_index);
+       if (!dev)
+               return -ENODEV;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return -ENODEV;
+
+       if (!mctp_address_ok(addr->s_addr))
+               return -EINVAL;
+
+       /* Prevent duplicates. Under RTNL so don't need to lock for reading */
+       if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs))
+               return -EEXIST;
+
+       tmp_addrs = kmalloc(mdev->num_addrs + 1, GFP_KERNEL);
+       if (!tmp_addrs)
+               return -ENOMEM;
+       memcpy(tmp_addrs, mdev->addrs, mdev->num_addrs);
+       tmp_addrs[mdev->num_addrs] = addr->s_addr;
+
+       /* Lock to write */
+       spin_lock_irqsave(&mdev->addrs_lock, flags);
+       mdev->num_addrs++;
+       swap(mdev->addrs, tmp_addrs);
+       spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+       kfree(tmp_addrs);
+
+       mctp_route_add_local(mdev, addr->s_addr);
+
+       return 0;
+}
+
+static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tb[IFA_MAX + 1];
+       struct net_device *dev;
+       struct mctp_addr *addr;
+       struct mctp_dev *mdev;
+       struct ifaddrmsg *ifm;
+       unsigned long flags;
+       u8 *pos;
+       int rc;
+
+       rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+                        extack);
+       if (rc < 0)
+               return rc;
+
+       ifm = nlmsg_data(nlh);
+
+       if (tb[IFA_LOCAL])
+               addr = nla_data(tb[IFA_LOCAL]);
+       else if (tb[IFA_ADDRESS])
+               addr = nla_data(tb[IFA_ADDRESS]);
+       else
+               return -EINVAL;
+
+       /* find device */
+       dev = __dev_get_by_index(net, ifm->ifa_index);
+       if (!dev)
+               return -ENODEV;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return -ENODEV;
+
+       pos = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs);
+       if (!pos)
+               return -ENOENT;
+
+       rc = mctp_route_remove_local(mdev, addr->s_addr);
+       // we can ignore -ENOENT in the case a route was already removed
+       if (rc < 0 && rc != -ENOENT)
+               return rc;
+
+       spin_lock_irqsave(&mdev->addrs_lock, flags);
+       memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs));
+       mdev->num_addrs--;
+       spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+       return 0;
+}
+
+static struct mctp_dev *mctp_add_dev(struct net_device *dev)
+{
+       struct mctp_dev *mdev;
+
+       ASSERT_RTNL();
+
+       mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+       if (!mdev)
+               return ERR_PTR(-ENOMEM);
+
+       spin_lock_init(&mdev->addrs_lock);
+
+       mdev->net = mctp_default_net(dev_net(dev));
+
+       /* associate to net_device */
+       rcu_assign_pointer(dev->mctp_ptr, mdev);
+       dev_hold(dev);
+       mdev->dev = dev;
+
+       return mdev;
+}
+
+static int mctp_fill_link_af(struct sk_buff *skb,
+                            const struct net_device *dev, u32 ext_filter_mask)
+{
+       struct mctp_dev *mdev;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return -ENODATA;
+       if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+                                   u32 ext_filter_mask)
+{
+       struct mctp_dev *mdev;
+       unsigned int ret;
+
+       /* caller holds RCU */
+       mdev = __mctp_dev_get(dev);
+       if (!mdev)
+               return 0;
+       ret = nla_total_size(4); /* IFLA_MCTP_NET */
+       return ret;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = {
+       [IFLA_MCTP_NET]         = { .type = NLA_U32 },
+};
+
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+                           struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[IFLA_MCTP_MAX + 1];
+       struct mctp_dev *mdev;
+       int rc;
+
+       rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+                             NULL);
+       if (rc)
+               return rc;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return 0;
+
+       if (tb[IFLA_MCTP_NET])
+               WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+       return 0;
+}
+
+static void mctp_unregister(struct net_device *dev)
+{
+       struct mctp_dev *mdev;
+
+       mdev = mctp_dev_get_rtnl(dev);
+
+       if (!mdev)
+               return;
+
+       RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+       mctp_route_remove_dev(mdev);
+       mctp_neigh_remove_dev(mdev);
+       kfree(mdev->addrs);
+
+       mctp_dev_destroy(mdev);
+}
+
+static int mctp_register(struct net_device *dev)
+{
+       struct mctp_dev *mdev;
+
+       /* Already registered? */
+       if (rtnl_dereference(dev->mctp_ptr))
+               return 0;
+
+       /* only register specific types; MCTP-specific and loopback for now */
+       if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+               return 0;
+
+       mdev = mctp_add_dev(dev);
+       if (IS_ERR(mdev))
+               return PTR_ERR(mdev);
+
+       return 0;
+}
+
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+                          void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       int rc;
+
+       switch (event) {
+       case NETDEV_REGISTER:
+               rc = mctp_register(dev);
+               if (rc)
+                       return notifier_from_errno(rc);
+               break;
+       case NETDEV_UNREGISTER:
+               mctp_unregister(dev);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = {
+       .family = AF_MCTP,
+       .fill_link_af = mctp_fill_link_af,
+       .get_link_af_size = mctp_get_link_af_size,
+       .set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = {
+       .notifier_call = mctp_dev_notify,
+       .priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+void __init mctp_device_init(void)
+{
+       register_netdevice_notifier(&mctp_dev_nb);
+
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+                            NULL, mctp_dump_addrinfo, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+                            mctp_rtm_newaddr, NULL, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+                            mctp_rtm_deladdr, NULL, 0);
+       rtnl_af_register(&mctp_af_ops);
+}
+
+void __exit mctp_device_exit(void)
+{
+       rtnl_af_unregister(&mctp_af_ops);
+       rtnl_unregister(PF_MCTP, RTM_DELADDR);
+       rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+       rtnl_unregister(PF_MCTP, RTM_GETADDR);
+
+       unregister_netdevice_notifier(&mctp_dev_nb);
+}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644 (file)
index 0000000..90ed2f0
--- /dev/null
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+                         enum mctp_neigh_source source,
+                         size_t lladdr_len, const void *lladdr)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_neigh *neigh;
+       int rc;
+
+       mutex_lock(&net->mctp.neigh_lock);
+       if (mctp_neigh_lookup(mdev, eid, NULL) == 0) {
+               rc = -EEXIST;
+               goto out;
+       }
+
+       if (lladdr_len > sizeof(neigh->ha)) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+       neigh = kzalloc(sizeof(*neigh), GFP_KERNEL);
+       if (!neigh) {
+               rc = -ENOMEM;
+               goto out;
+       }
+       INIT_LIST_HEAD(&neigh->list);
+       neigh->dev = mdev;
+       dev_hold(neigh->dev->dev);
+       neigh->eid = eid;
+       neigh->source = source;
+       memcpy(neigh->ha, lladdr, lladdr_len);
+
+       list_add_rcu(&neigh->list, &net->mctp.neighbours);
+       rc = 0;
+out:
+       mutex_unlock(&net->mctp.neigh_lock);
+       return rc;
+}
+
+static void __mctp_neigh_free(struct rcu_head *rcu)
+{
+       struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
+
+       dev_put(neigh->dev->dev);
+       kfree(neigh);
+}
+
+/* Removes all neighbour entries referring to a device */
+void mctp_neigh_remove_dev(struct mctp_dev *mdev)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_neigh *neigh, *tmp;
+
+       mutex_lock(&net->mctp.neigh_lock);
+       list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+               if (neigh->dev == mdev) {
+                       list_del_rcu(&neigh->list);
+                       /* TODO: immediate RTM_DELNEIGH */
+                       call_rcu(&neigh->rcu, __mctp_neigh_free);
+               }
+       }
+
+       mutex_unlock(&net->mctp.neigh_lock);
+}
+
+// TODO: add a "source" flag so netlink can only delete static neighbours?
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_neigh *neigh, *tmp;
+       bool dropped = false;
+
+       mutex_lock(&net->mctp.neigh_lock);
+       list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+               if (neigh->dev == mdev && neigh->eid == eid) {
+                       list_del_rcu(&neigh->list);
+                       /* TODO: immediate RTM_DELNEIGH */
+                       call_rcu(&neigh->rcu, __mctp_neigh_free);
+                       dropped = true;
+               }
+       }
+
+       mutex_unlock(&net->mctp.neigh_lock);
+       return dropped ? 0 : -ENOENT;
+}
+
+static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = {
+       [NDA_DST]               = { .type = NLA_U8 },
+       [NDA_LLADDR]            = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+};
+
+static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+                            struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct net_device *dev;
+       struct mctp_dev *mdev;
+       struct ndmsg *ndm;
+       struct nlattr *tb[NDA_MAX + 1];
+       int rc;
+       mctp_eid_t eid;
+       void *lladdr;
+       int lladdr_len;
+
+       rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+                        extack);
+       if (rc < 0) {
+               NL_SET_ERR_MSG(extack, "lladdr too large?");
+               return rc;
+       }
+
+       if (!tb[NDA_DST]) {
+               NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+               return -EINVAL;
+       }
+
+       if (!tb[NDA_LLADDR]) {
+               NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified");
+               return -EINVAL;
+       }
+
+       eid = nla_get_u8(tb[NDA_DST]);
+       if (!mctp_address_ok(eid)) {
+               NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
+               return -EINVAL;
+       }
+
+       lladdr = nla_data(tb[NDA_LLADDR]);
+       lladdr_len = nla_len(tb[NDA_LLADDR]);
+
+       ndm = nlmsg_data(nlh);
+
+       dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+       if (!dev)
+               return -ENODEV;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return -ENODEV;
+
+       if (lladdr_len != dev->addr_len) {
+               NL_SET_ERR_MSG(extack, "Wrong lladdr length");
+               return -EINVAL;
+       }
+
+       return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC,
+                       lladdr_len, lladdr);
+}
+
+static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+                            struct netlink_ext_ack *extack)
+{
+       struct net *net = sock_net(skb->sk);
+       struct nlattr *tb[NDA_MAX + 1];
+       struct net_device *dev;
+       struct mctp_dev *mdev;
+       struct ndmsg *ndm;
+       int rc;
+       mctp_eid_t eid;
+
+       rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+                        extack);
+       if (rc < 0) {
+               NL_SET_ERR_MSG(extack, "incorrect format");
+               return rc;
+       }
+
+       if (!tb[NDA_DST]) {
+               NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+               return -EINVAL;
+       }
+       eid = nla_get_u8(tb[NDA_DST]);
+
+       ndm = nlmsg_data(nlh);
+       dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+       if (!dev)
+               return -ENODEV;
+
+       mdev = mctp_dev_get_rtnl(dev);
+       if (!mdev)
+               return -ENODEV;
+
+       return mctp_neigh_remove(mdev, eid);
+}
+
+static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event,
+                          unsigned int flags, struct mctp_neigh *neigh)
+{
+       struct net_device *dev = neigh->dev->dev;
+       struct nlmsghdr *nlh;
+       struct ndmsg *hdr;
+
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       hdr = nlmsg_data(nlh);
+       hdr->ndm_family = AF_MCTP;
+       hdr->ndm_ifindex = dev->ifindex;
+       hdr->ndm_state = 0; // TODO other state bits?
+       if (neigh->source == MCTP_NEIGH_STATIC)
+               hdr->ndm_state |= NUD_PERMANENT;
+       hdr->ndm_flags = 0;
+       hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL?
+
+       if (nla_put_u8(skb, NDA_DST, neigh->eid))
+               goto cancel;
+
+       if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha))
+               goto cancel;
+
+       nlmsg_end(skb, nlh);
+
+       return 0;
+cancel:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(skb->sk);
+       int rc, idx, req_ifindex;
+       struct mctp_neigh *neigh;
+       struct ndmsg *ndmsg;
+       struct {
+               int idx;
+       } *cbctx = (void *)cb->ctx;
+
+       ndmsg = nlmsg_data(cb->nlh);
+       req_ifindex = ndmsg->ndm_ifindex;
+
+       idx = 0;
+       rcu_read_lock();
+       list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+               if (idx < cbctx->idx)
+                       goto cont;
+
+               rc = 0;
+               if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex)
+                       rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid,
+                                            cb->nlh->nlmsg_seq,
+                                            RTM_NEWNEIGH, NLM_F_MULTI, neigh);
+
+               if (rc)
+                       break;
+cont:
+               idx++;
+       }
+       rcu_read_unlock();
+
+       cbctx->idx = idx;
+       return skb->len;
+}
+
+int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_neigh *neigh;
+       int rc = -EHOSTUNREACH; // TODO: or ENOENT?
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+               if (mdev == neigh->dev && eid == neigh->eid) {
+                       if (ret_hwaddr)
+                               memcpy(ret_hwaddr, neigh->ha,
+                                      sizeof(neigh->ha));
+                       rc = 0;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return rc;
+}
+
+/* namespace registration */
+static int __net_init mctp_neigh_net_init(struct net *net)
+{
+       struct netns_mctp *ns = &net->mctp;
+
+       INIT_LIST_HEAD(&ns->neighbours);
+       mutex_init(&ns->neigh_lock);
+       return 0;
+}
+
+static void __net_exit mctp_neigh_net_exit(struct net *net)
+{
+       struct netns_mctp *ns = &net->mctp;
+       struct mctp_neigh *neigh;
+
+       list_for_each_entry(neigh, &ns->neighbours, list)
+               call_rcu(&neigh->rcu, __mctp_neigh_free);
+}
+
+/* net namespace implementation */
+
+static struct pernet_operations mctp_net_ops = {
+       .init = mctp_neigh_net_init,
+       .exit = mctp_neigh_net_exit,
+};
+
+int __init mctp_neigh_init(void)
+{
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+                            mctp_rtm_newneigh, NULL, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+                            mctp_rtm_delneigh, NULL, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+                            NULL, mctp_rtm_getneigh, 0);
+
+       return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_neigh_exit(void)
+{
+       unregister_pernet_subsys(&mctp_net_ops);
+       rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+       rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+       rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644 (file)
index 0000000..5265525
--- /dev/null
@@ -0,0 +1,1116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static const unsigned int mctp_message_maxlen = 64 * 1024;
+
+/* route output callbacks */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+       kfree_skb(skb);
+       return 0;
+}
+
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+       struct mctp_skb_cb *cb = mctp_cb(skb);
+       struct mctp_hdr *mh;
+       struct sock *sk;
+       u8 type;
+
+       WARN_ON(!rcu_read_lock_held());
+
+       /* TODO: look up in skb->cb? */
+       mh = mctp_hdr(skb);
+
+       if (!skb_headlen(skb))
+               return NULL;
+
+       type = (*(u8 *)skb->data) & 0x7f;
+
+       sk_for_each_rcu(sk, &net->mctp.binds) {
+               struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+               if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
+                       continue;
+
+               if (msk->bind_type != type)
+                       continue;
+
+               if (msk->bind_addr != MCTP_ADDR_ANY &&
+                   msk->bind_addr != mh->dest)
+                       continue;
+
+               return msk;
+       }
+
+       return NULL;
+}
+
+static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
+                          mctp_eid_t peer, u8 tag)
+{
+       if (key->local_addr != local)
+               return false;
+
+       if (key->peer_addr != peer)
+               return false;
+
+       if (key->tag != tag)
+               return false;
+
+       return true;
+}
+
+static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
+                                          mctp_eid_t peer)
+{
+       struct mctp_sk_key *key, *ret;
+       struct mctp_hdr *mh;
+       u8 tag;
+
+       WARN_ON(!rcu_read_lock_held());
+
+       mh = mctp_hdr(skb);
+       tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+       ret = NULL;
+
+       hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
+               if (mctp_key_match(key, mh->dest, peer, tag)) {
+                       ret = key;
+                       break;
+               }
+       }
+
+       return ret;
+}
+
+static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+                                         mctp_eid_t local, mctp_eid_t peer,
+                                         u8 tag, gfp_t gfp)
+{
+       struct mctp_sk_key *key;
+
+       key = kzalloc(sizeof(*key), gfp);
+       if (!key)
+               return NULL;
+
+       key->peer_addr = peer;
+       key->local_addr = local;
+       key->tag = tag;
+       key->sk = &msk->sk;
+       spin_lock_init(&key->reasm_lock);
+
+       return key;
+}
+
+static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+{
+       struct net *net = sock_net(&msk->sk);
+       struct mctp_sk_key *tmp;
+       unsigned long flags;
+       int rc = 0;
+
+       spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+       hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+               if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+                                  key->tag)) {
+                       rc = -EEXIST;
+                       break;
+               }
+       }
+
+       if (!rc) {
+               hlist_add_head(&key->hlist, &net->mctp.keys);
+               hlist_add_head(&key->sklist, &msk->keys);
+       }
+
+       spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+       return rc;
+}
+
+/* Must be called with key->reasm_lock, which it will release. Will schedule
+ * the key for an RCU free.
+ */
+static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
+                                  unsigned long flags)
+       __releases(&key->reasm_lock)
+{
+       struct sk_buff *skb;
+
+       skb = key->reasm_head;
+       key->reasm_head = NULL;
+       key->reasm_dead = true;
+       spin_unlock_irqrestore(&key->reasm_lock, flags);
+
+       spin_lock_irqsave(&net->mctp.keys_lock, flags);
+       hlist_del_rcu(&key->hlist);
+       hlist_del_rcu(&key->sklist);
+       spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+       kfree_rcu(key, rcu);
+
+       if (skb)
+               kfree_skb(skb);
+}
+
+static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+{
+       struct mctp_hdr *hdr = mctp_hdr(skb);
+       u8 exp_seq, this_seq;
+
+       this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
+               & MCTP_HDR_SEQ_MASK;
+
+       if (!key->reasm_head) {
+               key->reasm_head = skb;
+               key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+               key->last_seq = this_seq;
+               return 0;
+       }
+
+       exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
+
+       if (this_seq != exp_seq)
+               return -EINVAL;
+
+       if (key->reasm_head->len + skb->len > mctp_message_maxlen)
+               return -EINVAL;
+
+       skb->next = NULL;
+       skb->sk = NULL;
+       *key->reasm_tailp = skb;
+       key->reasm_tailp = &skb->next;
+
+       key->last_seq = this_seq;
+
+       key->reasm_head->data_len += skb->len;
+       key->reasm_head->len += skb->len;
+       key->reasm_head->truesize += skb->truesize;
+
+       return 0;
+}
+
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+       struct net *net = dev_net(skb->dev);
+       struct mctp_sk_key *key;
+       struct mctp_sock *msk;
+       struct mctp_hdr *mh;
+       unsigned long f;
+       u8 tag, flags;
+       int rc;
+
+       msk = NULL;
+       rc = -EINVAL;
+
+       /* we may be receiving a locally-routed packet; drop source sk
+        * accounting
+        */
+       skb_orphan(skb);
+
+       /* ensure we have enough data for a header and a type */
+       if (skb->len < sizeof(struct mctp_hdr) + 1)
+               goto out;
+
+       /* grab header, advance data ptr */
+       mh = mctp_hdr(skb);
+       skb_pull(skb, sizeof(struct mctp_hdr));
+
+       if (mh->ver != 1)
+               goto out;
+
+       flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
+       tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+       rcu_read_lock();
+
+       /* lookup socket / reasm context, exactly matching (src,dest,tag) */
+       key = mctp_lookup_key(net, skb, mh->src);
+
+       if (flags & MCTP_HDR_FLAG_SOM) {
+               if (key) {
+                       msk = container_of(key->sk, struct mctp_sock, sk);
+               } else {
+                       /* first response to a broadcast? do a more general
+                        * key lookup to find the socket, but don't use this
+                        * key for reassembly - we'll create a more specific
+                        * one for future packets if required (ie, !EOM).
+                        */
+                       key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+                       if (key) {
+                               msk = container_of(key->sk,
+                                                  struct mctp_sock, sk);
+                               key = NULL;
+                       }
+               }
+
+               if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
+                       msk = mctp_lookup_bind(net, skb);
+
+               if (!msk) {
+                       rc = -ENOENT;
+                       goto out_unlock;
+               }
+
+               /* single-packet message? deliver to socket, clean up any
+                * pending key.
+                */
+               if (flags & MCTP_HDR_FLAG_EOM) {
+                       sock_queue_rcv_skb(&msk->sk, skb);
+                       if (key) {
+                               spin_lock_irqsave(&key->reasm_lock, f);
+                               /* we've hit a pending reassembly; not much we
+                                * can do but drop it
+                                */
+                               __mctp_key_unlock_drop(key, net, f);
+                       }
+                       rc = 0;
+                       goto out_unlock;
+               }
+
+               /* broadcast response or a bind() - create a key for further
+                * packets for this message
+                */
+               if (!key) {
+                       key = mctp_key_alloc(msk, mh->dest, mh->src,
+                                            tag, GFP_ATOMIC);
+                       if (!key) {
+                               rc = -ENOMEM;
+                               goto out_unlock;
+                       }
+
+                       /* we can queue without the reasm lock here, as the
+                        * key isn't observable yet
+                        */
+                       mctp_frag_queue(key, skb);
+
+                       /* if the key_add fails, we've raced with another
+                        * SOM packet with the same src, dest and tag. There's
+                        * no way to distinguish future packets, so all we
+                        * can do is drop; we'll free the skb on exit from
+                        * this function.
+                        */
+                       rc = mctp_key_add(key, msk);
+                       if (rc)
+                               kfree(key);
+
+               } else {
+                       /* existing key: start reassembly */
+                       spin_lock_irqsave(&key->reasm_lock, f);
+
+                       if (key->reasm_head || key->reasm_dead) {
+                               /* duplicate start? drop everything */
+                               __mctp_key_unlock_drop(key, net, f);
+                               rc = -EEXIST;
+                       } else {
+                               rc = mctp_frag_queue(key, skb);
+                               spin_unlock_irqrestore(&key->reasm_lock, f);
+                       }
+               }
+
+       } else if (key) {
+               /* this packet continues a previous message; reassemble
+                * using the message-specific key
+                */
+
+               spin_lock_irqsave(&key->reasm_lock, f);
+
+               /* we need to be continuing an existing reassembly... */
+               if (!key->reasm_head)
+                       rc = -EINVAL;
+               else
+                       rc = mctp_frag_queue(key, skb);
+
+               /* end of message? deliver to socket, and we're done with
+                * the reassembly/response key
+                */
+               if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+                       sock_queue_rcv_skb(key->sk, key->reasm_head);
+                       key->reasm_head = NULL;
+                       __mctp_key_unlock_drop(key, net, f);
+               } else {
+                       spin_unlock_irqrestore(&key->reasm_lock, f);
+               }
+
+       } else {
+               /* not a start, no matching key */
+               rc = -ENOENT;
+       }
+
+out_unlock:
+       rcu_read_unlock();
+out:
+       if (rc)
+               kfree_skb(skb);
+       return rc;
+}
+
+static unsigned int mctp_route_mtu(struct mctp_route *rt)
+{
+       return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
+}
+
+static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+{
+       struct mctp_hdr *hdr = mctp_hdr(skb);
+       char daddr_buf[MAX_ADDR_LEN];
+       char *daddr = NULL;
+       unsigned int mtu;
+       int rc;
+
+       skb->protocol = htons(ETH_P_MCTP);
+
+       mtu = READ_ONCE(skb->dev->mtu);
+       if (skb->len > mtu) {
+               kfree_skb(skb);
+               return -EMSGSIZE;
+       }
+
+       /* If lookup fails let the device handle daddr==NULL */
+       if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+               daddr = daddr_buf;
+
+       rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+                            daddr, skb->dev->dev_addr, skb->len);
+       if (rc) {
+               kfree_skb(skb);
+               return -EHOSTUNREACH;
+       }
+
+       rc = dev_queue_xmit(skb);
+       if (rc)
+               rc = net_xmit_errno(rc);
+
+       return rc;
+}
+
+/* route alloc/release */
+static void mctp_route_release(struct mctp_route *rt)
+{
+       if (refcount_dec_and_test(&rt->refs)) {
+               dev_put(rt->dev->dev);
+               kfree_rcu(rt, rcu);
+       }
+}
+
+/* returns a route with the refcount at 1 */
+static struct mctp_route *mctp_route_alloc(void)
+{
+       struct mctp_route *rt;
+
+       rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+       if (!rt)
+               return NULL;
+
+       INIT_LIST_HEAD(&rt->list);
+       refcount_set(&rt->refs, 1);
+       rt->output = mctp_route_discard;
+
+       return rt;
+}
+
+unsigned int mctp_default_net(struct net *net)
+{
+       return READ_ONCE(net->mctp.default_net);
+}
+
+int mctp_default_net_set(struct net *net, unsigned int index)
+{
+       if (index == 0)
+               return -EINVAL;
+       WRITE_ONCE(net->mctp.default_net, index);
+       return 0;
+}
+
+/* tag management */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+                            struct mctp_sock *msk)
+{
+       struct netns_mctp *mns = &net->mctp;
+
+       lockdep_assert_held(&mns->keys_lock);
+
+       /* we hold the net->key_lock here, allowing updates to both
+        * then net and sk
+        */
+       hlist_add_head_rcu(&key->hlist, &mns->keys);
+       hlist_add_head_rcu(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+                               mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+       struct net *net = sock_net(&msk->sk);
+       struct netns_mctp *mns = &net->mctp;
+       struct mctp_sk_key *key, *tmp;
+       unsigned long flags;
+       int rc = -EAGAIN;
+       u8 tagbits;
+
+       /* be optimistic, alloc now */
+       key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+       if (!key)
+               return -ENOMEM;
+
+       /* 8 possible tag values */
+       tagbits = 0xff;
+
+       spin_lock_irqsave(&mns->keys_lock, flags);
+
+       /* Walk through the existing keys, looking for potential conflicting
+        * tags. If we find a conflict, clear that bit from tagbits
+        */
+       hlist_for_each_entry(tmp, &mns->keys, hlist) {
+               /* if we don't own the tag, it can't conflict */
+               if (tmp->tag & MCTP_HDR_FLAG_TO)
+                       continue;
+
+               if ((tmp->peer_addr == daddr ||
+                    tmp->peer_addr == MCTP_ADDR_ANY) &&
+                   tmp->local_addr == saddr)
+                       tagbits &= ~(1 << tmp->tag);
+
+               if (!tagbits)
+                       break;
+       }
+
+       if (tagbits) {
+               key->tag = __ffs(tagbits);
+               mctp_reserve_tag(net, key, msk);
+               *tagp = key->tag;
+               rc = 0;
+       }
+
+       spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+       if (!tagbits)
+               kfree(key);
+
+       return rc;
+}
+
+/* routing lookups */
+static bool mctp_rt_match_eid(struct mctp_route *rt,
+                             unsigned int net, mctp_eid_t eid)
+{
+       return READ_ONCE(rt->dev->net) == net &&
+               rt->min <= eid && rt->max >= eid;
+}
+
+/* compares match, used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+                                 struct mctp_route *rt2)
+{
+       ASSERT_RTNL();
+       return rt1->dev->net == rt2->dev->net &&
+               rt1->min == rt2->min &&
+               rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+                                    mctp_eid_t daddr)
+{
+       struct mctp_route *tmp, *rt = NULL;
+
+       list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+               /* TODO: add metrics */
+               if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+                       if (refcount_inc_not_zero(&tmp->refs)) {
+                               rt = tmp;
+                               break;
+                       }
+               }
+       }
+
+       return rt;
+}
+
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+       int rc;
+
+       rc = rt->output(rt, skb);
+       mctp_route_release(rt);
+       return rc;
+}
+
+static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+                                 unsigned int mtu, u8 tag)
+{
+       const unsigned int hlen = sizeof(struct mctp_hdr);
+       struct mctp_hdr *hdr, *hdr2;
+       unsigned int pos, size;
+       struct sk_buff *skb2;
+       int rc;
+       u8 seq;
+
+       hdr = mctp_hdr(skb);
+       seq = 0;
+       rc = 0;
+
+       if (mtu < hlen + 1) {
+               kfree_skb(skb);
+               return -EMSGSIZE;
+       }
+
+       /* we've got the header */
+       skb_pull(skb, hlen);
+
+       for (pos = 0; pos < skb->len;) {
+               /* size of message payload */
+               size = min(mtu - hlen, skb->len - pos);
+
+               skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+               if (!skb2) {
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               /* generic skb copy */
+               skb2->protocol = skb->protocol;
+               skb2->priority = skb->priority;
+               skb2->dev = skb->dev;
+               memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
+
+               if (skb->sk)
+                       skb_set_owner_w(skb2, skb->sk);
+
+               /* establish packet */
+               skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+               skb_reset_network_header(skb2);
+               skb_put(skb2, hlen + size);
+               skb2->transport_header = skb2->network_header + hlen;
+
+               /* copy header fields, calculate SOM/EOM flags & seq */
+               hdr2 = mctp_hdr(skb2);
+               hdr2->ver = hdr->ver;
+               hdr2->dest = hdr->dest;
+               hdr2->src = hdr->src;
+               hdr2->flags_seq_tag = tag &
+                       (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+               if (pos == 0)
+                       hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+
+               if (pos + size == skb->len)
+                       hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+
+               hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
+
+               /* copy message payload */
+               skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+
+               /* do route, but don't drop the rt reference */
+               rc = rt->output(rt, skb2);
+               if (rc)
+                       break;
+
+               seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
+               pos += size;
+       }
+
+       mctp_route_release(rt);
+       consume_skb(skb);
+       return rc;
+}
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+                     struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+       struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+       struct mctp_skb_cb *cb = mctp_cb(skb);
+       struct mctp_hdr *hdr;
+       unsigned long flags;
+       unsigned int mtu;
+       mctp_eid_t saddr;
+       int rc;
+       u8 tag;
+
+       if (WARN_ON(!rt->dev))
+               return -EINVAL;
+
+       spin_lock_irqsave(&rt->dev->addrs_lock, flags);
+       if (rt->dev->num_addrs == 0) {
+               rc = -EHOSTUNREACH;
+       } else {
+               /* use the outbound interface's first address as our source */
+               saddr = rt->dev->addrs[0];
+               rc = 0;
+       }
+       spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+
+       if (rc)
+               return rc;
+
+       if (req_tag & MCTP_HDR_FLAG_TO) {
+               rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+               if (rc)
+                       return rc;
+               tag |= MCTP_HDR_FLAG_TO;
+       } else {
+               tag = req_tag;
+       }
+
+
+       skb->protocol = htons(ETH_P_MCTP);
+       skb->priority = 0;
+       skb_reset_transport_header(skb);
+       skb_push(skb, sizeof(struct mctp_hdr));
+       skb_reset_network_header(skb);
+       skb->dev = rt->dev->dev;
+
+       /* cb->net will have been set on initial ingress */
+       cb->src = saddr;
+
+       /* set up common header fields */
+       hdr = mctp_hdr(skb);
+       hdr->ver = 1;
+       hdr->dest = daddr;
+       hdr->src = saddr;
+
+       mtu = mctp_route_mtu(rt);
+
+       if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
+               hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+                       tag;
+               return mctp_do_route(rt, skb);
+       } else {
+               return mctp_do_fragment_route(rt, skb, mtu, tag);
+       }
+}
+
+/* route management */
+static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+                         unsigned int daddr_extent, unsigned int mtu,
+                         unsigned char type)
+{
+       int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_route *rt, *ert;
+
+       if (!mctp_address_ok(daddr_start))
+               return -EINVAL;
+
+       if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+               return -EINVAL;
+
+       switch (type) {
+       case RTN_LOCAL:
+               rtfn = mctp_route_input;
+               break;
+       case RTN_UNICAST:
+               rtfn = mctp_route_output;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       rt = mctp_route_alloc();
+       if (!rt)
+               return -ENOMEM;
+
+       rt->min = daddr_start;
+       rt->max = daddr_start + daddr_extent;
+       rt->mtu = mtu;
+       rt->dev = mdev;
+       dev_hold(rt->dev->dev);
+       rt->type = type;
+       rt->output = rtfn;
+
+       ASSERT_RTNL();
+       /* Prevent duplicate identical routes. */
+       list_for_each_entry(ert, &net->mctp.routes, list) {
+               if (mctp_rt_compare_exact(rt, ert)) {
+                       mctp_route_release(rt);
+                       return -EEXIST;
+               }
+       }
+
+       list_add_rcu(&rt->list, &net->mctp.routes);
+
+       return 0;
+}
+
+static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+                            unsigned int daddr_extent)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_route *rt, *tmp;
+       mctp_eid_t daddr_end;
+       bool dropped;
+
+       if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+               return -EINVAL;
+
+       daddr_end = daddr_start + daddr_extent;
+       dropped = false;
+
+       ASSERT_RTNL();
+
+       list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+               if (rt->dev == mdev &&
+                   rt->min == daddr_start && rt->max == daddr_end) {
+                       list_del_rcu(&rt->list);
+                       /* TODO: immediate RTM_DELROUTE */
+                       mctp_route_release(rt);
+                       dropped = true;
+               }
+       }
+
+       return dropped ? 0 : -ENOENT;
+}
+
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+       return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+       return mctp_route_remove(mdev, addr, 0);
+}
+
+/* removes all entries for a given device */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+       struct net *net = dev_net(mdev->dev);
+       struct mctp_route *rt, *tmp;
+
+       ASSERT_RTNL();
+       list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+               if (rt->dev == mdev) {
+                       list_del_rcu(&rt->list);
+                       /* TODO: immediate RTM_DELROUTE */
+                       mctp_route_release(rt);
+               }
+       }
+}
+
+/* Incoming packet-handling */
+
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+                               struct packet_type *pt,
+                               struct net_device *orig_dev)
+{
+       struct net *net = dev_net(dev);
+       struct mctp_skb_cb *cb;
+       struct mctp_route *rt;
+       struct mctp_hdr *mh;
+
+       /* basic non-data sanity checks */
+       if (dev->type != ARPHRD_MCTP)
+               goto err_drop;
+
+       if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+               goto err_drop;
+
+       skb_reset_transport_header(skb);
+       skb_reset_network_header(skb);
+
+       /* We have enough for a header; decode and route */
+       mh = mctp_hdr(skb);
+       if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+               goto err_drop;
+
+       cb = __mctp_cb(skb);
+       rcu_read_lock();
+       cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+       rcu_read_unlock();
+
+       rt = mctp_route_lookup(net, cb->net, mh->dest);
+       if (!rt)
+               goto err_drop;
+
+       mctp_do_route(rt, skb);
+
+       return NET_RX_SUCCESS;
+
+err_drop:
+       kfree_skb(skb);
+       return NET_RX_DROP;
+}
+
+static struct packet_type mctp_packet_type = {
+       .type = cpu_to_be16(ETH_P_MCTP),
+       .func = mctp_pkttype_receive,
+};
+
+/* netlink interface */
+
+static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
+       [RTA_DST]               = { .type = NLA_U8 },
+       [RTA_METRICS]           = { .type = NLA_NESTED },
+       [RTA_OIF]               = { .type = NLA_U32 },
+};
+
+/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
+ * tb must hold RTA_MAX+1 elements.
+ */
+static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
+                             struct netlink_ext_ack *extack,
+                             struct nlattr **tb, struct rtmsg **rtm,
+                             struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+{
+       struct net *net = sock_net(skb->sk);
+       struct net_device *dev;
+       unsigned int ifindex;
+       int rc;
+
+       rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+                        rta_mctp_policy, extack);
+       if (rc < 0) {
+               NL_SET_ERR_MSG(extack, "incorrect format");
+               return rc;
+       }
+
+       if (!tb[RTA_DST]) {
+               NL_SET_ERR_MSG(extack, "dst EID missing");
+               return -EINVAL;
+       }
+       *daddr_start = nla_get_u8(tb[RTA_DST]);
+
+       if (!tb[RTA_OIF]) {
+               NL_SET_ERR_MSG(extack, "ifindex missing");
+               return -EINVAL;
+       }
+       ifindex = nla_get_u32(tb[RTA_OIF]);
+
+       *rtm = nlmsg_data(nlh);
+       if ((*rtm)->rtm_family != AF_MCTP) {
+               NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
+               return -EINVAL;
+       }
+
+       dev = __dev_get_by_index(net, ifindex);
+       if (!dev) {
+               NL_SET_ERR_MSG(extack, "bad ifindex");
+               return -ENODEV;
+       }
+       *mdev = mctp_dev_get_rtnl(dev);
+       if (!*mdev)
+               return -ENODEV;
+
+       if (dev->flags & IFF_LOOPBACK) {
+               NL_SET_ERR_MSG(extack, "no routes to loopback");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+                        struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[RTA_MAX + 1];
+       mctp_eid_t daddr_start;
+       struct mctp_dev *mdev;
+       struct rtmsg *rtm;
+       unsigned int mtu;
+       int rc;
+
+       rc = mctp_route_nlparse(skb, nlh, extack, tb,
+                               &rtm, &mdev, &daddr_start);
+       if (rc < 0)
+               return rc;
+
+       if (rtm->rtm_type != RTN_UNICAST) {
+               NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
+               return -EINVAL;
+       }
+
+       /* TODO: parse mtu from nlparse */
+       mtu = 0;
+
+       if (rtm->rtm_type != RTN_UNICAST)
+               return -EINVAL;
+
+       rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
+                           rtm->rtm_type);
+       return rc;
+}
+
+static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+                        struct netlink_ext_ack *extack)
+{
+       struct nlattr *tb[RTA_MAX + 1];
+       mctp_eid_t daddr_start;
+       struct mctp_dev *mdev;
+       struct rtmsg *rtm;
+       int rc;
+
+       rc = mctp_route_nlparse(skb, nlh, extack, tb,
+                               &rtm, &mdev, &daddr_start);
+       if (rc < 0)
+               return rc;
+
+       /* we only have unicast routes */
+       if (rtm->rtm_type != RTN_UNICAST)
+               return -EINVAL;
+
+       rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+       return rc;
+}
+
+static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
+                           u32 portid, u32 seq, int event, unsigned int flags)
+{
+       struct nlmsghdr *nlh;
+       struct rtmsg *hdr;
+       void *metrics;
+
+       nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+       if (!nlh)
+               return -EMSGSIZE;
+
+       hdr = nlmsg_data(nlh);
+       hdr->rtm_family = AF_MCTP;
+
+       /* we use the _len fields as a number of EIDs, rather than
+        * a number of bits in the address
+        */
+       hdr->rtm_dst_len = rt->max - rt->min;
+       hdr->rtm_src_len = 0;
+       hdr->rtm_tos = 0;
+       hdr->rtm_table = RT_TABLE_DEFAULT;
+       hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
+       hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
+       hdr->rtm_type = rt->type;
+
+       if (nla_put_u8(skb, RTA_DST, rt->min))
+               goto cancel;
+
+       metrics = nla_nest_start_noflag(skb, RTA_METRICS);
+       if (!metrics)
+               goto cancel;
+
+       if (rt->mtu) {
+               if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
+                       goto cancel;
+       }
+
+       nla_nest_end(skb, metrics);
+
+       if (rt->dev) {
+               if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
+                       goto cancel;
+       }
+
+       /* TODO: conditional neighbour physaddr? */
+
+       nlmsg_end(skb, nlh);
+
+       return 0;
+
+cancel:
+       nlmsg_cancel(skb, nlh);
+       return -EMSGSIZE;
+}
+
+static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct net *net = sock_net(skb->sk);
+       struct mctp_route *rt;
+       int s_idx, idx;
+
+       /* TODO: allow filtering on route data, possibly under
+        * cb->strict_check
+        */
+
+       /* TODO: change to struct overlay */
+       s_idx = cb->args[0];
+       idx = 0;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+               if (idx++ < s_idx)
+                       continue;
+               if (mctp_fill_rtinfo(skb, rt,
+                                    NETLINK_CB(cb->skb).portid,
+                                    cb->nlh->nlmsg_seq,
+                                    RTM_NEWROUTE, NLM_F_MULTI) < 0)
+                       break;
+       }
+
+       rcu_read_unlock();
+       cb->args[0] = idx;
+
+       return skb->len;
+}
+
+/* net namespace implementation */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+       struct netns_mctp *ns = &net->mctp;
+
+       INIT_LIST_HEAD(&ns->routes);
+       INIT_HLIST_HEAD(&ns->binds);
+       mutex_init(&ns->bind_lock);
+       INIT_HLIST_HEAD(&ns->keys);
+       spin_lock_init(&ns->keys_lock);
+       WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
+       return 0;
+}
+
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+       struct mctp_route *rt;
+
+       list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+               mctp_route_release(rt);
+}
+
+static struct pernet_operations mctp_net_ops = {
+       .init = mctp_routes_net_init,
+       .exit = mctp_routes_net_exit,
+};
+
+int __init mctp_routes_init(void)
+{
+       dev_add_pack(&mctp_packet_type);
+
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+                            NULL, mctp_dump_rtinfo, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+                            mctp_newroute, NULL, 0);
+       rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+                            mctp_delroute, NULL, 0);
+
+       return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_routes_exit(void)
+{
+       unregister_pernet_subsys(&mctp_net_ops);
+       rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+       rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+       rtnl_unregister(PF_MCTP, RTM_GETROUTE);
+       dev_remove_pack(&mctp_packet_type);
+}
index 05a21dd..ffeb2df 100644 (file)
@@ -407,7 +407,6 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
        /* Verify ttl is valid */
        if (dec.ttl <= 1)
                goto err;
-       dec.ttl -= 1;
 
        /* Find the output device */
        out_dev = rcu_dereference(nh->nh_dev);
@@ -431,6 +430,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
        skb->dev = out_dev;
        skb->protocol = htons(ETH_P_MPLS_UC);
 
+       dec.ttl -= 1;
        if (unlikely(!new_header_size && dec.bos)) {
                /* Penultimate hop popping */
                if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
index 7d738bd..8b23546 100644 (file)
@@ -21,43 +21,50 @@ struct mptcp_pernet {
        struct ctl_table_header *ctl_table_hdr;
 #endif
 
-       u8 mptcp_enabled;
        unsigned int add_addr_timeout;
+       unsigned int stale_loss_cnt;
+       u8 mptcp_enabled;
        u8 checksum_enabled;
        u8 allow_join_initial_addr_port;
 };
 
-static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
+static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
 {
        return net_generic(net, mptcp_pernet_id);
 }
 
-int mptcp_is_enabled(struct net *net)
+int mptcp_is_enabled(const struct net *net)
 {
        return mptcp_get_pernet(net)->mptcp_enabled;
 }
 
-unsigned int mptcp_get_add_addr_timeout(struct net *net)
+unsigned int mptcp_get_add_addr_timeout(const struct net *net)
 {
        return mptcp_get_pernet(net)->add_addr_timeout;
 }
 
-int mptcp_is_checksum_enabled(struct net *net)
+int mptcp_is_checksum_enabled(const struct net *net)
 {
        return mptcp_get_pernet(net)->checksum_enabled;
 }
 
-int mptcp_allow_join_id0(struct net *net)
+int mptcp_allow_join_id0(const struct net *net)
 {
        return mptcp_get_pernet(net)->allow_join_initial_addr_port;
 }
 
+unsigned int mptcp_stale_loss_cnt(const struct net *net)
+{
+       return mptcp_get_pernet(net)->stale_loss_cnt;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
        pernet->mptcp_enabled = 1;
        pernet->add_addr_timeout = TCP_RTO_MAX;
        pernet->checksum_enabled = 0;
        pernet->allow_join_initial_addr_port = 1;
+       pernet->stale_loss_cnt = 4;
 }
 
 #ifdef CONFIG_SYSCTL
@@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
                .extra1       = SYSCTL_ZERO,
                .extra2       = SYSCTL_ONE
        },
+       {
+               .procname = "stale_loss_cnt",
+               .maxlen = sizeof(unsigned int),
+               .mode = 0644,
+               .proc_handler = proc_douintvec_minmax,
+       },
        {}
 };
 
@@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
        table[1].data = &pernet->add_addr_timeout;
        table[2].data = &pernet->checksum_enabled;
        table[3].data = &pernet->allow_join_initial_addr_port;
+       table[4].data = &pernet->stale_loss_cnt;
 
        hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
        if (!hdr)
index ff2cc0e..b21ff9b 100644 (file)
@@ -44,7 +44,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
        SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
        SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
        SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+       SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
+       SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
        SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
+       SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
+       SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
        SNMP_MIB_SENTINEL
 };
 
index 0663cb1..ecd3d8b 100644 (file)
@@ -37,7 +37,11 @@ enum linux_mptcp_mib_field {
        MPTCP_MIB_RMSUBFLOW,            /* Remove a subflow */
        MPTCP_MIB_MPPRIOTX,             /* Transmit a MP_PRIO */
        MPTCP_MIB_MPPRIORX,             /* Received a MP_PRIO */
+       MPTCP_MIB_MPFAILTX,             /* Transmit a MP_FAIL */
+       MPTCP_MIB_MPFAILRX,             /* Received a MP_FAIL */
        MPTCP_MIB_RCVPRUNED,            /* Incoming packet dropped due to memory limit */
+       MPTCP_MIB_SUBFLOWSTALE,         /* Subflows entered 'stale' status */
+       MPTCP_MIB_SUBFLOWRECOVER,       /* Subflows returned to active status after being stale */
        __MPTCP_MIB_MAX
 };
 
index 7adcbc1..c41273c 100644 (file)
@@ -81,12 +81,11 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                 * is if both hosts in their SYNs set A=0."
                 */
                if (flags & MPTCP_CAP_CHECKSUM_REQD)
-                       mp_opt->csum_reqd = 1;
+                       mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
 
-               if (flags & MPTCP_CAP_DENY_JOIN_ID0)
-                       mp_opt->deny_join_id0 = 1;
+               mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
 
-               mp_opt->mp_capable = 1;
+               mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
                if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
                        mp_opt->sndr_key = get_unaligned_be64(ptr);
                        ptr += 8;
@@ -101,7 +100,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                         * equivalent to those in a DSS option and can be used
                         * interchangeably."
                         */
-                       mp_opt->dss = 1;
+                       mp_opt->suboptions |= OPTION_MPTCP_DSS;
                        mp_opt->use_map = 1;
                        mp_opt->mpc_map = 1;
                        mp_opt->data_len = get_unaligned_be16(ptr);
@@ -109,7 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                }
                if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
                        mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
-                       mp_opt->csum_reqd = 1;
+                       mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
                        ptr += 2;
                }
                pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
@@ -118,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                break;
 
        case MPTCPOPT_MP_JOIN:
-               mp_opt->mp_join = 1;
+               mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
                if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
                        mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
                        mp_opt->join_id = *ptr++;
@@ -144,7 +143,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
                        pr_debug("MP_JOIN hmac");
                } else {
-                       mp_opt->mp_join = 0;
+                       mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
                }
                break;
 
@@ -192,8 +191,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                    opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
                        break;
 
-               mp_opt->dss = 1;
-
+               mp_opt->suboptions |= OPTION_MPTCP_DSS;
                if (mp_opt->use_ack) {
                        if (mp_opt->ack64) {
                                mp_opt->data_ack = get_unaligned_be64(ptr);
@@ -222,14 +220,15 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                        ptr += 2;
 
                        if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
-                               mp_opt->csum_reqd = 1;
+                               mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
                                mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
                                ptr += 2;
                        }
 
                        pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
                                 mp_opt->data_seq, mp_opt->subflow_seq,
-                                mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum);
+                                mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
+                                mp_opt->csum);
                }
 
                break;
@@ -260,8 +259,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                                break;
                }
 
-               mp_opt->add_addr = 1;
+               mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR;
                mp_opt->addr.id = *ptr++;
+               mp_opt->addr.port = 0;
+               mp_opt->ahmac = 0;
                if (mp_opt->addr.family == AF_INET) {
                        memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
                        ptr += 4;
@@ -298,7 +299,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
                ptr++;
 
-               mp_opt->rm_addr = 1;
+               mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR;
                mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
                for (i = 0; i < mp_opt->rm_list.nr; i++)
                        mp_opt->rm_list.ids[i] = *ptr++;
@@ -309,7 +310,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                if (opsize != TCPOLEN_MPTCP_PRIO)
                        break;
 
-               mp_opt->mp_prio = 1;
+               mp_opt->suboptions |= OPTION_MPTCP_PRIO;
                mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
                pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
                break;
@@ -321,7 +322,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
                ptr += 2;
                mp_opt->rcvr_key = get_unaligned_be64(ptr);
                ptr += 8;
-               mp_opt->fastclose = 1;
+               mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
                break;
 
        case MPTCPOPT_RST:
@@ -330,12 +331,23 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
                if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
                        break;
-               mp_opt->reset = 1;
+
+               mp_opt->suboptions |= OPTION_MPTCP_RST;
                flags = *ptr++;
                mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
                mp_opt->reset_reason = *ptr;
                break;
 
+       case MPTCPOPT_MP_FAIL:
+               if (opsize != TCPOLEN_MPTCP_FAIL)
+                       break;
+
+               ptr += 2;
+               mp_opt->suboptions |= OPTION_MPTCP_FAIL;
+               mp_opt->fail_seq = get_unaligned_be64(ptr);
+               pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+               break;
+
        default:
                break;
        }
@@ -345,25 +357,12 @@ void mptcp_get_options(const struct sock *sk,
                       const struct sk_buff *skb,
                       struct mptcp_options_received *mp_opt)
 {
-       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
        const struct tcphdr *th = tcp_hdr(skb);
        const unsigned char *ptr;
        int length;
 
        /* initialize option status */
-       mp_opt->mp_capable = 0;
-       mp_opt->mp_join = 0;
-       mp_opt->add_addr = 0;
-       mp_opt->ahmac = 0;
-       mp_opt->fastclose = 0;
-       mp_opt->addr.port = 0;
-       mp_opt->rm_addr = 0;
-       mp_opt->dss = 0;
-       mp_opt->mp_prio = 0;
-       mp_opt->reset = 0;
-       mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
-       mp_opt->deny_join_id0 = 0;
+       mp_opt->suboptions = 0;
 
        length = (th->doff * 4) - sizeof(struct tcphdr);
        ptr = (const unsigned char *)(th + 1);
@@ -592,6 +591,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
                dss_size = map_size;
                if (skb && snd_data_fin_enable)
                        mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
+               opts->suboptions = OPTION_MPTCP_DSS;
                ret = true;
        }
 
@@ -615,6 +615,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
                opts->ext_copy.ack64 = 0;
        }
        opts->ext_copy.use_ack = 1;
+       opts->suboptions = OPTION_MPTCP_DSS;
        WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
 
        /* Add kind/length/subtype/flag overhead if mapping is not populated */
@@ -667,29 +668,34 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
        bool port;
        int len;
 
-       if ((mptcp_pm_should_add_signal_ipv6(msk) ||
-            mptcp_pm_should_add_signal_port(msk) ||
-            mptcp_pm_should_add_signal_echo(msk)) &&
-           skb && skb_is_tcp_pure_ack(skb)) {
-               pr_debug("drop other suboptions");
-               opts->suboptions = 0;
-               opts->ext_copy.use_ack = 0;
-               opts->ext_copy.use_map = 0;
-               remaining += opt_size;
-               drop_other_suboptions = true;
-       }
-
+       /* add addr will strip the existing options, be sure to avoid breaking
+        * MPC/MPJ handshakes
+        */
        if (!mptcp_pm_should_add_signal(msk) ||
-           !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
+           (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
+           !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
+                   &echo, &port, &drop_other_suboptions))
                return false;
 
+       if (drop_other_suboptions)
+               remaining += opt_size;
        len = mptcp_add_addr_len(opts->addr.family, echo, port);
        if (remaining < len)
                return false;
 
        *size = len;
-       if (drop_other_suboptions)
+       if (drop_other_suboptions) {
+               pr_debug("drop other suboptions");
+               opts->suboptions = 0;
+
+               /* note that e.g. DSS could have written into the memory
+                * aliased by ahmac, we must reset the field here
+                * to avoid appending the hmac even for ADD_ADDR echo
+                * options
+                */
+               opts->ahmac = 0;
                *size -= opt_size;
+       }
        opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
        if (!echo) {
                opts->ahmac = add_addr_generate_hmac(msk->local_key,
@@ -739,7 +745,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
-       if (!subflow->send_mp_prio)
+       /* can't send MP_PRIO with MPC, as they share the same option space:
+        * 'backup'. Also it makes no sense at all
+        */
+       if (!subflow->send_mp_prio ||
+           ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+             OPTION_MPTCP_MPC_ACK) & opts->suboptions))
                return false;
 
        /* account for the trailing 'nop' option */
@@ -755,7 +766,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
        return true;
 }
 
-static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
                                                   unsigned int *size,
                                                   unsigned int remaining,
                                                   struct mptcp_out_options *opts)
@@ -763,12 +774,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
        const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
        if (remaining < TCPOLEN_MPTCP_RST)
-               return;
+               return false;
 
        *size = TCPOLEN_MPTCP_RST;
        opts->suboptions |= OPTION_MPTCP_RST;
        opts->reset_transient = subflow->reset_transient;
        opts->reset_reason = subflow->reset_reason;
+
+       return true;
+}
+
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+                                             unsigned int *size,
+                                             unsigned int remaining,
+                                             struct mptcp_out_options *opts)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+       if (likely(!subflow->send_mp_fail))
+               return false;
+
+       if (remaining < TCPOLEN_MPTCP_FAIL)
+               return false;
+
+       *size = TCPOLEN_MPTCP_FAIL;
+       opts->suboptions |= OPTION_MPTCP_FAIL;
+       opts->fail_seq = subflow->map_seq;
+
+       pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+       return true;
 }
 
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
@@ -787,15 +822,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
                return false;
 
        if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
-               mptcp_established_options_rst(sk, skb, size, remaining, opts);
+               if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+                       *size += opt_size;
+                       remaining -= opt_size;
+               }
+               if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
+                       *size += opt_size;
+                       remaining -= opt_size;
+               }
                return true;
        }
 
        snd_data_fin = mptcp_data_fin_enabled(msk);
        if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
                ret = true;
-       else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+       else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
                ret = true;
+               if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+                       *size += opt_size;
+                       remaining -= opt_size;
+                       return true;
+               }
+       }
 
        /* we reserved enough space for the above options, and exceeding the
         * TCP option space would be fatal
@@ -868,7 +916,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
                 */
                if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
                    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
-                   subflow->mp_join && mp_opt->mp_join &&
+                   subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
                    READ_ONCE(msk->pm.server_side))
                        tcp_send_ack(ssk);
                goto fully_established;
@@ -885,8 +933,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
                return subflow->mp_capable;
        }
 
-       if ((mp_opt->dss && mp_opt->use_ack) ||
-           (mp_opt->add_addr && !mp_opt->echo)) {
+       if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+           ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
                /* subflows are fully established as soon as we get any
                 * additional ack, including ADD_ADDR.
                 */
@@ -899,7 +947,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
         * then fallback to TCP. Fallback scenarios requires a reset for
         * MP_JOIN subflows.
         */
-       if (!mp_opt->mp_capable) {
+       if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) {
                if (subflow->mp_join)
                        goto reset;
                subflow->mp_capable = 0;
@@ -971,9 +1019,11 @@ static void ack_update_msk(struct mptcp_sock *msk,
        old_snd_una = msk->snd_una;
        new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
 
-       /* ACK for data not even sent yet? Ignore. */
-       if (after64(new_snd_una, snd_nxt))
-               new_snd_una = old_snd_una;
+       /* ACK for data not even sent yet and even above recovery bound? Ignore.*/
+       if (unlikely(after64(new_snd_una, snd_nxt))) {
+               if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
+                       new_snd_una = old_snd_una;
+       }
 
        new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 
@@ -1061,48 +1111,51 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
        if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
                return sk->sk_state != TCP_CLOSE;
 
-       if (mp_opt.fastclose &&
-           msk->local_key == mp_opt.rcvr_key) {
-               WRITE_ONCE(msk->rcv_fastclose, true);
-               mptcp_schedule_work((struct sock *)msk);
-       }
+       if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
+               if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
+                   msk->local_key == mp_opt.rcvr_key) {
+                       WRITE_ONCE(msk->rcv_fastclose, true);
+                       mptcp_schedule_work((struct sock *)msk);
+               }
 
-       if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
-               if (!mp_opt.echo) {
-                       mptcp_pm_add_addr_received(msk, &mp_opt.addr);
-                       MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
-               } else {
-                       mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
-                       mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
-                       MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+               if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
+                   add_addr_hmac_valid(msk, &mp_opt)) {
+                       if (!mp_opt.echo) {
+                               mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+                               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
+                       } else {
+                               mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+                               mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
+                               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+                       }
+
+                       if (mp_opt.addr.port)
+                               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
                }
 
-               if (mp_opt.addr.port)
-                       MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
+               if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR)
+                       mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
 
-               mp_opt.add_addr = 0;
-       }
+               if (mp_opt.suboptions & OPTION_MPTCP_PRIO) {
+                       mptcp_pm_mp_prio_received(sk, mp_opt.backup);
+                       MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
+               }
 
-       if (mp_opt.rm_addr) {
-               mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
-               mp_opt.rm_addr = 0;
-       }
+               if (mp_opt.suboptions & OPTION_MPTCP_FAIL) {
+                       mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+                       MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+               }
 
-       if (mp_opt.mp_prio) {
-               mptcp_pm_mp_prio_received(sk, mp_opt.backup);
-               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
-               mp_opt.mp_prio = 0;
-       }
+               if (mp_opt.suboptions & OPTION_MPTCP_RST) {
+                       subflow->reset_seen = 1;
+                       subflow->reset_reason = mp_opt.reset_reason;
+                       subflow->reset_transient = mp_opt.reset_transient;
+               }
 
-       if (mp_opt.reset) {
-               subflow->reset_seen = 1;
-               subflow->reset_reason = mp_opt.reset_reason;
-               subflow->reset_transient = mp_opt.reset_transient;
+               if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
+                       return true;
        }
 
-       if (!mp_opt.dss)
-               return true;
-
        /* we can't wait for recvmsg() to update the ack_seq, otherwise
         * monodirectional flows will stuck
         */
@@ -1129,7 +1182,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 
        memset(mpext, 0, sizeof(*mpext));
 
-       if (mp_opt.use_map) {
+       if (likely(mp_opt.use_map)) {
                if (mp_opt.mpc_map) {
                        /* this is an MP_CAPABLE carrying MPTCP data
                         * we know this map the first chunk of data
@@ -1149,7 +1202,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
                }
                mpext->data_len = mp_opt.data_len;
                mpext->use_map = 1;
-               mpext->csum_reqd = mp_opt.csum_reqd;
+               mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD);
 
                if (mpext->csum_reqd)
                        mpext->csum = mp_opt.csum;
@@ -1196,8 +1249,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
 void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                         struct mptcp_out_options *opts)
 {
-       if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
-            OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+       if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+               const struct sock *ssk = (const struct sock *)tp;
+               struct mptcp_subflow_context *subflow;
+
+               subflow = mptcp_subflow_ctx(ssk);
+               subflow->send_mp_fail = 0;
+
+               *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+                                     TCPOLEN_MPTCP_FAIL,
+                                     0, 0);
+               put_unaligned_be64(opts->fail_seq, ptr);
+               ptr += 2;
+       }
+
+       /* RST is mutually exclusive with everything else */
+       if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+               *ptr++ = mptcp_option(MPTCPOPT_RST,
+                                     TCPOLEN_MPTCP_RST,
+                                     opts->reset_transient,
+                                     opts->reset_reason);
+               return;
+       }
+
+       /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
+        * mptcp_established_options*()
+        */
+       if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
+               struct mptcp_ext *mpext = &opts->ext_copy;
+               u8 len = TCPOLEN_MPTCP_DSS_BASE;
+               u8 flags = 0;
+
+               if (mpext->use_ack) {
+                       flags = MPTCP_DSS_HAS_ACK;
+                       if (mpext->ack64) {
+                               len += TCPOLEN_MPTCP_DSS_ACK64;
+                               flags |= MPTCP_DSS_ACK64;
+                       } else {
+                               len += TCPOLEN_MPTCP_DSS_ACK32;
+                       }
+               }
+
+               if (mpext->use_map) {
+                       len += TCPOLEN_MPTCP_DSS_MAP64;
+
+                       /* Use only 64-bit mapping flags for now, add
+                        * support for optional 32-bit mappings later.
+                        */
+                       flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
+                       if (mpext->data_fin)
+                               flags |= MPTCP_DSS_DATA_FIN;
+
+                       if (opts->csum_reqd)
+                               len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+               }
+
+               *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
+
+               if (mpext->use_ack) {
+                       if (mpext->ack64) {
+                               put_unaligned_be64(mpext->data_ack, ptr);
+                               ptr += 2;
+                       } else {
+                               put_unaligned_be32(mpext->data_ack32, ptr);
+                               ptr += 1;
+                       }
+               }
+
+               if (mpext->use_map) {
+                       put_unaligned_be64(mpext->data_seq, ptr);
+                       ptr += 2;
+                       put_unaligned_be32(mpext->subflow_seq, ptr);
+                       ptr += 1;
+                       if (opts->csum_reqd) {
+                               put_unaligned_be32(mpext->data_len << 16 |
+                                                  mptcp_make_csum(mpext), ptr);
+                       } else {
+                               put_unaligned_be32(mpext->data_len << 16 |
+                                                  TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+                       }
+               }
+       } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+                   OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
                u8 len, flag = MPTCP_CAP_HMAC_SHA256;
 
                if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
@@ -1244,10 +1377,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
                                           TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
                }
                ptr += 1;
-       }
 
-mp_capable_done:
-       if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+               /* MPC is additionally mutually exclusive with MP_PRIO */
+               goto mp_capable_done;
+       } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+                                     TCPOLEN_MPTCP_MPJ_SYN,
+                                     opts->backup, opts->join_id);
+               put_unaligned_be32(opts->token, ptr);
+               ptr += 1;
+               put_unaligned_be32(opts->nonce, ptr);
+               ptr += 1;
+       } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+                                     TCPOLEN_MPTCP_MPJ_SYNACK,
+                                     opts->backup, opts->join_id);
+               put_unaligned_be64(opts->thmac, ptr);
+               ptr += 2;
+               put_unaligned_be32(opts->nonce, ptr);
+               ptr += 1;
+       } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
+               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+                                     TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+               memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+               ptr += 5;
+       } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
                u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
                u8 echo = MPTCP_ADDR_ECHO;
 
@@ -1305,6 +1459,19 @@ mp_capable_done:
                }
        }
 
+       if (OPTION_MPTCP_PRIO & opts->suboptions) {
+               const struct sock *ssk = (const struct sock *)tp;
+               struct mptcp_subflow_context *subflow;
+
+               subflow = mptcp_subflow_ctx(ssk);
+               subflow->send_mp_prio = 0;
+
+               *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+                                     TCPOLEN_MPTCP_PRIO,
+                                     opts->backup, TCPOPT_NOP);
+       }
+
+mp_capable_done:
        if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
                u8 i = 1;
 
@@ -1325,107 +1492,6 @@ mp_capable_done:
                }
        }
 
-       if (OPTION_MPTCP_PRIO & opts->suboptions) {
-               const struct sock *ssk = (const struct sock *)tp;
-               struct mptcp_subflow_context *subflow;
-
-               subflow = mptcp_subflow_ctx(ssk);
-               subflow->send_mp_prio = 0;
-
-               *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
-                                     TCPOLEN_MPTCP_PRIO,
-                                     opts->backup, TCPOPT_NOP);
-       }
-
-       if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
-               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-                                     TCPOLEN_MPTCP_MPJ_SYN,
-                                     opts->backup, opts->join_id);
-               put_unaligned_be32(opts->token, ptr);
-               ptr += 1;
-               put_unaligned_be32(opts->nonce, ptr);
-               ptr += 1;
-       }
-
-       if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
-               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-                                     TCPOLEN_MPTCP_MPJ_SYNACK,
-                                     opts->backup, opts->join_id);
-               put_unaligned_be64(opts->thmac, ptr);
-               ptr += 2;
-               put_unaligned_be32(opts->nonce, ptr);
-               ptr += 1;
-       }
-
-       if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
-               *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
-                                     TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
-               memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
-               ptr += 5;
-       }
-
-       if (OPTION_MPTCP_RST & opts->suboptions)
-               *ptr++ = mptcp_option(MPTCPOPT_RST,
-                                     TCPOLEN_MPTCP_RST,
-                                     opts->reset_transient,
-                                     opts->reset_reason);
-
-       if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
-               struct mptcp_ext *mpext = &opts->ext_copy;
-               u8 len = TCPOLEN_MPTCP_DSS_BASE;
-               u8 flags = 0;
-
-               if (mpext->use_ack) {
-                       flags = MPTCP_DSS_HAS_ACK;
-                       if (mpext->ack64) {
-                               len += TCPOLEN_MPTCP_DSS_ACK64;
-                               flags |= MPTCP_DSS_ACK64;
-                       } else {
-                               len += TCPOLEN_MPTCP_DSS_ACK32;
-                       }
-               }
-
-               if (mpext->use_map) {
-                       len += TCPOLEN_MPTCP_DSS_MAP64;
-
-                       /* Use only 64-bit mapping flags for now, add
-                        * support for optional 32-bit mappings later.
-                        */
-                       flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
-                       if (mpext->data_fin)
-                               flags |= MPTCP_DSS_DATA_FIN;
-
-                       if (opts->csum_reqd)
-                               len += TCPOLEN_MPTCP_DSS_CHECKSUM;
-               }
-
-               *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
-
-               if (mpext->use_ack) {
-                       if (mpext->ack64) {
-                               put_unaligned_be64(mpext->data_ack, ptr);
-                               ptr += 2;
-                       } else {
-                               put_unaligned_be32(mpext->data_ack32, ptr);
-                               ptr += 1;
-                       }
-               }
-
-               if (mpext->use_map) {
-                       put_unaligned_be64(mpext->data_seq, ptr);
-                       ptr += 2;
-                       put_unaligned_be32(mpext->subflow_seq, ptr);
-                       ptr += 1;
-                       if (opts->csum_reqd) {
-                               put_unaligned_be32(mpext->data_len << 16 |
-                                                  mptcp_make_csum(mpext), ptr);
-                       } else {
-                               put_unaligned_be32(mpext->data_len << 16 |
-                                                  TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
-                       }
-               }
-       }
-
        if (tp)
                mptcp_set_rwin(tp);
 }
index 639271e..6ab386f 100644 (file)
@@ -10,6 +10,8 @@
 #include <net/mptcp.h>
 #include "protocol.h"
 
+#include "mib.h"
+
 /* path manager command handlers */
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -18,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 {
        u8 add_addr = READ_ONCE(msk->pm.addr_signal);
 
-       pr_debug("msk=%p, local_id=%d", msk, addr->id);
+       pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);
 
        lockdep_assert_held(&msk->pm.lock);
 
-       if (add_addr) {
-               pr_warn("addr_signal error, add_addr=%d", add_addr);
+       if (add_addr &
+           (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
+               pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
                return -EINVAL;
        }
 
-       msk->pm.local = *addr;
-       add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
-       if (echo)
+       if (echo) {
+               msk->pm.remote = *addr;
                add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
-       if (addr->family == AF_INET6)
-               add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
-       if (addr->port)
-               add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
+       } else {
+               msk->pm.local = *addr;
+               add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
+       }
        WRITE_ONCE(msk->pm.addr_signal, add_addr);
        return 0;
 }
@@ -247,12 +249,21 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
        mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
 }
 
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+       pr_debug("fail_seq=%llu", fail_seq);
+}
+
 /* path manager helpers */
 
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-                             struct mptcp_addr_info *saddr, bool *echo, bool *port)
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+                             unsigned int opt_size, unsigned int remaining,
+                             struct mptcp_addr_info *addr, bool *echo,
+                             bool *port, bool *drop_other_suboptions)
 {
        int ret = false;
+       u8 add_addr;
+       u8 family;
 
        spin_lock_bh(&msk->pm.lock);
 
@@ -260,14 +271,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
        if (!mptcp_pm_should_add_signal(msk))
                goto out_unlock;
 
+       /* always drop every other options for pure ack ADD_ADDR; this is a
+        * plain dup-ack from TCP perspective. The other MPTCP-relevant info,
+        * if any, will be carried by the 'original' TCP ack
+        */
+       if (skb && skb_is_tcp_pure_ack(skb)) {
+               remaining += opt_size;
+               *drop_other_suboptions = true;
+       }
+
        *echo = mptcp_pm_should_add_signal_echo(msk);
-       *port = mptcp_pm_should_add_signal_port(msk);
+       *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
 
-       if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port))
+       family = *echo ? msk->pm.remote.family : msk->pm.local.family;
+       if (remaining < mptcp_add_addr_len(family, *echo, *port))
                goto out_unlock;
 
-       *saddr = msk->pm.local;
-       WRITE_ONCE(msk->pm.addr_signal, 0);
+       if (*echo) {
+               *addr = msk->pm.remote;
+               add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+       } else {
+               *addr = msk->pm.local;
+               add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+       }
+       WRITE_ONCE(msk->pm.addr_signal, add_addr);
        ret = true;
 
 out_unlock:
@@ -279,6 +306,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
                             struct mptcp_rm_list *rm_list)
 {
        int ret = false, len;
+       u8 rm_addr;
 
        spin_lock_bh(&msk->pm.lock);
 
@@ -286,16 +314,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
        if (!mptcp_pm_should_rm_signal(msk))
                goto out_unlock;
 
+       rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
        len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
        if (len < 0) {
-               WRITE_ONCE(msk->pm.addr_signal, 0);
+               WRITE_ONCE(msk->pm.addr_signal, rm_addr);
                goto out_unlock;
        }
        if (remaining < len)
                goto out_unlock;
 
        *rm_list = msk->pm.rm_list_tx;
-       WRITE_ONCE(msk->pm.addr_signal, 0);
+       WRITE_ONCE(msk->pm.addr_signal, rm_addr);
        ret = true;
 
 out_unlock:
@@ -308,6 +337,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
        return mptcp_pm_nl_get_local_id(msk, skc);
 }
 
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);
+
+       /* keep track of rtx periods with no progress */
+       if (!subflow->stale_count) {
+               subflow->stale_rcv_tstamp = rcv_tstamp;
+               subflow->stale_count++;
+       } else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
+               if (subflow->stale_count < U8_MAX)
+                       subflow->stale_count++;
+               mptcp_pm_nl_subflow_chk_stale(msk, ssk);
+       } else {
+               subflow->stale_count = 0;
+               mptcp_subflow_set_active(subflow);
+       }
+}
+
 void mptcp_pm_data_init(struct mptcp_sock *msk)
 {
        msk->pm.add_addr_signaled = 0;
index 7b37944..1e4289c 100644 (file)
@@ -46,6 +46,7 @@ struct pm_nl_pernet {
        spinlock_t              lock;
        struct list_head        local_addr_list;
        unsigned int            addrs;
+       unsigned int            stale_loss_cnt;
        unsigned int            add_addr_signal_max;
        unsigned int            add_addr_accept_max;
        unsigned int            local_addr_max;
@@ -316,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
        if (!entry->addr.id)
                return;
 
-       if (mptcp_pm_should_add_signal(msk)) {
+       if (mptcp_pm_should_add_signal_addr(msk)) {
                sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
                goto out;
        }
 
        spin_lock_bh(&msk->pm.lock);
 
-       if (!mptcp_pm_should_add_signal(msk)) {
+       if (!mptcp_pm_should_add_signal_addr(msk)) {
                pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
                mptcp_pm_announce_addr(msk, &entry->addr, false);
                mptcp_pm_add_addr_send_ack(msk);
@@ -409,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
        }
 }
 
+static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
+                                 struct mptcp_addr_info *addr)
+{
+       int i;
+
+       for (i = 0; i < nr; i++) {
+               if (addresses_equal(&addrs[i], addr, addr->port))
+                       return true;
+       }
+
+       return false;
+}
+
+/* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+                                             struct mptcp_addr_info *addrs)
+{
+       struct sock *sk = (struct sock *)msk, *ssk;
+       struct mptcp_subflow_context *subflow;
+       struct mptcp_addr_info remote = { 0 };
+       unsigned int subflows_max;
+       int i = 0;
+
+       subflows_max = mptcp_pm_get_subflows_max(msk);
+
+       /* Non-fullmesh endpoint, fill in the single entry
+        * corresponding to the primary MPC subflow remote address
+        */
+       if (!fullmesh) {
+               remote_address((struct sock_common *)sk, &remote);
+               msk->pm.subflows++;
+               addrs[i++] = remote;
+       } else {
+               mptcp_for_each_subflow(msk, subflow) {
+                       ssk = mptcp_subflow_tcp_sock(subflow);
+                       remote_address((struct sock_common *)ssk, &remote);
+                       if (!lookup_address_in_vec(addrs, i, &remote) &&
+                           msk->pm.subflows < subflows_max) {
+                               msk->pm.subflows++;
+                               addrs[i++] = remote;
+                       }
+               }
+       }
+
+       return i;
+}
+
 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 {
        struct sock *sk = (struct sock *)msk;
@@ -454,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
            !READ_ONCE(msk->pm.remote_deny_join_id0)) {
                local = select_local_address(pernet, msk);
                if (local) {
-                       struct mptcp_addr_info remote = { 0 };
+                       bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+                       struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+                       int i, nr;
 
                        msk->pm.local_addr_used++;
-                       msk->pm.subflows++;
                        check_work_pending(msk);
-                       remote_address((struct sock_common *)sk, &remote);
+                       nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
                        spin_unlock_bh(&msk->pm.lock);
-                       __mptcp_subflow_connect(sk, &local->addr, &remote,
-                                               local->flags, local->ifindex);
+                       for (i = 0; i < nr; i++)
+                               __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
                        spin_lock_bh(&msk->pm.lock);
                        return;
                }
@@ -483,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
        mptcp_pm_create_subflow_or_signal_addr(msk);
 }
 
+/* Fill all the local addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+                                            struct mptcp_addr_info *addrs)
+{
+       struct sock *sk = (struct sock *)msk;
+       struct mptcp_pm_addr_entry *entry;
+       struct mptcp_addr_info local;
+       struct pm_nl_pernet *pernet;
+       unsigned int subflows_max;
+       int i = 0;
+
+       pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+       subflows_max = mptcp_pm_get_subflows_max(msk);
+
+       rcu_read_lock();
+       __mptcp_flush_join_list(msk);
+       list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+               if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
+                       continue;
+
+               if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+                       if ((entry->addr.family == AF_INET &&
+                            !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+                           (sk->sk_family == AF_INET &&
+                            !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+                               continue;
+               }
+
+               if (msk->pm.subflows < subflows_max) {
+                       msk->pm.subflows++;
+                       addrs[i++] = entry->addr;
+               }
+       }
+       rcu_read_unlock();
+
+       /* If the array is empty, fill in the single
+        * 'IPADDRANY' local address
+        */
+       if (!i) {
+               memset(&local, 0, sizeof(local));
+               local.family = msk->pm.remote.family;
+
+               msk->pm.subflows++;
+               addrs[i++] = local;
+       }
+
+       return i;
+}
+
 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 {
+       struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
        struct sock *sk = (struct sock *)msk;
        unsigned int add_addr_accept_max;
        struct mptcp_addr_info remote;
-       struct mptcp_addr_info local;
        unsigned int subflows_max;
+       int i, nr;
 
        add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
        subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -501,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
        if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
                goto add_addr_echo;
 
-       msk->pm.add_addr_accepted++;
-       msk->pm.subflows++;
-       if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
-           msk->pm.subflows >= subflows_max)
-               WRITE_ONCE(msk->pm.accept_addr, false);
-
        /* connect to the specified remote address, using whatever
         * local address the routing configuration will pick.
         */
        remote = msk->pm.remote;
        if (!remote.port)
                remote.port = sk->sk_dport;
-       memset(&local, 0, sizeof(local));
-       local.family = remote.family;
+       nr = fill_local_addresses_vec(msk, addrs);
+
+       msk->pm.add_addr_accepted++;
+       if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+           msk->pm.subflows >= subflows_max)
+               WRITE_ONCE(msk->pm.accept_addr, false);
 
        spin_unlock_bh(&msk->pm.lock);
-       __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
+       for (i = 0; i < nr; i++)
+               __mptcp_subflow_connect(sk, &addrs[i], &remote);
        spin_lock_bh(&msk->pm.lock);
 
 add_addr_echo:
@@ -543,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
                bool slow;
 
                spin_unlock_bh(&msk->pm.lock);
-               pr_debug("send ack for %s%s%s",
-                        mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
-                        mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
-                        mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
+               pr_debug("send ack for %s",
+                        mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
 
                slow = lock_sock_fast(ssk);
                tcp_send_ack(ssk);
@@ -899,6 +1001,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
        [MPTCP_PM_ATTR_SUBFLOWS]        = { .type       = NLA_U32,      },
 };
 
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+       struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
+       struct sock *sk = (struct sock *)msk;
+       unsigned int active_max_loss_cnt;
+       struct net *net = sock_net(sk);
+       unsigned int stale_loss_cnt;
+       bool slow;
+
+       stale_loss_cnt = mptcp_stale_loss_cnt(net);
+       if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
+               return;
+
+       /* look for another available subflow not in loss state */
+       active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
+       mptcp_for_each_subflow(msk, iter) {
+               if (iter != subflow && mptcp_subflow_active(iter) &&
+                   iter->stale_count < active_max_loss_cnt) {
+                       /* we have some alternatives, try to mark this subflow as idle ...*/
+                       slow = lock_sock_fast(ssk);
+                       if (!tcp_rtx_and_write_queues_empty(ssk)) {
+                               subflow->stale = 1;
+                               __mptcp_retransmit_pending_data(sk);
+                               MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+                       }
+                       unlock_sock_fast(ssk, slow);
+
+                       /* always try to push the pending data regarless of re-injections:
+                        * we can possibly use backup subflows now, and subflow selection
+                        * is cheap under the msk socket lock
+                        */
+                       __mptcp_push_pending(sk, 0);
+                       return;
+               }
+       }
+}
+
 static int mptcp_pm_family_to_addr(int family)
 {
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1067,6 +1206,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
        return NULL;
 }
 
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+                                        u8 *flags, int *ifindex)
+{
+       struct mptcp_pm_addr_entry *entry;
+
+       *flags = 0;
+       *ifindex = 0;
+
+       if (id) {
+               rcu_read_lock();
+               entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+               if (entry) {
+                       *flags = entry->flags;
+                       *ifindex = entry->ifindex;
+               }
+               rcu_read_unlock();
+       }
+
+       return 0;
+}
+
 static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
                                      struct mptcp_addr_info *addr)
 {
@@ -1901,6 +2061,7 @@ static int __net_init pm_nl_init_net(struct net *net)
 
        INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
        pernet->next_id = 1;
+       pernet->stale_loss_cnt = 4;
        spin_lock_init(&pernet->lock);
 
        /* No need to initialize other pernet fields, the struct is zeroed at
index a889249..ade648c 100644 (file)
@@ -411,16 +411,29 @@ static void mptcp_set_datafin_timeout(const struct sock *sk)
                                       TCP_RTO_MIN << icsk->icsk_retransmits);
 }
 
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+static void __mptcp_set_timeout(struct sock *sk, long tout)
 {
-       long tout = ssk && inet_csk(ssk)->icsk_pending ?
-                                     inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
-       if (tout <= 0)
-               tout = mptcp_sk(sk)->timer_ival;
        mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
 }
 
+static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow)
+{
+       const struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+       return inet_csk(ssk)->icsk_pending && !subflow->stale_count ?
+              inet_csk(ssk)->icsk_timeout - jiffies : 0;
+}
+
+static void mptcp_set_timeout(struct sock *sk)
+{
+       struct mptcp_subflow_context *subflow;
+       long tout = 0;
+
+       mptcp_for_each_subflow(mptcp_sk(sk), subflow)
+               tout = max(tout, mptcp_timeout_from_subflow(subflow));
+       __mptcp_set_timeout(sk, tout);
+}
+
 static bool tcp_can_send_ack(const struct sock *ssk)
 {
        return !((1 << inet_sk_state_load(ssk)) &
@@ -531,7 +544,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
                }
 
                ret = true;
-               mptcp_set_timeout(sk, NULL);
                mptcp_send_ack(msk);
                mptcp_close_wake_up(sk);
        }
@@ -791,10 +803,7 @@ static void mptcp_reset_timer(struct sock *sk)
        if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
                return;
 
-       /* should never be called with mptcp level timer cleared */
-       tout = READ_ONCE(mptcp_sk(sk)->timer_ival);
-       if (WARN_ON_ONCE(!tout))
-               tout = TCP_RTO_MIN;
+       tout = mptcp_sk(sk)->timer_ival;
        sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
 }
 
@@ -1046,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk)
                if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
                        break;
 
-               if (WARN_ON_ONCE(dfrag == msk->first_pending))
-                       break;
+               if (unlikely(dfrag == msk->first_pending)) {
+                       /* in recovery mode can see ack after the current snd head */
+                       if (WARN_ON_ONCE(!msk->recovery))
+                               break;
+
+                       WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+               }
+
                dfrag_clear(sk, dfrag);
                cleaned = true;
        }
@@ -1056,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk)
        if (dfrag && after64(snd_una, dfrag->data_seq)) {
                u64 delta = snd_una - dfrag->data_seq;
 
-               if (WARN_ON_ONCE(delta > dfrag->already_sent))
-                       goto out;
+               /* prevent wrap around in recovery mode */
+               if (unlikely(delta > dfrag->already_sent)) {
+                       if (WARN_ON_ONCE(!msk->recovery))
+                               goto out;
+                       if (WARN_ON_ONCE(delta > dfrag->data_len))
+                               goto out;
+                       dfrag->already_sent += delta - dfrag->already_sent;
+               }
 
                dfrag->data_seq += delta;
                dfrag->offset += delta;
@@ -1068,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk)
                cleaned = true;
        }
 
+       /* all retransmitted data acked, recovery completed */
+       if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt))
+               msk->recovery = false;
+
 out:
        if (cleaned) {
                if (tcp_under_memory_pressure(sk)) {
@@ -1076,8 +1101,8 @@ out:
                }
        }
 
-       if (snd_una == READ_ONCE(msk->snd_nxt)) {
-               if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
+       if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
+               if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
                        mptcp_stop_timer(sk);
        } else {
                mptcp_reset_timer(sk);
@@ -1366,16 +1391,44 @@ struct subflow_send_info {
        u64 ratio;
 };
 
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow)
+{
+       if (!subflow->stale)
+               return;
+
+       subflow->stale = 0;
+       MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER);
+}
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+       if (unlikely(subflow->stale)) {
+               u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp);
+
+               if (subflow->stale_rcv_tstamp == rcv_tstamp)
+                       return false;
+
+               mptcp_subflow_set_active(subflow);
+       }
+       return __mptcp_subflow_active(subflow);
+}
+
+/* implement the mptcp packet scheduler;
+ * returns the subflow that will transmit the next DSS
+ * additionally updates the rtx timeout
+ */
 static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
        struct subflow_send_info send_info[2];
        struct mptcp_subflow_context *subflow;
+       struct sock *sk = (struct sock *)msk;
        int i, nr_active = 0;
        struct sock *ssk;
+       long tout = 0;
        u64 ratio;
        u32 pace;
 
-       sock_owned_by_me((struct sock *)msk);
+       sock_owned_by_me(sk);
 
        if (__mptcp_check_fallback(msk)) {
                if (!msk->first)
@@ -1386,8 +1439,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
        /* re-use last subflow, if the burst allow that */
        if (msk->last_snd && msk->snd_burst > 0 &&
            sk_stream_memory_free(msk->last_snd) &&
-           mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
+           mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
+               mptcp_set_timeout(sk);
                return msk->last_snd;
+       }
 
        /* pick the subflow with the lower wmem/wspace ratio */
        for (i = 0; i < 2; ++i) {
@@ -1400,6 +1455,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
                if (!mptcp_subflow_active(subflow))
                        continue;
 
+               tout = max(tout, mptcp_timeout_from_subflow(subflow));
                nr_active += !subflow->backup;
                if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
                        continue;
@@ -1415,6 +1471,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
                        send_info[subflow->backup].ratio = ratio;
                }
        }
+       __mptcp_set_timeout(sk, tout);
 
        /* pick the best backup if no other subflow is active */
        if (!nr_active)
@@ -1433,12 +1490,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 static void mptcp_push_release(struct sock *sk, struct sock *ssk,
                               struct mptcp_sendmsg_info *info)
 {
-       mptcp_set_timeout(sk, ssk);
        tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
        release_sock(ssk);
 }
 
-static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
        struct sock *prev_ssk = NULL, *ssk = NULL;
        struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1459,15 +1515,19 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
                        mptcp_flush_join_list(msk);
                        ssk = mptcp_subflow_get_send(msk);
 
-                       /* try to keep the subflow socket lock across
-                        * consecutive xmit on the same socket
+                       /* First check. If the ssk has changed since
+                        * the last round, release prev_ssk
                         */
                        if (ssk != prev_ssk && prev_ssk)
                                mptcp_push_release(sk, prev_ssk, &info);
                        if (!ssk)
                                goto out;
 
-                       if (ssk != prev_ssk || !prev_ssk)
+                       /* Need to lock the new subflow only if different
+                        * from the previous one, otherwise we are still
+                        * helding the relevant lock
+                        */
+                       if (ssk != prev_ssk)
                                lock_sock(ssk);
 
                        /* keep it simple and always provide a new skb for the
@@ -1501,12 +1561,11 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
                mptcp_push_release(sk, ssk, &info);
 
 out:
-       if (copied) {
-               /* start the timer, if it's not pending */
-               if (!mptcp_timer_pending(sk))
-                       mptcp_reset_timer(sk);
+       /* ensure the rtx timer is running */
+       if (!mptcp_timer_pending(sk))
+               mptcp_reset_timer(sk);
+       if (copied)
                __mptcp_check_send_data_fin(sk);
-       }
 }
 
 static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
@@ -1567,7 +1626,6 @@ out:
         */
        __mptcp_update_wmem(sk);
        if (copied) {
-               mptcp_set_timeout(sk, ssk);
                tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
                         info.size_goal);
                if (!mptcp_timer_pending(sk))
@@ -2083,10 +2141,11 @@ static void mptcp_timeout_timer(struct timer_list *t)
  *
  * A backup subflow is returned only if that is the only kind available.
  */
-static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
+static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
 {
+       struct sock *backup = NULL, *pick = NULL;
        struct mptcp_subflow_context *subflow;
-       struct sock *backup = NULL;
+       int min_stale_count = INT_MAX;
 
        sock_owned_by_me((const struct sock *)msk);
 
@@ -2096,14 +2155,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
        mptcp_for_each_subflow(msk, subflow) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
-               if (!mptcp_subflow_active(subflow))
+               if (!__mptcp_subflow_active(subflow))
                        continue;
 
-               /* still data outstanding at TCP level?  Don't retransmit. */
-               if (!tcp_write_queue_empty(ssk)) {
-                       if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss)
-                               continue;
-                       return NULL;
+               /* still data outstanding at TCP level? skip this */
+               if (!tcp_rtx_and_write_queues_empty(ssk)) {
+                       mptcp_pm_subflow_chk_stale(msk, ssk);
+                       min_stale_count = min_t(int, min_stale_count, subflow->stale_count);
+                       continue;
                }
 
                if (subflow->backup) {
@@ -2112,10 +2171,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
                        continue;
                }
 
-               return ssk;
+               if (!pick)
+                       pick = ssk;
        }
 
-       return backup;
+       if (pick)
+               return pick;
+
+       /* use backup only if there are no progresses anywhere */
+       return min_stale_count > 1 ? backup : NULL;
 }
 
 static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
@@ -2126,6 +2190,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
        }
 }
 
+bool __mptcp_retransmit_pending_data(struct sock *sk)
+{
+       struct mptcp_data_frag *cur, *rtx_head;
+       struct mptcp_sock *msk = mptcp_sk(sk);
+
+       if (__mptcp_check_fallback(mptcp_sk(sk)))
+               return false;
+
+       if (tcp_rtx_and_write_queues_empty(sk))
+               return false;
+
+       /* the closing socket has some data untransmitted and/or unacked:
+        * some data in the mptcp rtx queue has not really xmitted yet.
+        * keep it simple and re-inject the whole mptcp level rtx queue
+        */
+       mptcp_data_lock(sk);
+       __mptcp_clean_una_wakeup(sk);
+       rtx_head = mptcp_rtx_head(sk);
+       if (!rtx_head) {
+               mptcp_data_unlock(sk);
+               return false;
+       }
+
+       /* will accept ack for reijected data before re-sending them */
+       if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
+               msk->recovery_snd_nxt = msk->snd_nxt;
+       msk->recovery = true;
+       mptcp_data_unlock(sk);
+
+       msk->first_pending = rtx_head;
+       msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
+       msk->snd_nxt = rtx_head->data_seq;
+       msk->snd_burst = 0;
+
+       /* be sure to clear the "sent status" on all re-injected fragments */
+       list_for_each_entry(cur, &msk->rtx_queue, list) {
+               if (!cur->already_sent)
+                       break;
+               cur->already_sent = 0;
+       }
+
+       return true;
+}
+
 /* subflow sockets can be either outgoing (connect) or incoming
  * (accept).
  *
@@ -2138,6 +2246,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                              struct mptcp_subflow_context *subflow)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
+       bool need_push;
 
        list_del(&subflow->node);
 
@@ -2149,6 +2258,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
        if (ssk->sk_socket)
                sock_orphan(ssk);
 
+       need_push = __mptcp_retransmit_pending_data(sk);
        subflow->disposable = 1;
 
        /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2176,6 +2286,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
        if (msk->subflow && ssk == msk->subflow->sk)
                mptcp_dispose_initial_subflow(msk);
+
+       if (need_push)
+               __mptcp_push_pending(sk, 0);
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2313,7 +2426,6 @@ static void __mptcp_retrans(struct sock *sk)
                         info.size_goal);
        }
 
-       mptcp_set_timeout(sk, ssk);
        release_sock(ssk);
 
 reset_timer:
@@ -2384,10 +2496,12 @@ static int __mptcp_init_sock(struct sock *sk)
        msk->wmem_reserved = 0;
        WRITE_ONCE(msk->rmem_released, 0);
        msk->tx_pending_data = 0;
+       msk->timer_ival = TCP_RTO_MIN;
 
        msk->first = NULL;
        inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
        WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+       msk->recovery = false;
 
        mptcp_pm_data_init(msk);
 
@@ -2472,7 +2586,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
                        tcp_shutdown(ssk, how);
                } else {
                        pr_debug("Sending DATA_FIN on subflow %p", ssk);
-                       mptcp_set_timeout(sk, ssk);
                        tcp_send_ack(ssk);
                        if (!mptcp_timer_pending(sk))
                                mptcp_reset_timer(sk);
@@ -2723,7 +2836,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
        msk->token = subflow_req->token;
        msk->subflow = NULL;
        WRITE_ONCE(msk->fully_established, false);
-       if (mp_opt->csum_reqd)
+       if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
                WRITE_ONCE(msk->csum_enabled, true);
 
        msk->write_seq = subflow_req->idsn + 1;
@@ -2732,7 +2845,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
        msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
        msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
 
-       if (mp_opt->mp_capable) {
+       if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
                msk->can_ack = true;
                msk->remote_key = mp_opt->sndr_key;
                mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
index 0f0c026..d7aba1c 100644 (file)
 #define OPTION_MPTCP_FASTCLOSE BIT(8)
 #define OPTION_MPTCP_PRIO      BIT(9)
 #define OPTION_MPTCP_RST       BIT(10)
+#define OPTION_MPTCP_DSS       BIT(11)
+#define OPTION_MPTCP_FAIL      BIT(12)
+
+#define OPTION_MPTCP_CSUMREQD  BIT(13)
+
+#define OPTIONS_MPTCP_MPC      (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
+                                OPTION_MPTCP_MPC_ACK)
+#define OPTIONS_MPTCP_MPJ      (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
+                                OPTION_MPTCP_MPJ_SYNACK)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE    0
@@ -67,6 +76,7 @@
 #define TCPOLEN_MPTCP_PRIO_ALIGN       4
 #define TCPOLEN_MPTCP_FASTCLOSE                12
 #define TCPOLEN_MPTCP_RST              4
+#define TCPOLEN_MPTCP_FAIL             12
 
 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM        (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
 
@@ -129,35 +139,28 @@ struct mptcp_options_received {
        u32     subflow_seq;
        u16     data_len;
        __sum16 csum;
-       u16     mp_capable : 1,
-               mp_join : 1,
-               fastclose : 1,
-               reset : 1,
-               dss : 1,
-               add_addr : 1,
-               rm_addr : 1,
-               mp_prio : 1,
-               echo : 1,
-               csum_reqd : 1,
-               backup : 1,
-               deny_join_id0 : 1;
+       u16     suboptions;
        u32     token;
        u32     nonce;
-       u64     thmac;
-       u8      hmac[MPTCPOPT_HMAC_LEN];
-       u8      join_id;
-       u8      use_map:1,
+       u16     use_map:1,
                dsn64:1,
                data_fin:1,
                use_ack:1,
                ack64:1,
                mpc_map:1,
+               reset_reason:4,
+               reset_transient:1,
+               echo:1,
+               backup:1,
+               deny_join_id0:1,
                __unused:2;
+       u8      join_id;
+       u64     thmac;
+       u8      hmac[MPTCPOPT_HMAC_LEN];
        struct mptcp_addr_info addr;
        struct mptcp_rm_list rm_list;
        u64     ahmac;
-       u8      reset_reason:4;
-       u8      reset_transient:1;
+       u64     fail_seq;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -178,8 +181,6 @@ enum mptcp_pm_status {
 enum mptcp_addr_signal_status {
        MPTCP_ADD_ADDR_SIGNAL,
        MPTCP_ADD_ADDR_ECHO,
-       MPTCP_ADD_ADDR_IPV6,
-       MPTCP_ADD_ADDR_PORT,
        MPTCP_RM_ADDR_SIGNAL,
 };
 
@@ -230,12 +231,17 @@ struct mptcp_sock {
        struct sock     *last_snd;
        int             snd_burst;
        int             old_wspace;
+       u64             recovery_snd_nxt;       /* in recovery mode accept up to this seq;
+                                                * recovery related fields are under data_lock
+                                                * protection
+                                                */
        u64             snd_una;
        u64             wnd_end;
        unsigned long   timer_ival;
        u32             token;
        int             rmem_released;
        unsigned long   flags;
+       bool            recovery;               /* closing subflow write queue reinjected */
        bool            can_ack;
        bool            fully_established;
        bool            rcv_data_fin;
@@ -425,9 +431,11 @@ struct mptcp_subflow_context {
                mpc_map : 1,
                backup : 1,
                send_mp_prio : 1,
+               send_mp_fail : 1,
                rx_eof : 1,
                can_ack : 1,        /* only after processing the remote a key */
-               disposable : 1;     /* ctx can be free at ulp release time */
+               disposable : 1,     /* ctx can be free at ulp release time */
+               stale : 1;          /* unable to snd/rcv data, do not use for xmit */
        enum mptcp_data_avail data_avail;
        u32     remote_nonce;
        u64     thmac;
@@ -439,11 +447,13 @@ struct mptcp_subflow_context {
        u8      reset_seen:1;
        u8      reset_transient:1;
        u8      reset_reason:4;
+       u8      stale_count;
 
        long    delegated_status;
        struct  list_head delegated_node;   /* link into delegated_action, protected by local BH */
 
-       u32 setsockopt_seq;
+       u32     setsockopt_seq;
+       u32     stale_rcv_tstamp;
 
        struct  sock *tcp_sock;     /* tcp sk backpointer */
        struct  sock *conn;         /* parent mptcp_sock */
@@ -549,12 +559,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
        clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
 }
 
-int mptcp_is_enabled(struct net *net);
-unsigned int mptcp_get_add_addr_timeout(struct net *net);
-int mptcp_is_checksum_enabled(struct net *net);
-int mptcp_allow_join_id0(struct net *net);
+int mptcp_is_enabled(const struct net *net);
+unsigned int mptcp_get_add_addr_timeout(const struct net *net);
+int mptcp_is_checksum_enabled(const struct net *net);
+int mptcp_allow_join_id0(const struct net *net);
+unsigned int mptcp_stale_loss_cnt(const struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
                                     struct mptcp_options_received *mp_opt);
+bool __mptcp_retransmit_pending_data(struct sock *sk);
+void __mptcp_push_pending(struct sock *sk, unsigned int flags);
 bool mptcp_subflow_data_available(struct sock *sk);
 void __init mptcp_subflow_init(void);
 void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
@@ -566,14 +579,13 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
 
 /* called with sk socket lock held */
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
-                           const struct mptcp_addr_info *remote,
-                           u8 flags, int ifindex);
+                           const struct mptcp_addr_info *remote);
 int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
 void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
                         struct sockaddr_storage *addr,
                         unsigned short family);
 
-static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
        struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
@@ -585,6 +597,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
        return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
 }
 
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+
 static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
                                              struct mptcp_subflow_context *ctx)
 {
@@ -596,6 +612,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
        inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 }
 
+static inline bool mptcp_has_another_subflow(struct sock *ssk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+       mptcp_for_each_subflow(msk, tmp) {
+               if (tmp != subflow)
+                       return true;
+       }
+
+       return false;
+}
+
 void __init mptcp_proto_init(void);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 int __init mptcp_proto_v6_init(void);
@@ -690,6 +719,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
 
 void __init mptcp_pm_init(void);
 void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
 void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
@@ -708,6 +739,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
                                 struct mptcp_addr_info *addr,
                                 u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
 void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
@@ -716,6 +748,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 struct mptcp_pm_add_entry *
 mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
                                struct mptcp_addr_info *addr);
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+                                        u8 *flags, int *ifindex);
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
                           const struct mptcp_addr_info *addr,
@@ -730,22 +764,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
 
 static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
 {
-       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
+       return READ_ONCE(msk->pm.addr_signal) &
+               (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
 }
 
-static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
-{
-       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
-}
-
-static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
 {
-       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6);
+       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
 }
 
-static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
 {
-       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT);
+       return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
 }
 
 static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
@@ -776,8 +806,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
        return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
 }
 
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-                             struct mptcp_addr_info *saddr, bool *echo, bool *port);
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+                             unsigned int opt_size, unsigned int remaining,
+                             struct mptcp_addr_info *addr, bool *echo,
+                             bool *port, bool *drop_other_suboptions);
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
                             struct mptcp_rm_list *rm_list);
 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
index 966f777..1de7ce8 100644 (file)
@@ -141,6 +141,7 @@ static int subflow_check_req(struct request_sock *req,
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;
+       bool opt_mp_capable, opt_mp_join;
 
        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
 
@@ -154,16 +155,18 @@ static int subflow_check_req(struct request_sock *req,
 
        mptcp_get_options(sk_listener, skb, &mp_opt);
 
-       if (mp_opt.mp_capable) {
+       opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+       opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+       if (opt_mp_capable) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
 
-               if (mp_opt.mp_join)
+               if (opt_mp_join)
                        return 0;
-       } else if (mp_opt.mp_join) {
+       } else if (opt_mp_join) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
        }
 
-       if (mp_opt.mp_capable && listener->request_mptcp) {
+       if (opt_mp_capable && listener->request_mptcp) {
                int err, retries = MPTCP_TOKEN_MAX_RETRIES;
 
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -194,7 +197,7 @@ again:
                else
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
 
-       } else if (mp_opt.mp_join && listener->request_mptcp) {
+       } else if (opt_mp_join && listener->request_mptcp) {
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
                subflow_req->mp_join = 1;
                subflow_req->backup = mp_opt.backup;
@@ -243,15 +246,18 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
        struct mptcp_options_received mp_opt;
+       bool opt_mp_capable, opt_mp_join;
        int err;
 
        subflow_init_req(req, sk_listener);
        mptcp_get_options(sk_listener, skb, &mp_opt);
 
-       if (mp_opt.mp_capable && mp_opt.mp_join)
+       opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+       opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+       if (opt_mp_capable && opt_mp_join)
                return -EINVAL;
 
-       if (mp_opt.mp_capable && listener->request_mptcp) {
+       if (opt_mp_capable && listener->request_mptcp) {
                if (mp_opt.sndr_key == 0)
                        return -EINVAL;
 
@@ -262,7 +268,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
 
                subflow_req->mp_capable = 1;
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
-       } else if (mp_opt.mp_join && listener->request_mptcp) {
+       } else if (opt_mp_join && listener->request_mptcp) {
                if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
                        return -EINVAL;
 
@@ -394,7 +400,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
 
-
        /* be sure no special action on any packet other than syn-ack */
        if (subflow->conn_finished)
                return;
@@ -407,7 +412,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
        mptcp_get_options(sk, skb, &mp_opt);
        if (subflow->request_mptcp) {
-               if (!mp_opt.mp_capable) {
+               if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
                        MPTCP_INC_STATS(sock_net(sk),
                                        MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
                        mptcp_do_fallback(sk);
@@ -415,7 +420,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
                        goto fallback;
                }
 
-               if (mp_opt.csum_reqd)
+               if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
                        WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
                if (mp_opt.deny_join_id0)
                        WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
@@ -430,15 +435,17 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
        } else if (subflow->request_join) {
                u8 hmac[SHA256_DIGEST_SIZE];
 
-               if (!mp_opt.mp_join) {
+               if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
                        subflow->reset_reason = MPTCP_RST_EMPTCP;
                        goto do_reset;
                }
 
+               subflow->backup = mp_opt.backup;
                subflow->thmac = mp_opt.thmac;
                subflow->remote_nonce = mp_opt.nonce;
-               pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
-                        subflow->thmac, subflow->remote_nonce);
+               pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+                        subflow, subflow->thmac, subflow->remote_nonce,
+                        subflow->backup);
 
                if (!subflow_thmac_valid(subflow)) {
                        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
@@ -634,10 +641,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
 
-       /* After child creation we must look for 'mp_capable' even when options
+       /* After child creation we must look for MPC even when options
         * are not parsed
         */
-       mp_opt.mp_capable = 0;
+       mp_opt.suboptions = 0;
 
        /* hopefully temporary handling for MP_JOIN+syncookie */
        subflow_req = mptcp_subflow_rsk(req);
@@ -657,7 +664,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                 * options.
                 */
                mptcp_get_options(sk, skb, &mp_opt);
-               if (!mp_opt.mp_capable) {
+               if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
                        fallback = true;
                        goto create_child;
                }
@@ -667,7 +674,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                        fallback = true;
        } else if (subflow_req->mp_join) {
                mptcp_get_options(sk, skb, &mp_opt);
-               if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
+               if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+                   !subflow_hmac_valid(req, &mp_opt) ||
                    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        fallback = true;
@@ -724,7 +732,7 @@ create_child:
                        /* with OoO packets we can reach here without ingress
                         * mpc option
                         */
-                       if (mp_opt.mp_capable)
+                       if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
                                mptcp_subflow_fully_established(ctx, &mp_opt);
                } else if (ctx->mp_join) {
                        struct mptcp_sock *owner;
@@ -908,6 +916,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
        csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
        if (unlikely(csum_fold(csum))) {
                MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+               subflow->send_mp_fail = 1;
+               MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
                return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
        }
 
@@ -1155,6 +1165,20 @@ no_data:
 
 fallback:
        /* RFC 8684 section 3.7. */
+       if (subflow->send_mp_fail) {
+               if (mptcp_has_another_subflow(ssk)) {
+                       while ((skb = skb_peek(&ssk->sk_receive_queue)))
+                               sk_eat_skb(ssk, skb);
+               }
+               ssk->sk_err = EBADMSG;
+               tcp_set_state(ssk, TCP_CLOSE);
+               subflow->reset_transient = 0;
+               subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+               tcp_send_active_reset(ssk, GFP_ATOMIC);
+               WRITE_ONCE(subflow->data_avail, 0);
+               return true;
+       }
+
        if (subflow->mp_join || subflow->fully_established) {
                /* fatal protocol error, close the socket.
                 * subflow_error_report() will introduce the appropriate barriers
@@ -1353,8 +1377,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 }
 
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
-                           const struct mptcp_addr_info *remote,
-                           u8 flags, int ifindex)
+                           const struct mptcp_addr_info *remote)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
@@ -1365,6 +1388,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
        struct sock *ssk;
        u32 remote_token;
        int addrlen;
+       int ifindex;
+       u8 flags;
        int err;
 
        if (!mptcp_is_fully_established(sk))
@@ -1388,6 +1413,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
                local_id = err;
        }
 
+       mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+                                            &flags, &ifindex);
        subflow->remote_key = msk->remote_key;
        subflow->local_key = msk->local_key;
        subflow->token = msk->token;
index 049890e..aab20e5 100644 (file)
@@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
 
 # IPVS
 obj-$(CONFIG_IP_VS) += ipvs/
+
+# lwtunnel
+obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
index 296e4a1..41768ff 100644 (file)
@@ -130,58 +130,77 @@ static void ecache_work(struct work_struct *work)
                schedule_delayed_work(&cnet->ecache_dwork, delay);
 }
 
-int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
-                                 u32 portid, int report)
+static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
+                                          const unsigned int events,
+                                          const unsigned long missed,
+                                          const struct nf_ct_event *item)
 {
-       int ret = 0;
-       struct net *net = nf_ct_net(ct);
+       struct nf_conn *ct = item->ct;
+       struct net *net = nf_ct_net(item->ct);
        struct nf_ct_event_notifier *notify;
-       struct nf_conntrack_ecache *e;
+       int ret;
+
+       if (!((events | missed) & e->ctmask))
+               return 0;
 
        rcu_read_lock();
+
        notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
-       if (!notify)
-               goto out_unlock;
+       if (!notify) {
+               rcu_read_unlock();
+               return 0;
+       }
+
+       ret = notify->ct_event(events | missed, item);
+       rcu_read_unlock();
+
+       if (likely(ret >= 0 && missed == 0))
+               return 0;
+
+       spin_lock_bh(&ct->lock);
+       if (ret < 0)
+               e->missed |= events;
+       else
+               e->missed &= ~missed;
+       spin_unlock_bh(&ct->lock);
+
+       return ret;
+}
+
+int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
+                                 u32 portid, int report)
+{
+       struct nf_conntrack_ecache *e;
+       struct nf_ct_event item;
+       unsigned long missed;
+       int ret;
+
+       if (!nf_ct_is_confirmed(ct))
+               return 0;
 
        e = nf_ct_ecache_find(ct);
        if (!e)
-               goto out_unlock;
+               return 0;
 
-       if (nf_ct_is_confirmed(ct)) {
-               struct nf_ct_event item = {
-                       .ct     = ct,
-                       .portid = e->portid ? e->portid : portid,
-                       .report = report
-               };
-               /* This is a resent of a destroy event? If so, skip missed */
-               unsigned long missed = e->portid ? 0 : e->missed;
-
-               if (!((eventmask | missed) & e->ctmask))
-                       goto out_unlock;
-
-               ret = notify->fcn(eventmask | missed, &item);
-               if (unlikely(ret < 0 || missed)) {
-                       spin_lock_bh(&ct->lock);
-                       if (ret < 0) {
-                               /* This is a destroy event that has been
-                                * triggered by a process, we store the PORTID
-                                * to include it in the retransmission.
-                                */
-                               if (eventmask & (1 << IPCT_DESTROY)) {
-                                       if (e->portid == 0 && portid != 0)
-                                               e->portid = portid;
-                                       e->state = NFCT_ECACHE_DESTROY_FAIL;
-                               } else {
-                                       e->missed |= eventmask;
-                               }
-                       } else {
-                               e->missed &= ~missed;
-                       }
-                       spin_unlock_bh(&ct->lock);
-               }
+       memset(&item, 0, sizeof(item));
+
+       item.ct = ct;
+       item.portid = e->portid ? e->portid : portid;
+       item.report = report;
+
+       /* This is a resent of a destroy event? If so, skip missed */
+       missed = e->portid ? 0 : e->missed;
+
+       ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
+       if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
+               /* This is a destroy event that has been triggered by a process,
+                * we store the PORTID to include it in the retransmission.
+                */
+               if (e->portid == 0 && portid != 0)
+                       e->portid = portid;
+               e->state = NFCT_ECACHE_DESTROY_FAIL;
        }
-out_unlock:
-       rcu_read_unlock();
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
@@ -190,53 +209,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
  * disabled softirqs */
 void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
-       struct net *net = nf_ct_net(ct);
-       unsigned long events, missed;
-       struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;
        struct nf_ct_event item;
-       int ret;
-
-       rcu_read_lock();
-       notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
-       if (notify == NULL)
-               goto out_unlock;
+       unsigned long events;
 
        if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
-               goto out_unlock;
+               return;
 
        e = nf_ct_ecache_find(ct);
        if (e == NULL)
-               goto out_unlock;
+               return;
 
        events = xchg(&e->cache, 0);
 
-       /* We make a copy of the missed event cache without taking
-        * the lock, thus we may send missed events twice. However,
-        * this does not harm and it happens very rarely. */
-       missed = e->missed;
-
-       if (!((events | missed) & e->ctmask))
-               goto out_unlock;
-
        item.ct = ct;
        item.portid = 0;
        item.report = 0;
 
-       ret = notify->fcn(events | missed, &item);
-
-       if (likely(ret == 0 && !missed))
-               goto out_unlock;
-
-       spin_lock_bh(&ct->lock);
-       if (ret < 0)
-               e->missed |= events;
-       else
-               e->missed &= ~missed;
-       spin_unlock_bh(&ct->lock);
-
-out_unlock:
-       rcu_read_unlock();
+       /* We make a copy of the missed event cache without taking
+        * the lock, thus we may send missed events twice. However,
+        * this does not harm and it happens very rarely.
+        */
+       __nf_conntrack_eventmask_report(e, events, e->missed, &item);
 }
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
@@ -246,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 
 {
        struct net *net = nf_ct_exp_net(exp);
-       struct nf_exp_event_notifier *notify;
+       struct nf_ct_event_notifier *notify;
        struct nf_conntrack_ecache *e;
 
        rcu_read_lock();
-       notify = rcu_dereference(net->ct.nf_expect_event_cb);
+       notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
        if (!notify)
                goto out_unlock;
 
@@ -264,86 +258,35 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
                        .portid = portid,
                        .report = report
                };
-               notify->fcn(1 << event, &item);
+               notify->exp_event(1 << event, &item);
        }
 out_unlock:
        rcu_read_unlock();
 }
 
-int nf_conntrack_register_notifier(struct net *net,
-                                  struct nf_ct_event_notifier *new)
+void nf_conntrack_register_notifier(struct net *net,
+                                   const struct nf_ct_event_notifier *new)
 {
-       int ret;
        struct nf_ct_event_notifier *notify;
 
        mutex_lock(&nf_ct_ecache_mutex);
        notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
                                           lockdep_is_held(&nf_ct_ecache_mutex));
-       if (notify != NULL) {
-               ret = -EBUSY;
-               goto out_unlock;
-       }
+       WARN_ON_ONCE(notify);
        rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
-       ret = 0;
-
-out_unlock:
        mutex_unlock(&nf_ct_ecache_mutex);
-       return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
-void nf_conntrack_unregister_notifier(struct net *net,
-                                     struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net)
 {
-       struct nf_ct_event_notifier *notify;
-
        mutex_lock(&nf_ct_ecache_mutex);
-       notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
-                                          lockdep_is_held(&nf_ct_ecache_mutex));
-       BUG_ON(notify != new);
        RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
        mutex_unlock(&nf_ct_ecache_mutex);
-       /* synchronize_rcu() is called from ctnetlink_exit. */
+       /* synchronize_rcu() is called after netns pre_exit */
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
-int nf_ct_expect_register_notifier(struct net *net,
-                                  struct nf_exp_event_notifier *new)
-{
-       int ret;
-       struct nf_exp_event_notifier *notify;
-
-       mutex_lock(&nf_ct_ecache_mutex);
-       notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
-                                          lockdep_is_held(&nf_ct_ecache_mutex));
-       if (notify != NULL) {
-               ret = -EBUSY;
-               goto out_unlock;
-       }
-       rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
-       ret = 0;
-
-out_unlock:
-       mutex_unlock(&nf_ct_ecache_mutex);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-
-void nf_ct_expect_unregister_notifier(struct net *net,
-                                     struct nf_exp_event_notifier *new)
-{
-       struct nf_exp_event_notifier *notify;
-
-       mutex_lock(&nf_ct_ecache_mutex);
-       notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
-                                          lockdep_is_held(&nf_ct_ecache_mutex));
-       BUG_ON(notify != new);
-       RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
-       mutex_unlock(&nf_ct_ecache_mutex);
-       /* synchronize_rcu() is called from ctnetlink_exit. */
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
-
 void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
 {
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);
index e81af33..5f9fc6b 100644 (file)
@@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
 }
 
 static int
-ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
 {
        const struct nf_conntrack_zone *zone;
        struct net *net;
@@ -852,6 +852,11 @@ static int ctnetlink_done(struct netlink_callback *cb)
        return 0;
 }
 
+struct ctnetlink_filter_u32 {
+       u32 val;
+       u32 mask;
+};
+
 struct ctnetlink_filter {
        u8 family;
 
@@ -862,10 +867,8 @@ struct ctnetlink_filter {
        struct nf_conntrack_tuple reply;
        struct nf_conntrack_zone zone;
 
-       struct {
-               u_int32_t val;
-               u_int32_t mask;
-       } mark;
+       struct ctnetlink_filter_u32 mark;
+       struct ctnetlink_filter_u32 status;
 };
 
 static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
@@ -907,6 +910,46 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
                                         struct nf_conntrack_zone *zone,
                                         u_int32_t flags);
 
+static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark,
+                                      const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       if (cda[CTA_MARK]) {
+               mark->val = ntohl(nla_get_be32(cda[CTA_MARK]));
+
+               if (cda[CTA_MARK_MASK])
+                       mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+               else
+                       mark->mask = 0xffffffff;
+       } else if (cda[CTA_MARK_MASK]) {
+               return -EINVAL;
+       }
+#endif
+       return 0;
+}
+
+static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status,
+                                        const struct nlattr * const cda[])
+{
+       if (cda[CTA_STATUS]) {
+               status->val = ntohl(nla_get_be32(cda[CTA_STATUS]));
+               if (cda[CTA_STATUS_MASK])
+                       status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK]));
+               else
+                       status->mask = status->val;
+
+               /* status->val == 0? always true, else always false. */
+               if (status->mask == 0)
+                       return -EINVAL;
+       } else if (cda[CTA_STATUS_MASK]) {
+               return -EINVAL;
+       }
+
+       /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */
+       BUILD_BUG_ON(__IPS_MAX_BIT >= 32);
+       return 0;
+}
+
 static struct ctnetlink_filter *
 ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
@@ -924,18 +967,14 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 
        filter->family = family;
 
-#ifdef CONFIG_NF_CONNTRACK_MARK
-       if (cda[CTA_MARK]) {
-               filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-               if (cda[CTA_MARK_MASK])
-                       filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
-               else
-                       filter->mark.mask = 0xffffffff;
-       } else if (cda[CTA_MARK_MASK]) {
-               err = -EINVAL;
+       err = ctnetlink_filter_parse_mark(&filter->mark, cda);
+       if (err)
                goto err_filter;
-       }
-#endif
+
+       err = ctnetlink_filter_parse_status(&filter->status, cda);
+       if (err)
+               goto err_filter;
+
        if (!cda[CTA_FILTER])
                return filter;
 
@@ -989,7 +1028,7 @@ err_filter:
 
 static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
 {
-       return family || cda[CTA_MARK] || cda[CTA_FILTER];
+       return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
 }
 
 static int ctnetlink_start(struct netlink_callback *cb)
@@ -1082,6 +1121,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 {
        struct ctnetlink_filter *filter = data;
        struct nf_conntrack_tuple *tuple;
+       u32 status;
 
        if (filter == NULL)
                goto out;
@@ -1113,6 +1153,9 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
        if ((ct->mark & filter->mark.mask) != filter->mark.val)
                goto ignore_entry;
 #endif
+       status = (u32)READ_ONCE(ct->status);
+       if ((status & filter->status.mask) != filter->status.val)
+               goto ignore_entry;
 
 out:
        return 1;
@@ -1495,6 +1538,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
        [CTA_LABELS_MASK]       = { .type = NLA_BINARY,
                                    .len = NF_CT_LABELS_MAX_SIZE },
        [CTA_FILTER]            = { .type = NLA_NESTED },
+       [CTA_STATUS_MASK]       = { .type = NLA_U32 },
 };
 
 static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
@@ -2625,6 +2669,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
               + nla_total_size(0) /* CTA_HELP */
               + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
               + ctnetlink_secctx_size(ct)
+              + ctnetlink_acct_size(ct)
+              + ctnetlink_timestamp_size(ct)
 #if IS_ENABLED(CONFIG_NF_NAT)
               + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
               + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -2682,6 +2728,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
        if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
                goto nla_put_failure;
 
+       if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
+           ctnetlink_dump_timestamp(skb, ct) < 0)
+               goto nla_put_failure;
+
        if (ctnetlink_dump_helpinfo(skb, ct) < 0)
                goto nla_put_failure;
 
@@ -3060,7 +3110,7 @@ nla_put_failure:
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 static int
-ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
+ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
 {
        struct nf_conntrack_expect *exp = item->exp;
        struct net *net = nf_ct_exp_net(exp);
@@ -3711,11 +3761,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 static struct nf_ct_event_notifier ctnl_notifier = {
-       .fcn = ctnetlink_conntrack_event,
-};
-
-static struct nf_exp_event_notifier ctnl_notifier_exp = {
-       .fcn = ctnetlink_expect_event,
+       .ct_event = ctnetlink_conntrack_event,
+       .exp_event = ctnetlink_expect_event,
 };
 #endif
 
@@ -3808,52 +3855,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
 static int __net_init ctnetlink_net_init(struct net *net)
 {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-       int ret;
-
-       ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
-       if (ret < 0) {
-               pr_err("ctnetlink_init: cannot register notifier.\n");
-               goto err_out;
-       }
-
-       ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
-       if (ret < 0) {
-               pr_err("ctnetlink_init: cannot expect register notifier.\n");
-               goto err_unreg_notifier;
-       }
+       nf_conntrack_register_notifier(net, &ctnl_notifier);
 #endif
        return 0;
-
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
-       nf_conntrack_unregister_notifier(net, &ctnl_notifier);
-err_out:
-       return ret;
-#endif
 }
 
-static void ctnetlink_net_exit(struct net *net)
+static void ctnetlink_net_pre_exit(struct net *net)
 {
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-       nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
-       nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+       nf_conntrack_unregister_notifier(net);
 #endif
 }
 
-static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
-{
-       struct net *net;
-
-       list_for_each_entry(net, net_exit_list, exit_list)
-               ctnetlink_net_exit(net);
-
-       /* wait for other cpus until they are done with ctnl_notifiers */
-       synchronize_rcu();
-}
-
 static struct pernet_operations ctnetlink_net_ops = {
        .init           = ctnetlink_net_init,
-       .exit_batch     = ctnetlink_net_exit_batch,
+       .pre_exit       = ctnetlink_net_pre_exit,
 };
 
 static int __init ctnetlink_init(void)
index e84b499..7e0d956 100644 (file)
@@ -22,6 +22,9 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_timestamp.h>
+#ifdef CONFIG_LWTUNNEL
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+#endif
 #include <linux/rculist_nulls.h>
 
 static bool enable_hooks __read_mostly;
@@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
        NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
        NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
 #endif
+#ifdef CONFIG_LWTUNNEL
+       NF_SYSCTL_CT_LWTUNNEL,
+#endif
 
        __NF_SYSCTL_CT_LAST_SYSCTL,
 };
@@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
+#endif
+#ifdef CONFIG_LWTUNNEL
+       [NF_SYSCTL_CT_LWTUNNEL] = {
+               .procname       = "nf_hooks_lwtunnel",
+               .data           = NULL,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = nf_hooks_lwtunnel_sysctl_handler,
+       },
 #endif
        {}
 };
index 8788b51..87a7388 100644 (file)
@@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
                flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
                break;
        case NFPROTO_IPV6:
-               flow_tuple->mtu = ip6_dst_mtu_forward(dst);
+               flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
                break;
        }
 
@@ -180,15 +180,10 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
 
 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
 {
-       const struct nf_conntrack_l4proto *l4proto;
        struct net *net = nf_ct_net(ct);
        int l4num = nf_ct_protonum(ct);
        s32 timeout;
 
-       l4proto = nf_ct_l4proto_find(l4num);
-       if (!l4proto)
-               return;
-
        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
@@ -278,15 +273,10 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
 
 unsigned long flow_offload_get_timeout(struct flow_offload *flow)
 {
-       const struct nf_conntrack_l4proto *l4proto;
        unsigned long timeout = NF_FLOW_TIMEOUT;
        struct net *net = nf_ct_net(flow->ct);
        int l4num = nf_ct_protonum(flow->ct);
 
-       l4proto = nf_ct_l4proto_find(l4num);
-       if (!l4proto)
-               return timeout;
-
        if (l4num == IPPROTO_TCP) {
                struct nf_tcp_net *tn = nf_tcp_pernet(net);
 
index f92006c..d6bf1b2 100644 (file)
@@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net,
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
                            &val, &mask);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return 0;
 }
@@ -1097,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
        bo->command     = cmd;
        bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        bo->extack      = extack;
+       bo->cb_list_head = &flowtable->flow_block.cb_list;
        INIT_LIST_HEAD(&bo->cb_list);
 }
 
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
new file mode 100644 (file)
index 0000000..00e89ff
--- /dev/null
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sysctl.h>
+#include <net/lwtunnel.h>
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+
+static inline int nf_hooks_lwtunnel_get(void)
+{
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+               return 1;
+       else
+               return 0;
+}
+
+static inline int nf_hooks_lwtunnel_set(int enable)
+{
+       if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
+               if (!enable)
+                       return -EBUSY;
+       } else if (enable) {
+               static_branch_enable(&nf_hooks_lwtunnel_enabled);
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+                                    void *buffer, size_t *lenp, loff_t *ppos)
+{
+       int proc_nf_hooks_lwtunnel_enabled = 0;
+       struct ctl_table tmp = {
+               .procname = table->procname,
+               .data = &proc_nf_hooks_lwtunnel_enabled,
+               .maxlen = sizeof(int),
+               .mode = table->mode,
+               .extra1 = SYSCTL_ZERO,
+               .extra2 = SYSCTL_ONE,
+       };
+       int ret;
+
+       if (!write)
+               proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
+
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+       if (write && ret == 0)
+               ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+#endif /* CONFIG_SYSCTL */
index bbd1209..6d12afa 100644 (file)
@@ -21,6 +21,8 @@
 
 #include "nf_internals.h"
 
+static const struct nf_queue_handler __rcu *nf_queue_handler;
+
 /*
  * Hook for nfnetlink_queue to register its queue handler.
  * We do this so that most of the NFQUEUE code can be modular.
  * receives, no matter what.
  */
 
-/* return EBUSY when somebody else is registered, return EEXIST if the
- * same handler is registered, return 0 in case of success. */
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh)
+void nf_register_queue_handler(const struct nf_queue_handler *qh)
 {
        /* should never happen, we only have one queueing backend in kernel */
-       WARN_ON(rcu_access_pointer(net->nf.queue_handler));
-       rcu_assign_pointer(net->nf.queue_handler, qh);
+       WARN_ON(rcu_access_pointer(nf_queue_handler));
+       rcu_assign_pointer(nf_queue_handler, qh);
 }
 EXPORT_SYMBOL(nf_register_queue_handler);
 
 /* The caller must flush their queue before this */
-void nf_unregister_queue_handler(struct net *net)
+void nf_unregister_queue_handler(void)
 {
-       RCU_INIT_POINTER(net->nf.queue_handler, NULL);
+       RCU_INIT_POINTER(nf_queue_handler, NULL);
 }
 EXPORT_SYMBOL(nf_unregister_queue_handler);
 
@@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
        struct nf_hook_state *state = &entry->state;
 
        /* Release those devices we held, or Alexey will kill me. */
-       if (state->in)
-               dev_put(state->in);
-       if (state->out)
-               dev_put(state->out);
+       dev_put(state->in);
+       dev_put(state->out);
        if (state->sk)
                sock_put(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_put(entry->physin);
-       if (entry->physout)
-               dev_put(entry->physout);
+       dev_put(entry->physin);
+       dev_put(entry->physout);
 #endif
 }
 
@@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
        struct nf_hook_state *state = &entry->state;
 
-       if (state->in)
-               dev_hold(state->in);
-       if (state->out)
-               dev_hold(state->out);
+       dev_hold(state->in);
+       dev_hold(state->out);
        if (state->sk)
                sock_hold(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_hold(entry->physin);
-       if (entry->physout)
-               dev_hold(entry->physout);
+       dev_hold(entry->physin);
+       dev_hold(entry->physout);
 #endif
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -116,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net)
        const struct nf_queue_handler *qh;
 
        rcu_read_lock();
-       qh = rcu_dereference(net->nf.queue_handler);
+       qh = rcu_dereference(nf_queue_handler);
        if (qh)
                qh->nf_hook_drop(net);
        rcu_read_unlock();
@@ -157,12 +149,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
 {
        struct nf_queue_entry *entry = NULL;
        const struct nf_queue_handler *qh;
-       struct net *net = state->net;
        unsigned int route_key_size;
        int status;
 
        /* QUEUE == DROP if no one is waiting, to be safe. */
-       qh = rcu_dereference(net->nf.queue_handler);
+       qh = rcu_dereference(nf_queue_handler);
        if (!qh)
                return -ESRCH;
 
index b58d73a..9656c16 100644 (file)
@@ -353,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo,
        bo->command     = cmd;
        bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        bo->extack      = extack;
+       bo->cb_list_head = &basechain->flow_block.cb_list;
        INIT_LIST_HEAD(&bo->cb_list);
 }
 
index f774de0..4c3fbaa 100644 (file)
@@ -951,6 +951,16 @@ static void nfqnl_nf_hook_drop(struct net *net)
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
        int i;
 
+       /* This function is also called on net namespace error unwind,
+        * when pernet_ops->init() failed and ->exit() functions of the
+        * previous pernet_ops gets called.
+        *
+        * This may result in a call to nfqnl_nf_hook_drop() before
+        * struct nfnl_queue_net was allocated.
+        */
+       if (!q)
+               return;
+
        for (i = 0; i < INSTANCE_BUCKETS; i++) {
                struct nfqnl_instance *inst;
                struct hlist_head *head = &q->instance_table[i];
@@ -1502,7 +1512,6 @@ static int __net_init nfnl_queue_net_init(struct net *net)
                        &nfqnl_seq_ops, sizeof(struct iter_state)))
                return -ENOMEM;
 #endif
-       nf_register_queue_handler(net, &nfqh);
        return 0;
 }
 
@@ -1511,7 +1520,6 @@ static void __net_exit nfnl_queue_net_exit(struct net *net)
        struct nfnl_queue_net *q = nfnl_queue_pernet(net);
        unsigned int i;
 
-       nf_unregister_queue_handler(net);
 #ifdef CONFIG_PROC_FS
        remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
 #endif
@@ -1555,6 +1563,8 @@ static int __init nfnetlink_queue_init(void)
                goto cleanup_netlink_subsys;
        }
 
+       nf_register_queue_handler(&nfqh);
+
        return status;
 
 cleanup_netlink_subsys:
@@ -1568,6 +1578,7 @@ out:
 
 static void __exit nfnetlink_queue_fini(void)
 {
+       nf_unregister_queue_handler();
        unregister_netdevice_notifier(&nfqnl_dev_notifier);
        nfnetlink_subsys_unregister(&nfqnl_subsys);
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
index 639c337..272bcdb 100644 (file)
@@ -683,14 +683,12 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
                goto out_put;
        }
 
-       ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
-                             MSG_DONTWAIT);
-       if (ret > 0)
-               ret = 0;
+       ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 out_put:
        rcu_read_lock();
        module_put(THIS_MODULE);
-       return ret == -EAGAIN ? -ENOBUFS : ret;
+
+       return ret;
 }
 
 static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
index 84e58ee..25524e3 100644 (file)
@@ -39,6 +39,20 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define XT_PCPU_BLOCK_SIZE 4096
 #define XT_MAX_TABLE_SIZE      (512 * 1024 * 1024)
 
+struct xt_template {
+       struct list_head list;
+
+       /* called when table is needed in the given netns */
+       int (*table_init)(struct net *net);
+
+       struct module *me;
+
+       /* A unique name... */
+       char name[XT_TABLE_MAXNAMELEN];
+};
+
+static struct list_head xt_templates[NFPROTO_NUMPROTO];
+
 struct xt_pernet {
        struct list_head tables[NFPROTO_NUMPROTO];
 };
@@ -1221,48 +1235,43 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
                                    const char *name)
 {
        struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
-       struct xt_table *t, *found = NULL;
+       struct module *owner = NULL;
+       struct xt_template *tmpl;
+       struct xt_table *t;
 
        mutex_lock(&xt[af].mutex);
        list_for_each_entry(t, &xt_net->tables[af], list)
                if (strcmp(t->name, name) == 0 && try_module_get(t->me))
                        return t;
 
-       if (net == &init_net)
-               goto out;
-
-       /* Table doesn't exist in this netns, re-try init */
-       xt_net = net_generic(&init_net, xt_pernet_id);
-       list_for_each_entry(t, &xt_net->tables[af], list) {
+       /* Table doesn't exist in this netns, check larval list */
+       list_for_each_entry(tmpl, &xt_templates[af], list) {
                int err;
 
-               if (strcmp(t->name, name))
+               if (strcmp(tmpl->name, name))
                        continue;
-               if (!try_module_get(t->me))
+               if (!try_module_get(tmpl->me))
                        goto out;
+
+               owner = tmpl->me;
+
                mutex_unlock(&xt[af].mutex);
-               err = t->table_init(net);
+               err = tmpl->table_init(net);
                if (err < 0) {
-                       module_put(t->me);
+                       module_put(owner);
                        return ERR_PTR(err);
                }
 
-               found = t;
-
                mutex_lock(&xt[af].mutex);
                break;
        }
 
-       if (!found)
-               goto out;
-
-       xt_net = net_generic(net, xt_pernet_id);
        /* and once again: */
        list_for_each_entry(t, &xt_net->tables[af], list)
                if (strcmp(t->name, name) == 0)
                        return t;
 
-       module_put(found->me);
+       module_put(owner);
  out:
        mutex_unlock(&xt[af].mutex);
        return ERR_PTR(-ENOENT);
@@ -1749,6 +1758,58 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
 }
 EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
 
+int xt_register_template(const struct xt_table *table,
+                        int (*table_init)(struct net *net))
+{
+       int ret = -EEXIST, af = table->af;
+       struct xt_template *t;
+
+       mutex_lock(&xt[af].mutex);
+
+       list_for_each_entry(t, &xt_templates[af], list) {
+               if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0))
+                       goto out_unlock;
+       }
+
+       ret = -ENOMEM;
+       t = kzalloc(sizeof(*t), GFP_KERNEL);
+       if (!t)
+               goto out_unlock;
+
+       BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name));
+
+       strscpy(t->name, table->name, sizeof(t->name));
+       t->table_init = table_init;
+       t->me = table->me;
+       list_add(&t->list, &xt_templates[af]);
+       ret = 0;
+out_unlock:
+       mutex_unlock(&xt[af].mutex);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(xt_register_template);
+
+void xt_unregister_template(const struct xt_table *table)
+{
+       struct xt_template *t;
+       int af = table->af;
+
+       mutex_lock(&xt[af].mutex);
+       list_for_each_entry(t, &xt_templates[af], list) {
+               if (strcmp(table->name, t->name))
+                       continue;
+
+               list_del(&t->list);
+               mutex_unlock(&xt[af].mutex);
+               kfree(t);
+               return;
+       }
+
+       mutex_unlock(&xt[af].mutex);
+       WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(xt_unregister_template);
+
 int xt_proto_init(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
@@ -1937,6 +1998,7 @@ static int __init xt_init(void)
 #endif
                INIT_LIST_HEAD(&xt[i].target);
                INIT_LIST_HEAD(&xt[i].match);
+               INIT_LIST_HEAD(&xt_templates[i]);
        }
        rv = register_pernet_subsys(&xt_net_ops);
        if (rv < 0)
index 12404d2..0a913ce 100644 (file)
@@ -351,21 +351,10 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
        return XT_CONTINUE;
 }
 
-static int notrack_chk(const struct xt_tgchk_param *par)
-{
-       if (!par->net->xt.notrack_deprecated_warning) {
-               pr_info("netfilter: NOTRACK target is deprecated, "
-                       "use CT instead or upgrade iptables\n");
-               par->net->xt.notrack_deprecated_warning = true;
-       }
-       return 0;
-}
-
 static struct xt_target notrack_tg_reg __read_mostly = {
        .name           = "NOTRACK",
        .revision       = 0,
        .family         = NFPROTO_UNSPEC,
-       .checkentry     = notrack_chk,
        .target         = notrack_tg,
        .table          = "raw",
        .me             = THIS_MODULE,
index 13cf3f9..849ac55 100644 (file)
@@ -90,7 +90,7 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_bpf_info *info = par->matchinfo;
 
-       return BPF_PROG_RUN(info->filter, skb);
+       return bpf_prog_run(info->filter, skb);
 }
 
 static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
index baf2357..894e6b8 100644 (file)
@@ -144,8 +144,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
                return -ENOMEM;
        doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
        if (doi_def->map.std == NULL) {
-               ret_val = -ENOMEM;
-               goto add_std_failure;
+               kfree(doi_def);
+               return -ENOMEM;
        }
        doi_def->type = CIPSO_V4_MAP_TRANS;
 
@@ -187,14 +187,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
                }
        doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
                                              sizeof(u32),
-                                             GFP_KERNEL);
+                                             GFP_KERNEL | __GFP_NOWARN);
        if (doi_def->map.std->lvl.local == NULL) {
                ret_val = -ENOMEM;
                goto add_std_failure;
        }
        doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
                                              sizeof(u32),
-                                             GFP_KERNEL);
+                                             GFP_KERNEL | __GFP_NOWARN);
        if (doi_def->map.std->lvl.cipso == NULL) {
                ret_val = -ENOMEM;
                goto add_std_failure;
@@ -263,7 +263,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
                doi_def->map.std->cat.local = kcalloc(
                                              doi_def->map.std->cat.local_size,
                                              sizeof(u32),
-                                             GFP_KERNEL);
+                                             GFP_KERNEL | __GFP_NOWARN);
                if (doi_def->map.std->cat.local == NULL) {
                        ret_val = -ENOMEM;
                        goto add_std_failure;
@@ -271,7 +271,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
                doi_def->map.std->cat.cipso = kcalloc(
                                              doi_def->map.std->cat.cipso_size,
                                              sizeof(u32),
-                                             GFP_KERNEL);
+                                             GFP_KERNEL | __GFP_NOWARN);
                if (doi_def->map.std->cat.cipso == NULL) {
                        ret_val = -ENOMEM;
                        goto add_std_failure;
index 2483df0..566ba43 100644 (file)
@@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
                netlbl_af4list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr->s_addr, mask->s_addr);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
@@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
                netlbl_af6list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr, mask);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
index 380f95a..24b7cf4 100644 (file)
@@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
                /* errors reported via destination sk->sk_err, but propagate
                 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+               if (err == -ESRCH)
+                       err = 0;
        }
 
        if (report) {
                int err2;
 
                err2 = nlmsg_unicast(sk, skb, portid);
-               if (!err || err == -ESRCH)
+               if (!err)
                        err = err2;
        }
 
index 2d6fdf4..1afca2a 100644 (file)
@@ -40,14 +40,6 @@ void genl_unlock(void)
 }
 EXPORT_SYMBOL(genl_unlock);
 
-#ifdef CONFIG_LOCKDEP
-bool lockdep_genl_is_held(void)
-{
-       return lockdep_is_held(&genl_mutex);
-}
-EXPORT_SYMBOL(lockdep_genl_is_held);
-#endif
-
 static void genl_lock_all(void)
 {
        down_write(&cb_lock);
@@ -1485,6 +1477,7 @@ int genlmsg_multicast_allns(const struct genl_family *family,
 {
        if (WARN_ON_ONCE(group >= family->n_mcgrps))
                return -EINVAL;
+
        group = family->mcgrp_offset + group;
        return genlmsg_mcast(skb, portid, group, flags);
 }
@@ -1495,14 +1488,12 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
 {
        struct net *net = genl_info_net(info);
        struct sock *sk = net->genl_sock;
-       int report = 0;
-
-       if (info->nlhdr)
-               report = nlmsg_report(info->nlhdr);
 
        if (WARN_ON_ONCE(group >= family->n_mcgrps))
                return;
+
        group = family->mcgrp_offset + group;
-       nlmsg_notify(sk, skb, info->snd_portid, group, report, flags);
+       nlmsg_notify(sk, skb, info->snd_portid, group,
+                    nlmsg_report(info->nlhdr), flags);
 }
 EXPORT_SYMBOL(genl_notify);
index a880dd3..511819f 100644 (file)
@@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused)
                if (dev == NULL || nr_rx_frame(skb, dev) == 0)
                        kfree_skb(skb);
 
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
 
                if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
                        mod_timer(&loopback_timer, jiffies + 10);
index de04560..ddd5cbd 100644 (file)
@@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void)
                        if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
                                first = dev;
        }
-       if (first)
-               dev_hold(first);
+       dev_hold(first);
        rcu_read_unlock();
 
        return first;
index 4a9e720..6024fad 100644 (file)
@@ -79,7 +79,7 @@ int __init af_nfc_init(void)
        return sock_register(&nfc_sock_family_ops);
 }
 
-void af_nfc_exit(void)
+void __exit af_nfc_exit(void)
 {
        sock_unregister(PF_NFC);
 }
index 573c80c..3c645c1 100644 (file)
@@ -636,7 +636,7 @@ error:
        return rc;
 }
 
-int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
 {
        pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
 
@@ -665,7 +665,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb)
 EXPORT_SYMBOL(nfc_tm_data_received);
 
 int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
-                    u8 *gb, size_t gb_len)
+                    const u8 *gb, size_t gb_len)
 {
        int rc;
 
@@ -824,7 +824,7 @@ EXPORT_SYMBOL(nfc_targets_found);
  */
 int nfc_target_lost(struct nfc_dev *dev, u32 target_idx)
 {
-       struct nfc_target *tg;
+       const struct nfc_target *tg;
        int i;
 
        pr_debug("dev_name %s n_target %d\n", dev_name(&dev->dev), target_idx);
@@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx)
  * @tx_headroom: reserved space at beginning of skb
  * @tx_tailroom: reserved space at end of skb
  */
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
                                    u32 supported_protocols,
                                    int tx_headroom, int tx_tailroom)
 {
index 5044c7d..fefc036 100644 (file)
@@ -732,7 +732,7 @@ exit:
        return rc;
 }
 
-static struct nfc_ops digital_nfc_ops = {
+static const struct nfc_ops digital_nfc_ops = {
        .dev_up = digital_dev_up,
        .dev_down = digital_dev_down,
        .start_poll = digital_start_poll,
@@ -745,7 +745,7 @@ static struct nfc_ops digital_nfc_ops = {
        .im_transceive = digital_in_send,
 };
 
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
                                            __u32 supported_protocols,
                                            __u32 driver_capabilities,
                                            int tx_headroom, int tx_tailroom)
index 3481941..ceb87db 100644 (file)
@@ -128,7 +128,7 @@ static void nfc_hci_msg_rx_work(struct work_struct *work)
        struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev,
                                                msg_rx_work);
        struct sk_buff *skb;
-       struct hcp_message *message;
+       const struct hcp_message *message;
        u8 pipe;
        u8 type;
        u8 instruction;
@@ -182,9 +182,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
                          struct sk_buff *skb)
 {
        u8 status = NFC_HCI_ANY_OK;
-       struct hci_create_pipe_resp *create_info;
-       struct hci_delete_pipe_noti *delete_info;
-       struct hci_all_pipe_cleared_noti *cleared_info;
+       const struct hci_create_pipe_resp *create_info;
+       const struct hci_delete_pipe_noti *delete_info;
+       const struct hci_all_pipe_cleared_noti *cleared_info;
        u8 gate;
 
        pr_debug("from pipe %x cmd %x\n", pipe, cmd);
@@ -447,7 +447,7 @@ static void nfc_hci_cmd_timeout(struct timer_list *t)
 }
 
 static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count,
-                                struct nfc_hci_gate *gates)
+                                const struct nfc_hci_gate *gates)
 {
        int r;
        while (gate_count--) {
@@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
        return hdev->ops->fw_download(hdev, firmware_name);
 }
 
-static struct nfc_ops hci_nfc_ops = {
+static const struct nfc_ops hci_nfc_ops = {
        .dev_up = hci_dev_up,
        .dev_down = hci_dev_down,
        .start_poll = hci_start_poll,
@@ -947,7 +947,7 @@ static struct nfc_ops hci_nfc_ops = {
        .se_io = hci_se_io,
 };
 
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
                                            struct nfc_hci_init_data *init_data,
                                            unsigned long quirks,
                                            u32 protocols,
index 6ab40ea..2140f67 100644 (file)
@@ -11,7 +11,7 @@
 
 static LIST_HEAD(llc_engines);
 
-int nfc_llc_init(void)
+int __init nfc_llc_init(void)
 {
        int r;
 
@@ -41,7 +41,7 @@ void nfc_llc_exit(void)
        }
 }
 
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops)
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
 {
        struct nfc_llc_engine *llc_engine;
 
index 823ddb6..d66271d 100644 (file)
@@ -26,20 +26,20 @@ struct nfc_llc_ops {
 
 struct nfc_llc_engine {
        const char *name;
-       struct nfc_llc_ops *ops;
+       const struct nfc_llc_ops *ops;
        struct list_head entry;
 };
 
 struct nfc_llc {
        void *data;
-       struct nfc_llc_ops *ops;
+       const struct nfc_llc_ops *ops;
        int rx_headroom;
        int rx_tailroom;
 };
 
 void *nfc_llc_get_data(struct nfc_llc *llc);
 
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
 void nfc_llc_unregister(const char *name);
 
 int nfc_llc_nop_register(void);
index a42852f..a58716f 100644 (file)
@@ -71,7 +71,7 @@ static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
        return llc_nop->xmit_to_drv(llc_nop->hdev, skb);
 }
 
-static struct nfc_llc_ops llc_nop_ops = {
+static const struct nfc_llc_ops llc_nop_ops = {
        .init = llc_nop_init,
        .deinit = llc_nop_deinit,
        .start = llc_nop_start,
index 1e3a900..aef750d 100644 (file)
@@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
                return ((y >= x) || (y < z)) ? true : false;
 }
 
-static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
+static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc,
                                           int payload_len)
 {
        struct sk_buff *skb;
@@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
 }
 
 /* immediately sends an S frame. */
-static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc,
                                  enum sframe_type sframe_type, int nr)
 {
        int r;
@@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
 }
 
 /* immediately sends an U frame. skb may contain optional payload */
-static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc,
                                  struct sk_buff *skb,
                                  enum uframe_modifier uframe_modifier)
 {
@@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
        wake_up(shdlc->connect_wq);
 }
 
-static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc)
 {
        struct sk_buff *skb;
 
@@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
        return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
 }
 
-static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc)
 {
        struct sk_buff *skb;
 
@@ -820,7 +820,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
        return 0;
 }
 
-static struct nfc_llc_ops llc_shdlc_ops = {
+static const struct nfc_llc_ops llc_shdlc_ops = {
        .init = llc_shdlc_init,
        .deinit = llc_shdlc_deinit,
        .start = llc_shdlc_start,
index 97853c9..d49d4bf 100644 (file)
@@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
 
 /* TLV API */
 int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
-                         u8 *tlv_array, u16 tlv_array_len);
+                         const u8 *tlv_array, u16 tlv_array_len);
 int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
-                                 u8 *tlv_array, u16 tlv_array_len);
+                                 const u8 *tlv_array, u16 tlv_array_len);
 
 /* Commands API */
 void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length);
 struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
                                                  size_t uri_len);
 void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
 void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
index 475061c..3c4172a 100644 (file)
@@ -15,7 +15,7 @@
 #include "nfc.h"
 #include "llcp.h"
 
-static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+static const u8 llcp_tlv_length[LLCP_TLV_MAX] = {
        0,
        1, /* VERSION */
        2, /* MIUX */
@@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
 
 };
 
-static u8 llcp_tlv8(u8 *tlv, u8 type)
+static u8 llcp_tlv8(const u8 *tlv, u8 type)
 {
        if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
                return 0;
@@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type)
        return tlv[2];
 }
 
-static u16 llcp_tlv16(u8 *tlv, u8 type)
+static u16 llcp_tlv16(const u8 *tlv, u8 type)
 {
        if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
                return 0;
@@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type)
 }
 
 
-static u8 llcp_tlv_version(u8 *tlv)
+static u8 llcp_tlv_version(const u8 *tlv)
 {
        return llcp_tlv8(tlv, LLCP_TLV_VERSION);
 }
 
-static u16 llcp_tlv_miux(u8 *tlv)
+static u16 llcp_tlv_miux(const u8 *tlv)
 {
        return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff;
 }
 
-static u16 llcp_tlv_wks(u8 *tlv)
+static u16 llcp_tlv_wks(const u8 *tlv)
 {
        return llcp_tlv16(tlv, LLCP_TLV_WKS);
 }
 
-static u16 llcp_tlv_lto(u8 *tlv)
+static u16 llcp_tlv_lto(const u8 *tlv)
 {
        return llcp_tlv8(tlv, LLCP_TLV_LTO);
 }
 
-static u8 llcp_tlv_opt(u8 *tlv)
+static u8 llcp_tlv_opt(const u8 *tlv)
 {
        return llcp_tlv8(tlv, LLCP_TLV_OPT);
 }
 
-static u8 llcp_tlv_rw(u8 *tlv)
+static u8 llcp_tlv_rw(const u8 *tlv)
 {
        return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
 }
 
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length)
 {
        u8 *tlv, length;
 
@@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
        return sdres;
 }
 
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
                                                  size_t uri_len)
 {
        struct nfc_llcp_sdp_tlv *sdreq;
@@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
 }
 
 int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
-                         u8 *tlv_array, u16 tlv_array_len)
+                         const u8 *tlv_array, u16 tlv_array_len)
 {
-       u8 *tlv = tlv_array, type, length, offset = 0;
+       const u8 *tlv = tlv_array;
+       u8 type, length, offset = 0;
 
        pr_debug("TLV array length %d\n", tlv_array_len);
 
@@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
 }
 
 int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
-                                 u8 *tlv_array, u16 tlv_array_len)
+                                 const u8 *tlv_array, u16 tlv_array_len)
 {
-       u8 *tlv = tlv_array, type, length, offset = 0;
+       const u8 *tlv = tlv_array;
+       u8 type, length, offset = 0;
 
        pr_debug("TLV array length %d\n", tlv_array_len);
 
@@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
        return pdu;
 }
 
-static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv,
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv,
                                    u8 tlv_length)
 {
        /* XXX Add an skb length check */
@@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
 {
        struct nfc_llcp_local *local;
        struct sk_buff *skb;
-       u8 *service_name_tlv = NULL, service_name_tlv_length;
-       u8 *miux_tlv = NULL, miux_tlv_length;
-       u8 *rw_tlv = NULL, rw_tlv_length, rw;
+       const u8 *service_name_tlv = NULL;
+       const u8 *miux_tlv = NULL;
+       const u8 *rw_tlv = NULL;
+       u8 service_name_tlv_length, miux_tlv_length,  rw_tlv_length, rw;
        int err;
        u16 size = 0;
        __be16 miux;
@@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
 {
        struct nfc_llcp_local *local;
        struct sk_buff *skb;
-       u8 *miux_tlv = NULL, miux_tlv_length;
-       u8 *rw_tlv = NULL, rw_tlv_length, rw;
+       const u8 *miux_tlv = NULL;
+       const u8 *rw_tlv = NULL;
+       u8 miux_tlv_length, rw_tlv_length, rw;
        int err;
        u16 size = 0;
        __be16 miux;
index cc99751..eaeb2b1 100644 (file)
@@ -301,7 +301,7 @@ static char *wks[] = {
        "urn:nfc:sn:snep",
 };
 
-static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len)
 {
        int sap, num_wks;
 
@@ -325,7 +325,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
 
 static
 struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
-                                           u8 *sn, size_t sn_len)
+                                           const u8 *sn, size_t sn_len)
 {
        struct sock *sk;
        struct nfc_llcp_sock *llcp_sock, *tmp_sock;
@@ -522,7 +522,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
 {
        u8 *gb_cur, version, version_length;
        u8 lto_length, wks_length, miux_length;
-       u8 *version_tlv = NULL, *lto_tlv = NULL,
+       const u8 *version_tlv = NULL, *lto_tlv = NULL,
           *wks_tlv = NULL, *miux_tlv = NULL;
        __be16 wks = cpu_to_be16(local->local_wks);
        u8 gb_len = 0;
@@ -612,7 +612,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
        return local->gb;
 }
 
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
 {
        struct nfc_llcp_local *local;
 
@@ -639,27 +639,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
                                     local->remote_gb_len - 3);
 }
 
-static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+static u8 nfc_llcp_dsap(const struct sk_buff *pdu)
 {
        return (pdu->data[0] & 0xfc) >> 2;
 }
 
-static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+static u8 nfc_llcp_ptype(const struct sk_buff *pdu)
 {
        return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
 }
 
-static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+static u8 nfc_llcp_ssap(const struct sk_buff *pdu)
 {
        return pdu->data[1] & 0x3f;
 }
 
-static u8 nfc_llcp_ns(struct sk_buff *pdu)
+static u8 nfc_llcp_ns(const struct sk_buff *pdu)
 {
        return pdu->data[2] >> 4;
 }
 
-static u8 nfc_llcp_nr(struct sk_buff *pdu)
+static u8 nfc_llcp_nr(const struct sk_buff *pdu)
 {
        return pdu->data[2] & 0xf;
 }
@@ -801,7 +801,7 @@ out:
 }
 
 static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
-                                                 u8 *sn, size_t sn_len)
+                                                 const u8 *sn, size_t sn_len)
 {
        struct nfc_llcp_sock *llcp_sock;
 
@@ -815,9 +815,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
        return llcp_sock;
 }
 
-static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len)
 {
-       u8 *tlv = &skb->data[2], type, length;
+       u8 type, length;
+       const u8 *tlv = &skb->data[2];
        size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
 
        while (offset < tlv_array_len) {
@@ -875,7 +876,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
 }
 
 static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
-                                 struct sk_buff *skb)
+                                 const struct sk_buff *skb)
 {
        struct sock *new_sk, *parent;
        struct nfc_llcp_sock *sock, *new_sock;
@@ -893,7 +894,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
                        goto fail;
                }
        } else {
-               u8 *sn;
+               const u8 *sn;
                size_t sn_len;
 
                sn = nfc_llcp_connect_sn(skb, &sn_len);
@@ -1112,7 +1113,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
 }
 
 static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
-                              struct sk_buff *skb)
+                              const struct sk_buff *skb)
 {
        struct nfc_llcp_sock *llcp_sock;
        struct sock *sk;
@@ -1155,7 +1156,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
        nfc_llcp_sock_put(llcp_sock);
 }
 
-static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+                            const struct sk_buff *skb)
 {
        struct nfc_llcp_sock *llcp_sock;
        struct sock *sk;
@@ -1188,7 +1190,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
        nfc_llcp_sock_put(llcp_sock);
 }
 
-static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_dm(struct nfc_llcp_local *local,
+                            const struct sk_buff *skb)
 {
        struct nfc_llcp_sock *llcp_sock;
        struct sock *sk;
@@ -1226,12 +1229,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
 }
 
 static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
-                             struct sk_buff *skb)
+                             const struct sk_buff *skb)
 {
        struct nfc_llcp_sock *llcp_sock;
-       u8 dsap, ssap, *tlv, type, length, tid, sap;
+       u8 dsap, ssap, type, length, tid, sap;
+       const u8 *tlv;
        u16 tlv_len, offset;
-       char *service_name;
+       const char *service_name;
        size_t service_name_len;
        struct nfc_llcp_sdp_tlv *sdp;
        HLIST_HEAD(llc_sdres_list);
index da7fe9d..82ab39d 100644 (file)
@@ -53,9 +53,9 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
 }
 
 int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
-                                         struct dest_spec_params *params)
+                                         const struct dest_spec_params *params)
 {
-       struct nci_conn_info *conn_info;
+       const struct nci_conn_info *conn_info;
 
        list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
                if (conn_info->dest_type == dest_type) {
@@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err)
 
 /* Execute request and wait for completion. */
 static int __nci_request(struct nci_dev *ndev,
-                        void (*req)(struct nci_dev *ndev, unsigned long opt),
-                        unsigned long opt, __u32 timeout)
+                        void (*req)(struct nci_dev *ndev, const void *opt),
+                        const void *opt, __u32 timeout)
 {
        int rc = 0;
        long completion_rc;
@@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev,
 
 inline int nci_request(struct nci_dev *ndev,
                       void (*req)(struct nci_dev *ndev,
-                                  unsigned long opt),
-                      unsigned long opt, __u32 timeout)
+                                  const void *opt),
+                      const void *opt, __u32 timeout)
 {
        int rc;
 
@@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev,
        return rc;
 }
 
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_core_reset_cmd cmd;
 
@@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
        nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
 }
 
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
 {
        u8 plen = 0;
 
        if (opt)
                plen = sizeof(struct nci_core_init_v2_cmd);
 
-       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
 }
 
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_disc_map_cmd cmd;
        struct disc_map_config *cfg = cmd.mapping_configs;
@@ -210,14 +210,14 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
 }
 
 struct nci_set_config_param {
-       __u8    id;
-       size_t  len;
-       __u8    *val;
+       __u8            id;
+       size_t          len;
+       const __u8      *val;
 };
 
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_set_config_param *param = (struct nci_set_config_param *)opt;
+       const struct nci_set_config_param *param = opt;
        struct nci_core_set_config_cmd cmd;
 
        BUG_ON(param->len > NCI_MAX_PARAM_LEN);
@@ -235,10 +235,9 @@ struct nci_rf_discover_param {
        __u32   tm_protocols;
 };
 
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_rf_discover_param *param =
-               (struct nci_rf_discover_param *)opt;
+       const struct nci_rf_discover_param *param = opt;
        struct nci_rf_disc_cmd cmd;
 
        cmd.num_disc_configs = 0;
@@ -301,10 +300,9 @@ struct nci_rf_discover_select_param {
        __u8    rf_protocol;
 };
 
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_rf_discover_select_param *param =
-               (struct nci_rf_discover_select_param *)opt;
+       const struct nci_rf_discover_select_param *param = opt;
        struct nci_rf_discover_select_cmd cmd;
 
        cmd.rf_discovery_id = param->rf_discovery_id;
@@ -328,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
                     sizeof(struct nci_rf_discover_select_cmd), &cmd);
 }
 
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_deactivate_cmd cmd;
 
-       cmd.type = opt;
+       cmd.type = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
                     sizeof(struct nci_rf_deactivate_cmd), &cmd);
@@ -341,18 +339,17 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
 struct nci_cmd_param {
        __u16 opcode;
        size_t len;
-       __u8 *payload;
+       const __u8 *payload;
 };
 
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_cmd_param *param =
-               (struct nci_cmd_param *)opt;
+       const struct nci_cmd_param *param = opt;
 
        nci_send_cmd(ndev, param->opcode, param->len, param->payload);
 }
 
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload)
 {
        struct nci_cmd_param param;
 
@@ -360,12 +357,13 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_prop_cmd);
 
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+                const __u8 *payload)
 {
        struct nci_cmd_param param;
 
@@ -373,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_cmd);
 
 int nci_core_reset(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_reset_req, 0,
+       return __nci_request(ndev, nci_reset_req, (void *)0,
                             msecs_to_jiffies(NCI_RESET_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_reset);
 
 int nci_core_init(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_init_req, 0,
+       return __nci_request(ndev, nci_init_req, (void *)0,
                             msecs_to_jiffies(NCI_INIT_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_init);
@@ -397,9 +395,9 @@ struct nci_loopback_data {
        struct sk_buff *data;
 };
 
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+       const struct nci_loopback_data *data = opt;
 
        nci_send_data(ndev, data->conn_id, data->data);
 }
@@ -407,7 +405,7 @@ static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
 static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
 {
        struct nci_dev *ndev = (struct nci_dev *)context;
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
 
        conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
        if (!conn_info) {
@@ -420,7 +418,7 @@ static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
        nci_req_complete(ndev, NCI_STATUS_OK);
 }
 
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
                      struct sk_buff **resp)
 {
        int r;
@@ -460,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
        loopback_data.data = skb;
 
        ndev->cur_conn_id = conn_id;
-       r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+       r = nci_request(ndev, nci_send_data_req, &loopback_data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK && resp)
                *resp = conn_info->rx_skb;
@@ -493,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->init(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_reset_req, 0,
+               rc = __nci_request(ndev, nci_reset_req, (void *)0,
                                   msecs_to_jiffies(NCI_RESET_TIMEOUT));
        }
 
@@ -506,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev)
                        .feature1 = NCI_FEATURE_DISABLE,
                        .feature2 = NCI_FEATURE_DISABLE
                };
-               unsigned long opt = 0;
+               const void *opt = NULL;
 
                if (ndev->nci_ver & NCI_VER_2_MASK)
-                       opt = (unsigned long)&nci_init_v2_cmd;
+                       opt = &nci_init_v2_cmd;
 
                rc = __nci_request(ndev, nci_init_req, opt,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
@@ -519,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->post_setup(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_init_complete_req, 0,
+               rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
        }
 
@@ -569,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev)
        atomic_set(&ndev->cmd_cnt, 1);
 
        set_bit(NCI_INIT, &ndev->flags);
-       __nci_request(ndev, nci_reset_req, 0,
+       __nci_request(ndev, nci_reset_req, (void *)0,
                      msecs_to_jiffies(NCI_RESET_TIMEOUT));
 
        /* After this point our queues are empty
@@ -624,7 +622,7 @@ static int nci_dev_down(struct nfc_dev *nfc_dev)
        return nci_close_device(ndev);
 }
 
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val)
 {
        struct nci_set_config_param param;
 
@@ -635,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
        param.len = len;
        param.val = val;
 
-       return __nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_set_config_req, &param,
                             msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_set_config);
 
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_nfcee_discover_cmd cmd;
-       __u8 action = opt;
+       __u8 action = (unsigned long)opt;
 
        cmd.discovery_action = action;
 
@@ -652,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
 
 int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
 {
-       return __nci_request(ndev, nci_nfcee_discover_req, action,
+       unsigned long opt = action;
+
+       return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
                                msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_discover);
 
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_nfcee_mode_set_cmd *cmd =
-                                       (struct nci_nfcee_mode_set_cmd *)opt;
+       const struct nci_nfcee_mode_set_cmd *cmd = opt;
 
        nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
                     sizeof(struct nci_nfcee_mode_set_cmd), cmd);
@@ -673,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
        cmd.nfcee_id = nfcee_id;
        cmd.nfcee_mode = nfcee_mode;
 
-       return __nci_request(ndev, nci_nfcee_mode_set_req,
-                            (unsigned long)&cmd,
+       return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_mode_set);
 
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
 {
-       struct core_conn_create_data *data =
-                                       (struct core_conn_create_data *)opt;
+       const struct core_conn_create_data *data = opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
 }
@@ -690,7 +687,7 @@ static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
 int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
                         u8 number_destination_params,
                         size_t params_len,
-                        struct core_conn_create_dest_spec_params *params)
+                        const struct core_conn_create_dest_spec_params *params)
 {
        int r;
        struct nci_core_conn_create_cmd *cmd;
@@ -719,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
        }
        ndev->cur_dest_type = destination_type;
 
-       r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+       r = __nci_request(ndev, nci_core_conn_create_req, &data,
                          msecs_to_jiffies(NCI_CMD_TIMEOUT));
        kfree(cmd);
        return r;
 }
 EXPORT_SYMBOL(nci_core_conn_create);
 
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
 {
-       __u8 conn_id = opt;
+       __u8 conn_id = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
 }
 
 int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
 {
+       unsigned long opt = conn_id;
+
        ndev->cur_conn_id = conn_id;
-       return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+       return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_conn_close);
@@ -756,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
 
        param.id = NCI_PN_ATR_REQ_GEN_BYTES;
 
-       rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_set_config_req, &param,
                         msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
        if (rc)
                return rc;
 
        param.id = NCI_LN_ATR_RES_GEN_BYTES;
 
-       return nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return nci_request(ndev, nci_set_config_req, &param,
                           msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 
@@ -813,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
                pr_debug("target active or w4 select, implicitly deactivate\n");
 
                rc = nci_request(ndev, nci_rf_deactivate_req,
-                                NCI_DEACTIVATE_TYPE_IDLE_MODE,
+                                (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                                 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                if (rc)
                        return -EBUSY;
@@ -835,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
 
        param.im_protocols = im_protocols;
        param.tm_protocols = tm_protocols;
-       rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_rf_discover_req, &param,
                         msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
 
        if (!rc)
@@ -854,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev)
                return;
        }
 
-       nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+       nci_request(ndev, nci_rf_deactivate_req,
+                   (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                    msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
 }
 
@@ -863,7 +863,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
 {
        struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
        struct nci_rf_discover_select_param param;
-       struct nfc_target *nci_target = NULL;
+       const struct nfc_target *nci_target = NULL;
        int i;
        int rc = 0;
 
@@ -913,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
                else
                        param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
 
-               rc = nci_request(ndev, nci_rf_discover_select_req,
-                                (unsigned long)&param,
+               rc = nci_request(ndev, nci_rf_discover_select_req, &param,
                                 msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
        }
 
@@ -929,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
                                  __u8 mode)
 {
        struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
-       u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+       unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
 
        pr_debug("entry\n");
 
@@ -947,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
        }
 
        if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
-               nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+               nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
                            msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
        }
 }
@@ -985,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
        } else {
                if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
                    atomic_read(&ndev->state) == NCI_DISCOVERY) {
-                       nci_request(ndev, nci_rf_deactivate_req, 0,
-                               msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+                       nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+                                   msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                }
 
                rc = nfc_tm_deactivated(nfc_dev);
@@ -1004,7 +1003,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 {
        struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
        int rc;
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
 
        conn_info = ndev->rf_conn_info;
        if (!conn_info)
@@ -1102,7 +1101,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
        return ndev->ops->fw_download(ndev, firmware_name);
 }
 
-static struct nfc_ops nci_nfc_ops = {
+static const struct nfc_ops nci_nfc_ops = {
        .dev_up = nci_dev_up,
        .dev_down = nci_dev_down,
        .start_poll = nci_start_poll,
@@ -1129,7 +1128,7 @@ static struct nfc_ops nci_nfc_ops = {
  * @tx_headroom: Reserved space at beginning of skb
  * @tx_tailroom: Reserved space at end of skb
  */
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
                                    __u32 supported_protocols,
                                    int tx_headroom, int tx_tailroom)
 {
@@ -1152,8 +1151,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
        if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) {
                pr_err("Too many proprietary commands: %zd\n",
                       ops->n_prop_ops);
-               ops->prop_ops = NULL;
-               ops->n_prop_ops = 0;
+               goto free_nci;
        }
 
        ndev->tx_headroom = tx_headroom;
@@ -1270,7 +1268,7 @@ EXPORT_SYMBOL(nci_register_device);
  */
 void nci_unregister_device(struct nci_dev *ndev)
 {
-       struct nci_conn_info    *conn_info, *n;
+       struct nci_conn_info *conn_info, *n;
 
        nci_close_device(ndev);
 
@@ -1332,7 +1330,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
 EXPORT_SYMBOL(nci_send_frame);
 
 /* Send NCI command */
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload)
 {
        struct nci_ctrl_hdr *hdr;
        struct sk_buff *skb;
@@ -1364,12 +1362,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
 EXPORT_SYMBOL(nci_send_cmd);
 
 /* Proprietary commands API */
-static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
-                                            size_t n_ops,
-                                            __u16 opcode)
+static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops,
+                                                  size_t n_ops,
+                                                  __u16 opcode)
 {
        size_t i;
-       struct nci_driver_ops *op;
+       const struct nci_driver_ops *op;
 
        if (!ops || !n_ops)
                return NULL;
@@ -1384,10 +1382,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
 }
 
 static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
-                            struct sk_buff *skb, struct nci_driver_ops *ops,
+                            struct sk_buff *skb, const struct nci_driver_ops *ops,
                             size_t n_ops)
 {
-       struct nci_driver_ops *op;
+       const struct nci_driver_ops *op;
 
        op = ops_cmd_lookup(ops, n_ops, rsp_opcode);
        if (!op || !op->rsp)
@@ -1397,10 +1395,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
 }
 
 static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
-                            struct sk_buff *skb, struct nci_driver_ops *ops,
+                            struct sk_buff *skb, const struct nci_driver_ops *ops,
                             size_t n_ops)
 {
-       struct nci_driver_ops *op;
+       const struct nci_driver_ops *op;
 
        op = ops_cmd_lookup(ops, n_ops, ntf_opcode);
        if (!op || !op->ntf)
@@ -1442,7 +1440,7 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
 static void nci_tx_work(struct work_struct *work)
 {
        struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work);
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
        struct sk_buff *skb;
 
        conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
index ce3382b..6055dc9 100644 (file)
@@ -26,7 +26,7 @@
 void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
                                __u8 conn_id, int err)
 {
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
        data_exchange_cb_t cb;
        void *cb_context;
 
@@ -80,7 +80,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
 
 int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id)
 {
-       struct nci_conn_info *conn_info;
+       const struct nci_conn_info *conn_info;
 
        conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
        if (!conn_info)
@@ -93,9 +93,9 @@ EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size);
 static int nci_queue_tx_data_frags(struct nci_dev *ndev,
                                   __u8 conn_id,
                                   struct sk_buff *skb) {
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
        int total_len = skb->len;
-       unsigned char *data = skb->data;
+       const unsigned char *data = skb->data;
        unsigned long flags;
        struct sk_buff_head frags_q;
        struct sk_buff *skb_frag;
@@ -166,7 +166,7 @@ exit:
 /* Send NCI data */
 int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb)
 {
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
        int rc = 0;
 
        pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len);
@@ -269,7 +269,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
        __u8 pbf = nci_pbf(skb->data);
        __u8 status = 0;
        __u8 conn_id = nci_conn_id(skb->data);
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
 
        pr_debug("len %d\n", skb->len);
 
index d6732e5..e199912 100644 (file)
 #include <linux/nfc.h>
 
 struct nci_data {
-       u8              conn_id;
-       u8              pipe;
-       u8              cmd;
-       const u8        *data;
-       u32             data_len;
+       u8 conn_id;
+       u8 pipe;
+       u8 cmd;
+       const u8 *data;
+       u32 data_len;
 } __packed;
 
 struct nci_hci_create_pipe_params {
@@ -142,7 +142,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
                             const u8 data_type, const u8 *data,
                             size_t data_len)
 {
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
        struct sk_buff *skb;
        int len, i, r;
        u8 cb = pipe;
@@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
        return i;
 }
 
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       struct nci_data *data = (struct nci_data *)opt;
+       const struct nci_data *data = opt;
 
        nci_hci_send_data(ndev, data->pipe, data->cmd,
                          data->data, data->data_len);
@@ -221,8 +221,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
                     const u8 *param, size_t param_len,
                     struct sk_buff **skb)
 {
-       struct nci_hcp_message *message;
-       struct nci_conn_info   *conn_info;
+       const struct nci_hcp_message *message;
+       const struct nci_conn_info *conn_info;
        struct nci_data data;
        int r;
        u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
        data.data = param;
        data.data_len = param_len;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -363,7 +363,7 @@ exit:
 static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
                                  struct sk_buff *skb)
 {
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
 
        conn_info = ndev->hci_dev->conn_info;
        if (!conn_info)
@@ -406,7 +406,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
        struct nci_hci_dev *hdev =
                container_of(work, struct nci_hci_dev, msg_rx_work);
        struct sk_buff *skb;
-       struct nci_hcp_message *message;
+       const struct nci_hcp_message *message;
        u8 pipe, type, instruction;
 
        while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
@@ -498,7 +498,7 @@ void nci_hci_data_received_cb(void *context,
 int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
 {
        struct nci_data data;
-       struct nci_conn_info    *conn_info;
+       const struct nci_conn_info *conn_info;
 
        conn_info = ndev->hci_dev->conn_info;
        if (!conn_info)
@@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
        data.data = NULL;
        data.data_len = 0;
 
-       return nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
-                       msecs_to_jiffies(NCI_DATA_TIMEOUT));
+       return nci_request(ndev, nci_hci_send_data_req, &data,
+                          msecs_to_jiffies(NCI_DATA_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_hci_open_pipe);
 
@@ -523,7 +522,7 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
        u8 pipe;
        struct sk_buff *skb;
        struct nci_hci_create_pipe_params params;
-       struct nci_hci_create_pipe_resp *resp;
+       const struct nci_hci_create_pipe_resp *resp;
 
        pr_debug("gate=%d\n", dest_gate);
 
@@ -557,8 +556,8 @@ static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
 int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
                      const u8 *param, size_t param_len)
 {
-       struct nci_hcp_message *message;
-       struct nci_conn_info *conn_info;
+       const struct nci_hcp_message *message;
+       const struct nci_conn_info *conn_info;
        struct nci_data data;
        int r;
        u8 *tmp;
@@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = tmp;
        data.data_len = param_len + 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -605,8 +603,8 @@ EXPORT_SYMBOL(nci_hci_set_param);
 int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
                      struct sk_buff **skb)
 {
-       struct nci_hcp_message *message;
-       struct nci_conn_info    *conn_info;
+       const struct nci_hcp_message *message;
+       const struct nci_conn_info *conn_info;
        struct nci_data data;
        int r;
        u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = &idx;
        data.data_len = 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
 
        if (r == NCI_STATUS_OK) {
@@ -697,7 +695,7 @@ EXPORT_SYMBOL(nci_hci_connect_gate);
 
 static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
                                     u8 gate_count,
-                                    struct nci_hci_gate *gates)
+                                    const struct nci_hci_gate *gates)
 {
        int r;
 
@@ -714,7 +712,7 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
 
 int nci_hci_dev_session_init(struct nci_dev *ndev)
 {
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
        struct sk_buff *skb;
        int r;
 
index 98af04c..c5eacaa 100644 (file)
 /* Handle NCI Notification packets */
 
 static void nci_core_reset_ntf_packet(struct nci_dev *ndev,
-                                     struct sk_buff *skb)
+                                     const struct sk_buff *skb)
 {
        /* Handle NCI 2.x core reset notification */
-       struct nci_core_reset_ntf *ntf = (void *)skb->data;
+       const struct nci_core_reset_ntf *ntf = (void *)skb->data;
 
        ndev->nci_ver = ntf->nci_ver;
        pr_debug("nci_ver 0x%x, config_status 0x%x\n",
@@ -48,7 +48,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
                                             struct sk_buff *skb)
 {
        struct nci_core_conn_credit_ntf *ntf = (void *) skb->data;
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
        int i;
 
        pr_debug("num_entries %d\n", ntf->num_entries);
@@ -80,7 +80,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
 }
 
 static void nci_core_generic_error_ntf_packet(struct nci_dev *ndev,
-                                             struct sk_buff *skb)
+                                             const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
 
@@ -107,9 +107,10 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev,
                nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO);
 }
 
-static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
-                       struct rf_tech_specific_params_nfca_poll *nfca_poll,
-                                                    __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
+                                       struct rf_tech_specific_params_nfca_poll *nfca_poll,
+                                       const __u8 *data)
 {
        nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data));
        data += 2;
@@ -134,9 +135,10 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
        return data;
 }
 
-static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
-                       struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
-                                                    __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
+                                       struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
+                                       const __u8 *data)
 {
        nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE);
 
@@ -148,9 +150,10 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
        return data;
 }
 
-static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
-                       struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
-                                                    __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
+                                       struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
+                                       const __u8 *data)
 {
        nfcf_poll->bit_rate = *data++;
        nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE);
@@ -164,9 +167,10 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
        return data;
 }
 
-static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
-                       struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
-                                                    __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
+                                       struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
+                                       const __u8 *data)
 {
        ++data;
        nfcv_poll->dsfid = *data++;
@@ -175,9 +179,10 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
        return data;
 }
 
-static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
-                       struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
-                                                    __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
+                                         struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
+                                         const __u8 *data)
 {
        nfcf_listen->local_nfcid2_len = min_t(__u8, *data++,
                                              NFC_NFCID2_MAXSIZE);
@@ -198,12 +203,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
                                struct nfc_target *target,
                                __u8 rf_protocol,
                                __u8 rf_tech_and_mode,
-                               void *params)
+                               const void *params)
 {
-       struct rf_tech_specific_params_nfca_poll *nfca_poll;
-       struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
-       struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
-       struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
+       const struct rf_tech_specific_params_nfca_poll *nfca_poll;
+       const struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
+       const struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
+       const struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
        __u32 protocol;
 
        if (rf_protocol == NCI_RF_PROTOCOL_T1T)
@@ -274,7 +279,7 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
 }
 
 static void nci_add_new_target(struct nci_dev *ndev,
-                              struct nci_rf_discover_ntf *ntf)
+                              const struct nci_rf_discover_ntf *ntf)
 {
        struct nfc_target *target;
        int i, rc;
@@ -319,10 +324,10 @@ void nci_clear_target_list(struct nci_dev *ndev)
 }
 
 static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
-                                      struct sk_buff *skb)
+                                      const struct sk_buff *skb)
 {
        struct nci_rf_discover_ntf ntf;
-       __u8 *data = skb->data;
+       const __u8 *data = skb->data;
        bool add_target = true;
 
        ntf.rf_discovery_id = *data++;
@@ -382,7 +387,8 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
 }
 
 static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
-                       struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+                                                struct nci_rf_intf_activated_ntf *ntf,
+                                                const __u8 *data)
 {
        struct activation_params_nfca_poll_iso_dep *nfca_poll;
        struct activation_params_nfcb_poll_iso_dep *nfcb_poll;
@@ -418,7 +424,8 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
 }
 
 static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
-                       struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+                                                struct nci_rf_intf_activated_ntf *ntf,
+                                                const __u8 *data)
 {
        struct activation_params_poll_nfc_dep *poll;
        struct activation_params_listen_nfc_dep *listen;
@@ -454,7 +461,7 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
 }
 
 static void nci_target_auto_activated(struct nci_dev *ndev,
-                                     struct nci_rf_intf_activated_ntf *ntf)
+                                     const struct nci_rf_intf_activated_ntf *ntf)
 {
        struct nfc_target *target;
        int rc;
@@ -477,7 +484,7 @@ static void nci_target_auto_activated(struct nci_dev *ndev,
 }
 
 static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
-               struct nci_rf_intf_activated_ntf *ntf)
+                                          const struct nci_rf_intf_activated_ntf *ntf)
 {
        ndev->remote_gb_len = 0;
 
@@ -519,11 +526,11 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
 }
 
 static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
-                                            struct sk_buff *skb)
+                                            const struct sk_buff *skb)
 {
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
        struct nci_rf_intf_activated_ntf ntf;
-       __u8 *data = skb->data;
+       const __u8 *data = skb->data;
        int err = NCI_STATUS_OK;
 
        ntf.rf_discovery_id = *data++;
@@ -681,10 +688,10 @@ listen:
 }
 
 static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
-                                        struct sk_buff *skb)
+                                        const struct sk_buff *skb)
 {
-       struct nci_conn_info    *conn_info;
-       struct nci_rf_deactivate_ntf *ntf = (void *) skb->data;
+       const struct nci_conn_info *conn_info;
+       const struct nci_rf_deactivate_ntf *ntf = (void *)skb->data;
 
        pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason);
 
@@ -725,10 +732,10 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
 }
 
 static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
-                                         struct sk_buff *skb)
+                                         const struct sk_buff *skb)
 {
        u8 status = NCI_STATUS_OK;
-       struct nci_nfcee_discover_ntf   *nfcee_ntf =
+       const struct nci_nfcee_discover_ntf *nfcee_ntf =
                                (struct nci_nfcee_discover_ntf *)skb->data;
 
        pr_debug("\n");
@@ -745,7 +752,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
 }
 
 static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev,
-                                       struct sk_buff *skb)
+                                       const struct sk_buff *skb)
 {
        pr_debug("\n");
 }
index e960592..a2e72c0 100644 (file)
 
 /* Handle NCI Response packets */
 
-static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_reset_rsp_packet(struct nci_dev *ndev,
+                                     const struct sk_buff *skb)
 {
-       struct nci_core_reset_rsp *rsp = (void *) skb->data;
+       const struct nci_core_reset_rsp *rsp = (void *)skb->data;
 
        pr_debug("status 0x%x\n", rsp->status);
 
@@ -43,10 +44,11 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
        }
 }
 
-static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev,
+                                     const struct sk_buff *skb)
 {
-       struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data;
-       struct nci_core_init_rsp_2 *rsp_2;
+       const struct nci_core_init_rsp_1 *rsp_1 = (void *)skb->data;
+       const struct nci_core_init_rsp_2 *rsp_2;
 
        pr_debug("status 0x%x\n", rsp_1->status);
 
@@ -81,10 +83,11 @@ static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
        return NCI_STATUS_OK;
 }
 
-static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev,
+                                     const struct sk_buff *skb)
 {
-       struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
-       u8 *supported_rf_interface = rsp->supported_rf_interfaces;
+       const struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
+       const u8 *supported_rf_interface = rsp->supported_rf_interfaces;
        u8 rf_interface_idx = 0;
        u8 rf_extension_cnt = 0;
 
@@ -118,7 +121,7 @@ static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
        return NCI_STATUS_OK;
 }
 
-static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_init_rsp_packet(struct nci_dev *ndev, const struct sk_buff *skb)
 {
        u8 status = 0;
 
@@ -160,9 +163,9 @@ exit:
 }
 
 static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
-                                          struct sk_buff *skb)
+                                          const struct sk_buff *skb)
 {
-       struct nci_core_set_config_rsp *rsp = (void *) skb->data;
+       const struct nci_core_set_config_rsp *rsp = (void *)skb->data;
 
        pr_debug("status 0x%x\n", rsp->status);
 
@@ -170,7 +173,7 @@ static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
 }
 
 static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
-                                      struct sk_buff *skb)
+                                      const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
 
@@ -179,9 +182,10 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
        nci_req_complete(ndev, status);
 }
 
-static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_rf_disc_rsp_packet(struct nci_dev *ndev,
+                                  const struct sk_buff *skb)
 {
-       struct nci_conn_info    *conn_info;
+       struct nci_conn_info *conn_info;
        __u8 status = skb->data[0];
 
        pr_debug("status 0x%x\n", status);
@@ -210,7 +214,7 @@ exit:
 }
 
 static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
-                                         struct sk_buff *skb)
+                                         const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
 
@@ -222,7 +226,7 @@ static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
 }
 
 static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
-                                        struct sk_buff *skb)
+                                        const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
 
@@ -238,9 +242,9 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
 }
 
 static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
-                                         struct sk_buff *skb)
+                                         const struct sk_buff *skb)
 {
-       struct nci_nfcee_discover_rsp *discover_rsp;
+       const struct nci_nfcee_discover_rsp *discover_rsp;
 
        if (skb->len != 2) {
                nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR);
@@ -255,7 +259,7 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
 }
 
 static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
-                                         struct sk_buff *skb)
+                                         const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
 
@@ -264,11 +268,11 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
 }
 
 static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
-                                           struct sk_buff *skb)
+                                           const struct sk_buff *skb)
 {
        __u8 status = skb->data[0];
        struct nci_conn_info *conn_info = NULL;
-       struct nci_core_conn_create_rsp *rsp;
+       const struct nci_core_conn_create_rsp *rsp;
 
        pr_debug("status 0x%x\n", status);
 
@@ -319,7 +323,7 @@ exit:
 }
 
 static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
-                                          struct sk_buff *skb)
+                                          const struct sk_buff *skb)
 {
        struct nci_conn_info *conn_info;
        __u8 status = skb->data[0];
index 7d8e10e..0935527 100644 (file)
@@ -27,7 +27,7 @@
 
 #define CRC_INIT               0xFFFF
 
-static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb,
+static int __nci_spi_send(struct nci_spi *nspi, const struct sk_buff *skb,
                          int cs_change)
 {
        struct spi_message m;
index 722f7ef..49089c5 100644 (file)
@@ -530,7 +530,7 @@ free_msg:
 
 int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
 {
-       struct nfc_se *se;
+       const struct nfc_se *se;
        struct sk_buff *msg;
        void *hdr;
 
@@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
                               struct genl_info *info)
 {
        struct nfc_dev *dev;
-       struct nfc_vendor_cmd *cmd;
+       const struct nfc_vendor_cmd *cmd;
        u32 dev_idx, vid, subcmd;
        u8 *data;
        size_t data_len;
index 889fefd..de2ec66 100644 (file)
@@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
                        u8 comm_mode, u8 rf_mode);
 int nfc_llcp_register_device(struct nfc_dev *dev);
 void nfc_llcp_unregister_device(struct nfc_dev *dev);
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len);
 u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len);
 int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
 struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
index 5e39640..0ca214a 100644 (file)
@@ -140,7 +140,7 @@ static void rawsock_data_exchange_complete(void *context, struct sk_buff *skb,
 {
        struct sock *sk = (struct sock *) context;
 
-       BUG_ON(in_irq());
+       BUG_ON(in_hardirq());
 
        pr_debug("sk=%p err=%d\n", sk, err);
 
index ef15d9e..0767740 100644 (file)
@@ -924,7 +924,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                        break;
 
                case OVS_USERSPACE_ATTR_PID:
-                       upcall.portid = nla_get_u32(a);
+                       if (dp->user_features &
+                           OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+                               upcall.portid =
+                                 ovs_dp_get_upcall_portid(dp,
+                                                          smp_processor_id());
+                       else
+                               upcall.portid = nla_get_u32(a);
                        break;
 
                case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
index bc164b3..67ad083 100644 (file)
@@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
 
 static void ovs_dp_masks_rebalance(struct work_struct *work);
 
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
+
 /* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
@@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        ovs_meters_exit(dp);
+       kfree(rcu_dereference_raw(dp->upcall_portids));
        kfree(dp);
 }
 
@@ -239,7 +242,13 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 
                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
-               upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+               if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+                       upcall.portid =
+                           ovs_dp_get_upcall_portid(dp, smp_processor_id());
+               else
+                       upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
@@ -1594,16 +1603,70 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
 
 DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
 
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+                             const struct nlattr *ids)
+{
+       struct dp_nlsk_pids *old, *dp_nlsk_pids;
+
+       if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+               return -EINVAL;
+
+       old = ovsl_dereference(dp->upcall_portids);
+
+       dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
+                              GFP_KERNEL);
+       if (!dp_nlsk_pids)
+               return -ENOMEM;
+
+       dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
+       nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
+
+       rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
+
+       kfree_rcu(old, rcu);
+
+       return 0;
+}
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
+{
+       struct dp_nlsk_pids *dp_nlsk_pids;
+
+       dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
+
+       if (dp_nlsk_pids) {
+               if (cpu_id < dp_nlsk_pids->n_pids) {
+                       return dp_nlsk_pids->pids[cpu_id];
+               } else if (dp_nlsk_pids->n_pids > 0 &&
+                          cpu_id >= dp_nlsk_pids->n_pids) {
+                       /* If the number of netlink PIDs is mismatched with
+                        * the number of CPUs as seen by the kernel, log this
+                        * and send the upcall to an arbitrary socket (0) in
+                        * order to not drop packets
+                        */
+                       pr_info_ratelimited("cpu_id mismatch with handler threads");
+                       return dp_nlsk_pids->pids[cpu_id %
+                                                 dp_nlsk_pids->n_pids];
+               } else {
+                       return 0;
+               }
+       } else {
+               return 0;
+       }
+}
+
 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
        u32 user_features = 0;
+       int err;
 
        if (a[OVS_DP_ATTR_USER_FEATURES]) {
                user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
 
                if (user_features & ~(OVS_DP_F_VPORT_PIDS |
                                      OVS_DP_F_UNALIGNED |
-                                     OVS_DP_F_TC_RECIRC_SHARING))
+                                     OVS_DP_F_TC_RECIRC_SHARING |
+                                     OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
                        return -EOPNOTSUPP;
 
 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
@@ -1624,6 +1687,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 
        dp->user_features = user_features;
 
+       if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
+           a[OVS_DP_ATTR_PER_CPU_PIDS]) {
+               /* Upcall Netlink Port IDs have been updated */
+               err = ovs_dp_set_upcall_portids(dp,
+                                               a[OVS_DP_ATTR_PER_CPU_PIDS]);
+               if (err)
+                       return err;
+       }
+
        if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
                static_branch_enable(&tc_recirc_sharing_support);
        else
index 38f7d3e..fcfe6cb 100644 (file)
@@ -50,6 +50,21 @@ struct dp_stats_percpu {
        struct u64_stats_sync syncp;
 };
 
+/**
+ * struct dp_nlsk_pids - array of netlink portids of for a datapath.
+ *                       This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ *                       is enabled and must be protected by rcu.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_pids: Size of @pids array.
+ * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
+ *       that miss the flow table.
+ */
+struct dp_nlsk_pids {
+       struct rcu_head rcu;
+       u32 n_pids;
+       u32 pids[];
+};
+
 /**
  * struct datapath - datapath for flow-based packet switching
  * @rcu: RCU callback head for deferred destruction.
@@ -61,6 +76,7 @@ struct dp_stats_percpu {
  * @net: Reference to net namespace.
  * @max_headroom: the maximum headroom of all vports in this datapath; it will
  * be used by all the internal vports in this dp.
+ * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
  *
  * Context: See the comment on locking at the top of datapath.c for additional
  * locking information.
@@ -87,6 +103,8 @@ struct datapath {
 
        /* Switch meters. */
        struct dp_meter_table meter_tbl;
+
+       struct dp_nlsk_pids __rcu *upcall_portids;
 };
 
 /**
@@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
                  const struct sw_flow_key *, const struct dp_upcall_info *,
                  uint32_t cutlen);
 
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
+
 const char *ovs_dp_name(const struct datapath *dp);
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
                                         u32 portid, u32 seq, u8 cmd);
index 57a1971..543365f 100644 (file)
@@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po)
 
        rcu_read_lock();
        dev = rcu_dereference(po->cached_dev);
-       if (likely(dev))
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        return dev;
@@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 out_free:
        kfree_skb(skb);
 out_unlock:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 out:
        return err;
 }
@@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                }
        }
 
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        proto_curr = po->prot_hook.type;
        dev_curr = po->prot_hook.dev;
@@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                        packet_cached_dev_assign(po, dev);
                }
        }
-       if (dev_curr)
-               dev_put(dev_curr);
+       dev_put(dev_curr);
 
        if (proto == 0 || !need_rehook)
                goto out_unlock;
@@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this,
                                if (msg == NETDEV_UNREGISTER) {
                                        packet_cached_dev_reset(po);
                                        WRITE_ONCE(po->ifindex, -1);
-                                       if (po->prot_hook.dev)
-                                               dev_put(po->prot_hook.dev);
+                                       dev_put(po->prot_hook.dev);
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
index ca6ae4c..65218b7 100644 (file)
@@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 
 drop:
        kfree_skb(skb);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 EXPORT_SYMBOL(pn_skb_send);
index ac0fae0..cde671d 100644 (file)
@@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net)
                        break;
                dev = NULL;
        }
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -233,11 +232,11 @@ static int phonet_device_autoconf(struct net_device *dev)
        struct if_phonet_req req;
        int ret;
 
-       if (!dev->netdev_ops->ndo_do_ioctl)
+       if (!dev->netdev_ops->ndo_siocdevprivate)
                return -EOPNOTSUPP;
 
-       ret = dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *)&req,
-                                               SIOCPNGAUTOCONF);
+       ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req,
+                                                 NULL, SIOCPNGAUTOCONF);
        if (ret < 0)
                return ret;
 
@@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr)
        daddr >>= 2;
        rcu_read_lock();
        dev = rcu_dereference(routes->table[daddr]);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        if (!dev)
index 2599235..71e2caf 100644 (file)
@@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
                        saddr = PN_NO_ADDR;
                release_sock(sk);
 
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                if (saddr == PN_NO_ADDR)
                        return -EHOSTUNREACH;
 
index 0c30908..525e3ea 100644 (file)
@@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
                goto err;
        }
 
-       if (!size || len != ALIGN(size, 4) + hdrlen)
+       if (!size || size & 3 || len != size + hdrlen)
                goto err;
 
        if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
@@ -506,8 +506,12 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 
        if (cb->type == QRTR_TYPE_NEW_SERVER) {
                /* Remote node endpoint can bridge other distant nodes */
-               const struct qrtr_ctrl_pkt *pkt = data + hdrlen;
+               const struct qrtr_ctrl_pkt *pkt;
 
+               if (size < sizeof(*pkt))
+                       goto err;
+
+               pkt = data + hdrlen;
                qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
        }
 
@@ -1157,14 +1161,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                rc = put_user(len, (int __user *)argp);
                break;
        case SIOCGIFADDR:
-               if (copy_from_user(&ifr, argp, sizeof(ifr))) {
+               if (get_user_ifreq(&ifr, NULL, argp)) {
                        rc = -EFAULT;
                        break;
                }
 
                sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
                *sq = ipc->us;
-               if (copy_to_user(argp, &ifr, sizeof(ifr))) {
+               if (put_user_ifreq(&ifr, argp)) {
                        rc = -EFAULT;
                        break;
                }
index 0885b22..accd35c 100644 (file)
@@ -21,6 +21,8 @@ config AF_RXRPC
 
          See Documentation/networking/rxrpc.rst.
 
+if AF_RXRPC
+
 config AF_RXRPC_IPV6
        bool "IPv6 support for RxRPC"
        depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
@@ -30,7 +32,6 @@ config AF_RXRPC_IPV6
 
 config AF_RXRPC_INJECT_LOSS
        bool "Inject packet loss into RxRPC packet stream"
-       depends on AF_RXRPC
        help
          Say Y here to inject packet loss by discarding some received and some
          transmitted packets.
@@ -38,7 +39,6 @@ config AF_RXRPC_INJECT_LOSS
 
 config AF_RXRPC_DEBUG
        bool "RxRPC dynamic debugging"
-       depends on AF_RXRPC
        help
          Say Y here to make runtime controllable debugging messages appear.
 
@@ -47,7 +47,6 @@ config AF_RXRPC_DEBUG
 
 config RXKAD
        bool "RxRPC Kerberos security"
-       depends on AF_RXRPC
        select CRYPTO
        select CRYPTO_MANAGER
        select CRYPTO_SKCIPHER
@@ -58,3 +57,5 @@ config RXKAD
          through the use of the key retention service.
 
          See Documentation/networking/rxrpc.rst.
+
+endif
index d17a66a..7dd3a2d 100644 (file)
@@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
        p->tcfa_tm.firstuse = 0;
-       p->tcfa_flags = flags;
+       p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
        if (est) {
                err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                        &p->tcfa_rate_est,
@@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[])
        }
 }
 
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack)
 {
@@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
        struct nlattr *kind;
        int err;
 
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
                        return ERR_PTR(err);
                }
        } else {
-               if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+               if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
                        return ERR_PTR(-EINVAL);
                }
@@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack)
+                                   u32 flags, struct netlink_ext_ack *extack)
 {
-       struct nla_bitfield32 flags = { 0, 0 };
+       bool police = flags & TCA_ACT_FLAGS_POLICE;
+       struct nla_bitfield32 userflags = { 0, 0 };
        u8 hw_stats = TCA_ACT_HW_STATS_ANY;
        struct nlattr *tb[TCA_ACT_MAX + 1];
        struct tc_cookie *cookie = NULL;
@@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        int err;
 
        /* backward compatibility for policer */
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                }
                hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
                if (tb[TCA_ACT_FLAGS])
-                       flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+                       userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
 
-               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-                               rtnl_held, tp, flags.value, extack);
+               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+                               userflags.value | flags, extack);
        } else {
-               err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-                               tp, flags.value, extack);
+               err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+                               extack);
        }
        if (err < 0)
                goto err_out;
        *init_res = err;
 
-       if (!name && tb[TCA_ACT_COOKIE])
+       if (!police && tb[TCA_ACT_COOKIE])
                tcf_set_action_cookie(&a->act_cookie, cookie);
 
-       if (!name)
+       if (!police)
                a->hw_stats = hw_stats;
 
        return a;
@@ -1063,9 +1062,9 @@ err_out:
 /* Returns numbers of initialized actions or negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
-                   struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack)
+                   struct nlattr *est, struct tc_action *actions[],
+                   int init_res[], size_t *attr_size, u32 flags,
+                   struct netlink_ext_ack *extack)
 {
        struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                struct tc_action_ops *a_o;
 
-               a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+               a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+                                        !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                        extack);
                if (IS_ERR(a_o)) {
                        err = PTR_ERR(a_o);
                        goto err_mod;
@@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        }
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-               act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-                                       ops[i - 1], &init_res[i - 1], rtnl_held,
-                                       extack);
+               act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+                                       &init_res[i - 1], flags, extack);
                if (IS_ERR(act)) {
                        err = PTR_ERR(act);
                        goto err;
@@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        goto err_mod;
 
 err:
-       tcf_action_destroy(actions, bind);
+       tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
 err_mod:
        for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
                if (ops[i])
@@ -1351,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
        module_put(ops->owner);
        err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                             n->nlmsg_flags & NLM_F_ECHO);
-       if (err > 0)
-               return 0;
        if (err < 0)
                NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
 
@@ -1423,8 +1421,6 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 
        ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                             n->nlmsg_flags & NLM_F_ECHO);
-       if (ret > 0)
-               return 0;
        return ret;
 }
 
@@ -1481,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
               u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
        struct sk_buff *skb;
-       int err = 0;
 
        skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
                        GFP_KERNEL);
@@ -1495,15 +1490,12 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
                return -EINVAL;
        }
 
-       err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-                            n->nlmsg_flags & NLM_F_ECHO);
-       if (err > 0)
-               err = 0;
-       return err;
+       return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+                             n->nlmsg_flags & NLM_F_ECHO);
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-                         struct nlmsghdr *n, u32 portid, int ovr,
+                         struct nlmsghdr *n, u32 portid, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        size_t attr_size = 0;
@@ -1512,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
        int init_res[TCA_ACT_MAX_PRIO] = {};
 
        for (loop = 0; loop < 10; loop++) {
-               ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
-                                     actions, init_res, &attr_size, true, extack);
+               ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+                                     &attr_size, flags, extack);
                if (ret != -EAGAIN)
                        break;
        }
@@ -1543,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_ROOT_MAX + 1];
        u32 portid = NETLINK_CB(skb).portid;
-       int ret = 0, ovr = 0;
+       u32 flags = 0;
+       int ret = 0;
 
        if ((n->nlmsg_type != RTM_GETACTION) &&
            !netlink_capable(skb, CAP_NET_ADMIN))
@@ -1569,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                 * is zero) then just set this
                 */
                if (n->nlmsg_flags & NLM_F_REPLACE)
-                       ovr = 1;
-               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+                       flags = TCA_ACT_FLAGS_REPLACE;
+               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
                                     extack);
                break;
        case RTM_DELACTION:
index e409a00..5c36013 100644 (file)
@@ -47,11 +47,11 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
        if (at_ingress) {
                __skb_push(skb, skb->mac_len);
                bpf_compute_data_pointers(skb);
-               filter_res = BPF_PROG_RUN(filter, skb);
+               filter_res = bpf_prog_run(filter, skb);
                __skb_pull(skb, skb->mac_len);
        } else {
                bpf_compute_data_pointers(skb);
-               filter_res = BPF_PROG_RUN(filter, skb);
+               filter_res = bpf_prog_run(filter, skb);
        }
        if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
                skb_orphan(skb);
@@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
-                       int replace, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_bpf_cfg cfg, old;
@@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*act, bind);
                        return -EEXIST;
                }
index e19885d..94e78ac 100644 (file)
@@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
-                            int ovr, int bind, bool rtnl_held,
                             struct tcf_proto *tp, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
        struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_chain *goto_ch = NULL;
        struct tcf_connmark_info *ci;
        struct tc_connmark *parm;
@@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                ci = to_connmark(*a);
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 4fa4fcb..a15ec95 100644 (file)
@@ -41,11 +41,12 @@ static unsigned int csum_net_id;
 static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
-                        struct nlattr *est, struct tc_action **a, int ovr,
-                        int bind, bool rtnl_held, struct tcf_proto *tp,
+                        struct nlattr *est, struct tc_action **a,
+                        struct tcf_proto *tp,
                         u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_csum_params *params_new;
        struct nlattr *tb[TCA_CSUM_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 1b4b351..ad9df0c 100644 (file)
@@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net,
 
 static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
-                      int replace, int bind, bool rtnl_held,
                       struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ct_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_ct_params *params = NULL;
        struct nlattr *tb[TCA_CT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b20c8ce..549374a 100644 (file)
@@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
 
 static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        u32 dscpmask = 0, dscpstatemask, index;
        struct nlattr *tb[TCA_CTINFO_MAX + 1];
        struct tcf_ctinfo_params *cp_new;
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind) /* don't override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 73c3926..d8dce17 100644 (file)
@@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GACT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_gact *parm;
@@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index a78cb79..7df72a4 100644 (file)
@@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
 
 static int tcf_gate_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gate_net_id);
        enum tk_offsets tk_offset = TK_OFFS_TAI;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GATE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        u64 cycletime = 0, basetime = 0;
@@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index a2ddea0..7064a36 100644 (file)
@@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
-                       int ovr, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IFE_MAX + 1];
        struct nlattr *tb2[IFE_META_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        kfree(p);
                        return err;
                }
-               err = load_metalist(tb2, rtnl_held);
+               err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err) {
                        kfree(p);
                        return err;
@@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                kfree(p);
                return -EEXIST;
@@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
        }
 
        if (tb[TCA_IFE_METALST]) {
-               err = populate_metalist(ife, tb2, exists, rtnl_held);
+               err = populate_metalist(ife, tb2, exists,
+                                       !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err)
                        goto metadata_parse_err;
        } else {
index ac7297f..265b144 100644 (file)
@@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         const struct tc_action_ops *ops, int ovr, int bind,
+                         const struct tc_action_ops *ops,
                          struct tcf_proto *tp, u32 flags)
 {
        struct tc_action_net *tn = net_generic(net, id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IPT_MAX + 1];
        struct tcf_ipt *ipt;
        struct xt_entry_target *td, *t;
@@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                if (bind)/* dont override defaults */
                        return 0;
 
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
@@ -201,21 +202,21 @@ err1:
 }
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **a, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **a,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+                             tp, flags);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
-                      struct nlattr *est, struct tc_action **a, int ovr,
-                      int bind, bool unlocked, struct tcf_proto *tp,
+                      struct nlattr *est, struct tc_action **a,
+                      struct tcf_proto *tp,
                       u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+                             tp, flags);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
index 2ef4cd2..d64b0ee 100644 (file)
@@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a)
 
        /* last reference to action, no need to lock */
        dev = rcu_dereference_protected(m->tcfm_dev, 1);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool mac_header_xmit = false;
@@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
@@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                mac_header_xmit = dev_is_mac_header_xmit(dev);
                dev = rcu_replace_pointer(m->tcfm_dev, dev,
                                          lockdep_is_held(&m->tcf_lock));
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                m->tcfm_mac_header_xmit = mac_header_xmit;
        }
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
index d1486ea..e4529b4 100644 (file)
@@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
 
 static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mpls_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MPLS_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_mpls_params *p;
@@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 1ebd2a8..7dd6b58 100644 (file)
@@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-                       struct tc_action **a, int ovr, int bind,
-                       bool rtnl_held, struct tcf_proto *tp,
+                       struct tc_action **a, struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_NAT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_nat *parm;
@@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
        } else if (err > 0) {
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b453044..c6c862c 100644 (file)
@@ -136,11 +136,11 @@ nla_failure:
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         int ovr, int bind, bool rtnl_held,
                          struct tcf_proto *tp, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_pedit_key *keys = NULL;
@@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)
                        goto out_free;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        ret = -EEXIST;
                        goto out_release;
                }
index 0fab8de..832157a 100644 (file)
@@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
-                              int ovr, int bind, bool rtnl_held,
                               struct tcf_proto *tp, u32 flags,
                               struct netlink_ext_ack *extack)
 {
        int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_POLICE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_police *parm;
@@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
                }
                ret = ACT_P_CREATED;
                spin_lock_init(&(to_police(*a)->tcfp_lock));
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 6a0c16e..230501e 100644 (file)
@@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 };
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
-                          struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind, bool rtnl_held, struct tcf_proto *tp,
+                          struct nlattr *est, struct tc_action **a,
+                          struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
        struct psample_group *psample_group;
        u32 psample_group_num, rate, index;
@@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 726cc95..cbbe186 100644 (file)
@@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_DEF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_defact *parm;
@@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
                tcf_action_set_ctrlact(*a, parm->action, goto_ch);
                ret = ACT_P_CREATED;
        } else {
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        err = -EEXIST;
                        goto release_idr;
                }
index e5f3fb8..6054185 100644 (file)
@@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
-                           int ovr, int bind, bool rtnl_held,
                            struct tcf_proto *tp, u32 act_flags,
                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct tcf_skbedit_params *params_new;
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                ret = ACT_P_CREATED;
        } else {
                d = to_skbedit(*a);
-               if (!ovr) {
+               if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 8d17a54..ecb9ee6 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/inet_ecn.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 static unsigned int skbmod_net_id;
 static struct tc_action_ops act_skbmod_ops;
 
-#define MAX_EDIT_LEN ETH_HLEN
 static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
                          struct tcf_result *res)
 {
        struct tcf_skbmod *d = to_skbmod(a);
-       int action;
+       int action, max_edit_len, err;
        struct tcf_skbmod_params *p;
        u64 flags;
-       int err;
 
        tcf_lastuse_update(&d->tcf_tm);
        bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
@@ -38,19 +37,34 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
        if (unlikely(action == TC_ACT_SHOT))
                goto drop;
 
-       if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
-               return action;
+       max_edit_len = skb_mac_header_len(skb);
+       p = rcu_dereference_bh(d->skbmod_p);
+       flags = p->flags;
+
+       /* tcf_skbmod_init() guarantees "flags" to be one of the following:
+        *      1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE}
+        *      2. SKBMOD_F_SWAPMAC
+        *      3. SKBMOD_F_ECN
+        * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet
+        * packets.
+        */
+       if (flags == SKBMOD_F_ECN) {
+               switch (skb_protocol(skb, true)) {
+               case cpu_to_be16(ETH_P_IP):
+               case cpu_to_be16(ETH_P_IPV6):
+                       max_edit_len += skb_network_header_len(skb);
+                       break;
+               default:
+                       goto out;
+               }
+       } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) {
+               goto out;
+       }
 
-       /* XXX: if you are going to edit more fields beyond ethernet header
-        * (example when you add IP header replacement or vlan swap)
-        * then MAX_EDIT_LEN needs to change appropriately
-       */
-       err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+       err = skb_ensure_writable(skb, max_edit_len);
        if (unlikely(err)) /* best policy is to drop on the floor */
                goto drop;
 
-       p = rcu_dereference_bh(d->skbmod_p);
-       flags = p->flags;
        if (flags & SKBMOD_F_DMAC)
                ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
        if (flags & SKBMOD_F_SMAC)
@@ -66,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
                ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
        }
 
+       if (flags & SKBMOD_F_ECN)
+               INET_ECN_set_ce(skb);
+
+out:
        return action;
 
 drop:
@@ -82,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+       bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SKBMOD_MAX + 1];
        struct tcf_skbmod_params *p, *p_old;
        struct tcf_chain *goto_ch = NULL;
@@ -129,6 +148,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
        index = parm->index;
        if (parm->flags & SKBMOD_F_SWAPMAC)
                lflags = SKBMOD_F_SWAPMAC;
+       if (parm->flags & SKBMOD_F_ECN)
+               lflags = SKBMOD_F_ECN;
 
        err = tcf_idr_check_alloc(tn, &index, a, bind);
        if (err < 0)
index 85c0d0d..d9cd174 100644 (file)
@@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 act_flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
        struct tcf_tunnel_key_params *params_new;
        struct metadata_dst *metadata = NULL;
@@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                NL_SET_ERR_MSG(extack, "TC IDR already exists");
                ret = -EEXIST;
                goto release_tun_meta;
index 71f2015..e4dc5a5 100644 (file)
@@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_VLAN_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool push_prio_exists = false;
@@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index e3e79e9..2ef8f5a 100644 (file)
@@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo,
        bo->block_shared = shared;
        bo->extack = extack;
        bo->sch = sch;
+       bo->cb_list_head = &flow_block->cb_list;
        INIT_LIST_HEAD(&bo->cb_list);
 }
 
@@ -1577,20 +1578,10 @@ reset:
 #endif
 }
 
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+int tcf_classify(struct sk_buff *skb,
+                const struct tcf_block *block,
+                const struct tcf_proto *tp,
                 struct tcf_result *res, bool compat_mode)
-{
-       u32 last_executed_chain = 0;
-
-       return __tcf_classify(skb, tp, tp, res, compat_mode,
-                             &last_executed_chain);
-}
-EXPORT_SYMBOL(tcf_classify);
-
-int tcf_classify_ingress(struct sk_buff *skb,
-                        const struct tcf_block *ingress_block,
-                        const struct tcf_proto *tp,
-                        struct tcf_result *res, bool compat_mode)
 {
 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
        u32 last_executed_chain = 0;
@@ -1603,20 +1594,22 @@ int tcf_classify_ingress(struct sk_buff *skb,
        struct tc_skb_ext *ext;
        int ret;
 
-       ext = skb_ext_find(skb, TC_SKB_EXT);
+       if (block) {
+               ext = skb_ext_find(skb, TC_SKB_EXT);
 
-       if (ext && ext->chain) {
-               struct tcf_chain *fchain;
+               if (ext && ext->chain) {
+                       struct tcf_chain *fchain;
 
-               fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
-               if (!fchain)
-                       return TC_ACT_SHOT;
+                       fchain = tcf_chain_lookup_rcu(block, ext->chain);
+                       if (!fchain)
+                               return TC_ACT_SHOT;
 
-               /* Consume, so cloned/redirect skbs won't inherit ext */
-               skb_ext_del(skb, TC_SKB_EXT);
+                       /* Consume, so cloned/redirect skbs won't inherit ext */
+                       skb_ext_del(skb, TC_SKB_EXT);
 
-               tp = rcu_dereference_bh(fchain->filter_chain);
-               last_executed_chain = fchain->index;
+                       tp = rcu_dereference_bh(fchain->filter_chain);
+                       last_executed_chain = fchain->index;
+               }
        }
 
        ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
@@ -1635,7 +1628,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
        return ret;
 #endif
 }
-EXPORT_SYMBOL(tcf_classify_ingress);
+EXPORT_SYMBOL(tcf_classify);
 
 struct tcf_chain_info {
        struct tcf_proto __rcu **pprev;
@@ -1870,13 +1863,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
        }
 
        if (unicast)
-               err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+               err = rtnl_unicast(skb, net, portid);
        else
                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                                     n->nlmsg_flags & NLM_F_ECHO);
-
-       if (err > 0)
-               err = 0;
        return err;
 }
 
@@ -1909,15 +1899,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
        }
 
        if (unicast)
-               err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+               err = rtnl_unicast(skb, net, portid);
        else
                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                                     n->nlmsg_flags & NLM_F_ECHO);
        if (err < 0)
                NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
 
-       if (err > 0)
-               err = 0;
        return err;
 }
 
@@ -1962,6 +1950,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        int err;
        int tp_created;
        bool rtnl_held = false;
+       u32 flags;
 
        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -1982,6 +1971,7 @@ replay:
        tp = NULL;
        cl = 0;
        block = NULL;
+       flags = 0;
 
        if (prio == 0) {
                /* If no priority is provided by the user,
@@ -2125,9 +2115,12 @@ replay:
                goto errout;
        }
 
+       if (!(n->nlmsg_flags & NLM_F_CREATE))
+               flags |= TCA_ACT_FLAGS_REPLACE;
+       if (!rtnl_held)
+               flags |= TCA_ACT_FLAGS_NO_RTNL;
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
-                             n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-                             rtnl_held, extack);
+                             flags, extack);
        if (err == 0) {
                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
                               RTM_NEWTFILTER, false, rtnl_held);
@@ -2711,13 +2704,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
        }
 
        if (unicast)
-               err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+               err = rtnl_unicast(skb, net, portid);
        else
                err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                                     flags & NLM_F_ECHO);
 
-       if (err > 0)
-               err = 0;
        return err;
 }
 
@@ -2741,7 +2732,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
        }
 
        if (unicast)
-               return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+               return rtnl_unicast(skb, net, portid);
 
        return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
 }
@@ -3035,8 +3026,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-                     struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-                     bool rtnl_held, struct netlink_ext_ack *extack)
+                     struct nlattr *rate_tlv, struct tcf_exts *exts,
+                     u32 flags, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
        {
@@ -3047,13 +3038,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        struct tc_action_ops *a_o;
 
-                       a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+                       a_o = tc_action_load_ops(tb[exts->police], true,
+                                                !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                                extack);
                        if (IS_ERR(a_o))
                                return PTR_ERR(a_o);
+                       flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
                        act = tcf_action_init_1(net, tp, tb[exts->police],
-                                               rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND, a_o, init_res,
-                                               rtnl_held, extack);
+                                               rate_tlv, a_o, init_res, flags,
+                                               extack);
                        module_put(a_o->owner);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
@@ -3065,10 +3058,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                } else if (exts->action && tb[exts->action]) {
                        int err;
 
+                       flags |= TCA_ACT_FLAGS_BIND;
                        err = tcf_action_init(net, tp, tb[exts->action],
-                                             rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             exts->actions, init_res,
-                                             &attr_size, rtnl_held, extack);
+                                             rate_tlv, exts->actions, init_res,
+                                             &attr_size, flags, extack);
                        if (err < 0)
                                return err;
                        exts->nr_actions = err;
@@ -3832,7 +3825,7 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru
 
        fl = rcu_dereference_bh(qe->filter_chain);
 
-       switch (tcf_classify(skb, fl, &cl_res, false)) {
+       switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
        case TC_ACT_SHOT:
                qdisc_qstats_drop(sch);
                __qdisc_drop(skb, to_free);
index f256a7c..8158fc9 100644 (file)
@@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 static int basic_set_parms(struct net *net, struct tcf_proto *tp,
                           struct basic_filter *f, unsigned long base,
                           struct nlattr **tb,
-                          struct nlattr *est, bool ovr,
+                          struct nlattr *est, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
                        struct tcf_proto *tp, unsigned long base, u32 handle,
-                       struct nlattr **tca, void **arg, bool ovr,
-                       bool rtnl_held, struct netlink_ext_ack *extack)
+                       struct nlattr **tca, void **arg,
+                       u32 flags, struct netlink_ext_ack *extack)
 {
        int err;
        struct basic_head *head = rtnl_dereference(tp->root);
@@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        }
 
-       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
                              extack);
        if (err < 0) {
                if (!fold)
index fa739ef..df19a84 100644 (file)
@@ -96,11 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                        /* It is safe to push/pull even if skb_shared() */
                        __skb_push(skb, skb->mac_len);
                        bpf_compute_data_pointers(skb);
-                       filter_res = BPF_PROG_RUN(prog->filter, skb);
+                       filter_res = bpf_prog_run(prog->filter, skb);
                        __skb_pull(skb, skb->mac_len);
                } else {
                        bpf_compute_data_pointers(skb);
-                       filter_res = BPF_PROG_RUN(prog->filter, skb);
+                       filter_res = bpf_prog_run(prog->filter, skb);
                }
 
                if (prog->exts_integrated) {
@@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
                             struct cls_bpf_prog *prog, unsigned long base,
-                            struct nlattr **tb, struct nlattr *est, bool ovr,
+                            struct nlattr **tb, struct nlattr *est, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        bool is_bpf, is_ebpf, have_exts = false;
@@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;
 
-       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
                                extack);
        if (ret < 0)
                return ret;
@@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
-                         void **arg, bool ovr, bool rtnl_held,
+                         void **arg, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        prog->handle = handle;
 
-       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
                                extack);
        if (ret < 0)
                goto errout_idr;
index fb88114..ed00001 100644 (file)
@@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                             struct tcf_proto *tp, unsigned long base,
                             u32 handle, struct nlattr **tca,
-                            void **arg, bool ovr, bool rtnl_held,
+                            void **arg, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+                               extack);
        if (err < 0)
                goto errout;
 
index 87398af..972303a 100644 (file)
@@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
@@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto err2;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+                               extack);
        if (err < 0)
                goto err2;
 
index d7869a9..23b2125 100644 (file)
@@ -1915,23 +1915,22 @@ errout_cleanup:
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
-                       struct nlattr *est, bool ovr,
-                       struct fl_flow_tmplt *tmplt, bool rtnl_held,
+                       struct nlattr *est,
+                       struct fl_flow_tmplt *tmplt, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
        if (tb[TCA_FLOWER_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_lock();
                tcf_bind_filter(tp, &f->res, base);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_unlock();
        }
 
@@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca,
-                    void **arg, bool ovr, bool rtnl_held,
+                    void **arg, u32 flags,
                     struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = fl_head_dereference(tp);
+       bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
        struct cls_fl_filter *fold = *arg;
        struct cls_fl_filter *fnew;
        struct fl_flow_mask *mask;
@@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
                }
        }
 
-       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
-                          tp->chain->tmplt_priv, rtnl_held, extack);
+       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+                          tp->chain->tmplt_priv, flags, extack);
        if (err)
                goto errout;
 
index ec94529..8654b0c 100644 (file)
@@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 
 static int fw_set_parms(struct net *net, struct tcf_proto *tp,
                        struct fw_filter *f, struct nlattr **tb,
-                       struct nlattr **tca, unsigned long base, bool ovr,
+                       struct nlattr **tca, unsigned long base, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        u32 mask;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+                               extack);
        if (err < 0)
                return err;
 
@@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca, void **arg,
-                    bool ovr, bool rtnl_held,
-                    struct netlink_ext_ack *extack)
+                    u32 flags, struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = *arg;
@@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
                        return err;
                }
 
-               err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+               err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
                if (err < 0) {
                        tcf_exts_destroy(&fnew->exts);
                        kfree(fnew);
@@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
        f->id = handle;
        f->tp = tp;
 
-       err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+       err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
        if (err < 0)
                goto errout;
 
index cafb844..24f0046 100644 (file)
@@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
 static int mall_set_parms(struct net *net, struct tcf_proto *tp,
                          struct cls_mall_head *head,
                          unsigned long base, struct nlattr **tb,
-                         struct nlattr *est, bool ovr,
+                         struct nlattr *est, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
        struct nlattr *tb[TCA_MATCHALL_MAX + 1];
        struct cls_mall_head *new;
-       u32 flags = 0;
+       u32 userflags = 0;
        int err;
 
        if (!tca[TCA_OPTIONS])
@@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_MATCHALL_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
-               if (!tc_flags_valid(flags))
+               userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+               if (!tc_flags_valid(userflags))
                        return -EINVAL;
        }
 
@@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
        if (!handle)
                handle = 1;
        new->handle = handle;
-       new->flags = flags;
+       new->flags = userflags;
        new->pf = alloc_percpu(struct tc_matchall_pcnt);
        if (!new->pf) {
                err = -ENOMEM;
                goto err_alloc_percpu;
        }
 
-       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
                             extack);
        if (err)
                goto err_set_parms;
index 5efa3e7..a35ab8c 100644 (file)
@@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
                            unsigned long base, struct route4_filter *f,
                            u32 handle, struct route4_head *head,
                            struct nlattr **tb, struct nlattr *est, int new,
-                           bool ovr, struct netlink_ext_ack *extack)
+                           u32 flags, struct netlink_ext_ack *extack)
 {
        u32 id = 0, to = 0, nhandle = 0x8000;
        struct route4_filter *fp;
@@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
        struct route4_bucket *b;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
                         struct tcf_proto *tp, unsigned long base, u32 handle,
-                        struct nlattr **tca, void **arg, bool ovr,
-                        bool rtnl_held, struct netlink_ext_ack *extack)
+                        struct nlattr **tca, void **arg, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        struct route4_filter __rcu **fp;
@@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        }
 
        err = route4_set_parms(net, tp, base, f, handle, head, tb,
-                              tca[TCA_RATE], new, ovr, extack);
+                              tca[TCA_RATE], new, flags, extack);
        if (err < 0)
                goto errout;
 
index 27a4b6d..5cd9d6b 100644 (file)
@@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 
 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
-                      u32 handle,
-                      struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      u32 handle, struct nlattr **tca,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct rsvp_head *data = rtnl_dereference(tp->root);
@@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
                                extack);
        if (err < 0)
                goto errout2;
index e9a8a2c..742c7d4 100644 (file)
@@ -330,7 +330,7 @@ static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  u32 handle, struct tcindex_data *p,
                  struct tcindex_filter_result *r, struct nlattr **tb,
-                 struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+                 struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
 {
        struct tcindex_filter_result new_filter_result, *old_r = r;
        struct tcindex_data *cp = NULL, *oldp;
@@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
        if (err < 0)
                goto errout;
 
@@ -529,8 +529,8 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
               struct tcf_proto *tp, unsigned long base, u32 handle,
-              struct nlattr **tca, void **arg, bool ovr,
-              bool rtnl_held, struct netlink_ext_ack *extack)
+              struct nlattr **tca, void **arg, u32 flags,
+              struct netlink_ext_ack *extack)
 {
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-                                tca[TCA_RATE], ovr, extack);
+                                tca[TCA_RATE], flags, extack);
 }
 
 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
index 6e1abe8..4272814 100644 (file)
@@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
                         unsigned long base,
                         struct tc_u_knode *n, struct nlattr **tb,
-                        struct nlattr *est, bool ovr,
+                        struct nlattr *est, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
                      struct tcf_proto *tp, unsigned long base, u32 handle,
-                     struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+                     struct nlattr **tca, void **arg, u32 flags,
                      struct netlink_ext_ack *extack)
 {
        struct tc_u_common *tp_c = tp->data;
@@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        struct tc_u32_sel *s;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_U32_MAX + 1];
-       u32 htid, flags = 0;
+       u32 htid, userflags = 0;
        size_t sel_size;
        int err;
 
@@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_U32_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_U32_FLAGS]);
-               if (!tc_flags_valid(flags)) {
+               userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+               if (!tc_flags_valid(userflags)) {
                        NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
                        return -EINVAL;
                }
@@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -EINVAL;
                }
 
-               if ((n->flags ^ flags) &
+               if ((n->flags ^ userflags) &
                    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
                        NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
                        return -EINVAL;
@@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -ENOMEM;
 
                err = u32_set_parms(net, tp, base, new, tb,
-                                   tca[TCA_RATE], ovr, extack);
+                                   tca[TCA_RATE], flags, extack);
 
                if (err) {
                        u32_destroy_key(new, false);
@@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                ht->handle = handle;
                ht->prio = tp->prio;
                idr_init(&ht->handle_idr);
-               ht->flags = flags;
+               ht->flags = userflags;
 
-               err = u32_replace_hw_hnode(tp, ht, flags, extack);
+               err = u32_replace_hw_hnode(tp, ht, userflags, extack);
                if (err) {
                        idr_remove(&tp_c->handle_idr, handle);
                        kfree(ht);
@@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        RCU_INIT_POINTER(n->ht_up, ht);
        n->handle = handle;
        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
-       n->flags = flags;
+       n->flags = userflags;
 
        err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
        if (err < 0)
@@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        }
 #endif
 
-       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
                            extack);
        if (err == 0) {
                struct tc_u_knode __rcu **ins;
index f87d077..5e90e9b 100644 (file)
@@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 {
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
-       int err = 0;
 
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
@@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
                return -EINVAL;
        }
 
-       err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
-                            n->nlmsg_flags & NLM_F_ECHO);
-       if (err > 0)
-               err = 0;
-       return err;
+       return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+                             n->nlmsg_flags & NLM_F_ECHO);
 }
 
 static int tclass_del_notify(struct net *net,
@@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net,
 
        err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                             n->nlmsg_flags & NLM_F_ECHO);
-       if (err > 0)
-               err = 0;
        return err;
 }
 
index d0c9a57..7d85181 100644 (file)
@@ -394,7 +394,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                list_for_each_entry(flow, &p->flows, list) {
                        fl = rcu_dereference_bh(flow->filter_list);
                        if (fl) {
-                               result = tcf_classify(skb, fl, &res, true);
+                               result = tcf_classify(skb, NULL, fl, &res, true);
                                if (result < 0)
                                        continue;
                                flow = (struct atm_flow_data *)res.class;
index 28af8b1..3c2300d 100644 (file)
@@ -1665,7 +1665,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
                goto hash;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       result = tcf_classify(skb, filter, &res, false);
+       result = tcf_classify(skb, NULL, filter, &res, false);
 
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
index b79a7e2..e0da155 100644 (file)
@@ -228,7 +228,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                /*
                 * Step 2+n. Apply classifier.
                 */
-               result = tcf_classify(skb, fl, &res, true);
+               result = tcf_classify(skb, NULL, fl, &res, true);
                if (!fl || result < 0)
                        goto fallback;
 
@@ -1614,7 +1614,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
        err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
        if (err) {
                kfree(cl);
-               return err;
+               goto failure;
        }
 
        if (tca[TCA_RATE]) {
index fc1e470..642cd17 100644 (file)
@@ -317,7 +317,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        fl = rcu_dereference_bh(q->filter_list);
-       result = tcf_classify(skb, fl, &res, false);
+       result = tcf_classify(skb, NULL, fl, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index d320bcf..4c100d1 100644 (file)
@@ -242,7 +242,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
        else {
                struct tcf_result res;
                struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
-               int result = tcf_classify(skb, fl, &res, false);
+               int result = tcf_classify(skb, NULL, fl, &res, false);
 
                pr_debug("result %d class 0x%04x\n", result, res.classid);
 
index c76701a..1f857ff 100644 (file)
@@ -390,7 +390,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        if (TC_H_MAJ(skb->priority) != sch->handle) {
                fl = rcu_dereference_bh(q->filter_list);
-               err = tcf_classify(skb, fl, &res, false);
+               err = tcf_classify(skb, NULL, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
                switch (err) {
                case TC_ACT_STOLEN:
index bbd5f87..c4afdd0 100644 (file)
@@ -91,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
                return fq_codel_hash(q, skb) + 1;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       result = tcf_classify(skb, filter, &res, false);
+       result = tcf_classify(skb, NULL, filter, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index cac6849..830f355 100644 (file)
@@ -94,7 +94,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
                return fq_pie_hash(q, skb) + 1;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       result = tcf_classify(skb, filter, &res, false);
+       result = tcf_classify(skb, NULL, filter, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index bf0034c..b7ac30c 100644 (file)
@@ -1130,7 +1130,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        head = &q->root;
        tcf = rcu_dereference_bh(q->root.filter_list);
-       while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+       while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
index 5f7ac27..5067a6e 100644 (file)
@@ -125,6 +125,7 @@ struct htb_class {
                struct htb_class_leaf {
                        int             deficit[TC_HTB_MAXDEPTH];
                        struct Qdisc    *q;
+                       struct netdev_queue *offload_queue;
                } leaf;
                struct htb_class_inner {
                        struct htb_prio clprio[TC_HTB_NUMPRIO];
@@ -238,7 +239,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
        }
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+       while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_QUEUED:
@@ -1411,24 +1412,47 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
        return old_q;
 }
 
-static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
+{
+       struct netdev_queue *queue;
+
+       queue = cl->leaf.offload_queue;
+       if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
+               WARN_ON(cl->leaf.q->dev_queue != queue);
+
+       return queue;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
+                                  struct htb_class *cl_new, bool destroying)
 {
        struct netdev_queue *queue_old, *queue_new;
        struct net_device *dev = qdisc_dev(sch);
-       struct Qdisc *qdisc;
 
-       queue_old = netdev_get_tx_queue(dev, qid_old);
-       queue_new = netdev_get_tx_queue(dev, qid_new);
+       queue_old = htb_offload_get_queue(cl_old);
+       queue_new = htb_offload_get_queue(cl_new);
 
-       if (dev->flags & IFF_UP)
-               dev_deactivate(dev);
-       qdisc = dev_graft_qdisc(queue_old, NULL);
-       qdisc->dev_queue = queue_new;
-       qdisc = dev_graft_qdisc(queue_new, qdisc);
-       if (dev->flags & IFF_UP)
-               dev_activate(dev);
+       if (!destroying) {
+               struct Qdisc *qdisc;
 
-       WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+               if (dev->flags & IFF_UP)
+                       dev_deactivate(dev);
+               qdisc = dev_graft_qdisc(queue_old, NULL);
+               WARN_ON(qdisc != cl_old->leaf.q);
+       }
+
+       if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
+               cl_old->leaf.q->dev_queue = queue_new;
+       cl_old->leaf.offload_queue = queue_new;
+
+       if (!destroying) {
+               struct Qdisc *qdisc;
+
+               qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
+               if (dev->flags & IFF_UP)
+                       dev_activate(dev);
+               WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+       }
 }
 
 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
@@ -1442,10 +1466,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
        if (cl->level)
                return -EINVAL;
 
-       if (q->offload) {
-               dev_queue = new->dev_queue;
-               WARN_ON(dev_queue != cl->leaf.q->dev_queue);
-       }
+       if (q->offload)
+               dev_queue = htb_offload_get_queue(cl);
 
        if (!new) {
                new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -1514,6 +1536,8 @@ static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
        parent->ctokens = parent->cbuffer;
        parent->t_c = ktime_get_ns();
        parent->cmode = HTB_CAN_SEND;
+       if (q->offload)
+               parent->leaf.offload_queue = cl->leaf.offload_queue;
 }
 
 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
@@ -1534,6 +1558,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
                                     struct netlink_ext_ack *extack)
 {
        struct tc_htb_qopt_offload offload_opt;
+       struct netdev_queue *dev_queue;
        struct Qdisc *q = cl->leaf.q;
        struct Qdisc *old = NULL;
        int err;
@@ -1542,16 +1567,15 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
                return -EINVAL;
 
        WARN_ON(!q);
-       if (!destroying) {
-               /* On destroy of HTB, two cases are possible:
-                * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
-                * 2. q is a noop qdisc (for nodes that were inner),
-                *    q->dev_queue is noop_netdev_queue.
+       dev_queue = htb_offload_get_queue(cl);
+       old = htb_graft_helper(dev_queue, NULL);
+       if (destroying)
+               /* Before HTB is destroyed, the kernel grafts noop_qdisc to
+                * all queues.
                 */
-               old = htb_graft_helper(q->dev_queue, NULL);
-               WARN_ON(!old);
+               WARN_ON(!(old->flags & TCQ_F_BUILTIN));
+       else
                WARN_ON(old != q);
-       }
 
        if (cl->parent) {
                cl->parent->bstats_bias.bytes += q->bstats.bytes;
@@ -1570,18 +1594,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
        if (!err || destroying)
                qdisc_put(old);
        else
-               htb_graft_helper(q->dev_queue, old);
+               htb_graft_helper(dev_queue, old);
 
        if (last_child)
                return err;
 
-       if (!err && offload_opt.moved_qid != 0) {
-               if (destroying)
-                       q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
-                                                          offload_opt.qid);
-               else
-                       htb_offload_move_qdisc(sch, offload_opt.moved_qid,
-                                              offload_opt.qid);
+       if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
+               u32 classid = TC_H_MAJ(sch->handle) |
+                             TC_H_MIN(offload_opt.classid);
+               struct htb_class *moved_cl = htb_find(classid, sch);
+
+               htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
        }
 
        return err;
@@ -1704,9 +1727,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
        }
 
        if (last_child) {
-               struct netdev_queue *dev_queue;
+               struct netdev_queue *dev_queue = sch->dev_queue;
+
+               if (q->offload)
+                       dev_queue = htb_offload_get_queue(cl);
 
-               dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
                                          cl->parent->common.classid,
                                          NULL);
@@ -1878,7 +1903,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                        }
                        dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
                } else { /* First child. */
-                       dev_queue = parent->leaf.q->dev_queue;
+                       dev_queue = htb_offload_get_queue(parent);
                        old_q = htb_graft_helper(dev_queue, NULL);
                        WARN_ON(old_q != parent->leaf.q);
                        offload_opt = (struct tc_htb_qopt_offload) {
@@ -1935,6 +1960,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
                /* leaf (we) needs elementary qdisc */
                cl->leaf.q = new_q ? new_q : &noop_qdisc;
+               if (q->offload)
+                       cl->leaf.offload_queue = dev_queue;
 
                cl->parent = parent;
 
index 5c27b42..e282e73 100644 (file)
@@ -36,7 +36,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
        int err;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       err = tcf_classify(skb, fl, &res, false);
+       err = tcf_classify(skb, NULL, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
        switch (err) {
        case TC_ACT_STOLEN:
index 3eabb87..03fdf31 100644 (file)
@@ -39,7 +39,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        if (TC_H_MAJ(skb->priority) != sch->handle) {
                fl = rcu_dereference_bh(q->filter_list);
-               err = tcf_classify(skb, fl, &res, false);
+               err = tcf_classify(skb, NULL, fl, &res, false);
 #ifdef CONFIG_NET_CLS_ACT
                switch (err) {
                case TC_ACT_STOLEN:
index b692a0d..58a9d42 100644 (file)
@@ -690,7 +690,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        fl = rcu_dereference_bh(q->filter_list);
-       result = tcf_classify(skb, fl, &res, false);
+       result = tcf_classify(skb, NULL, fl, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index dde829d..3d061a1 100644 (file)
@@ -257,7 +257,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
        struct tcf_result res;
        int result;
 
-       result = tcf_classify(skb, fl, &res, false);
+       result = tcf_classify(skb, NULL, fl, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index 066754a..f8e569f 100644 (file)
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                return sfq_hash(q, skb) + 1;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-       result = tcf_classify(skb, fl, &res, false);
+       result = tcf_classify(skb, NULL, fl, &res, false);
        if (result >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                switch (result) {
index 9c79374..1ab2fc9 100644 (file)
@@ -1513,7 +1513,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
        taprio_set_picos_per_byte(dev, q);
 
        if (mqprio) {
-               netdev_set_num_tc(dev, mqprio->num_tc);
+               err = netdev_set_num_tc(dev, mqprio->num_tc);
+               if (err)
+                       goto free_sched;
                for (i = 0; i < mqprio->num_tc; i++)
                        netdev_set_tc_queue(dev, i,
                                            mqprio->count[i],
index c160ff5..af227b6 100644 (file)
@@ -1752,21 +1752,30 @@ out:
        return rc;
 }
 
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES                5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES.
  * In contrast to plain ilog2, this rounds towards the next power of 2,
  * so the socket application gets at least its desired sndbuf / rcvbuf size.
  */
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
 {
+       const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
        u8 compressed;
 
        if (size <= SMC_BUF_MIN_SIZE)
                return 0;
 
-       size = (size - 1) >> 14;
-       compressed = ilog2(size) + 1;
-       if (compressed >= SMC_RMBE_SIZES)
-               compressed = SMC_RMBE_SIZES - 1;
+       size = (size - 1) >> 14;  /* convert to 16K multiple */
+       compressed = min_t(u8, ilog2(size) + 1,
+                          is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+       if (!is_smcd && is_rmb)
+               /* RMBs are backed by & limited to max size of scatterlists */
+               compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
        return compressed;
 }
 
@@ -1982,17 +1991,12 @@ out:
        return rc;
 }
 
-#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
 {
        struct smc_buf_desc *buf_desc;
        int rc;
 
-       if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
-               return ERR_PTR(-EAGAIN);
-
        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
@@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
             bufsize_short >= 0; bufsize_short--) {
-
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
@@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
-               if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
-                       continue;
 
                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
index 7d7ba03..a884534 100644 (file)
@@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
                        if (!libdev->ops.get_netdev)
                                continue;
                        lndev = libdev->ops.get_netdev(libdev, i + 1);
-                       if (lndev)
-                               dev_put(lndev);
+                       dev_put(lndev);
                        if (lndev != ndev)
                                continue;
                        if (event == NETDEV_REGISTER)
index 6f6d33e..4a964e9 100644 (file)
@@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
        return 0;
 
 out_put:
-       if (ndev)
-               dev_put(ndev);
+       dev_put(ndev);
        return rc;
 }
 
index 8808b36..83e7ac9 100644 (file)
@@ -212,6 +212,7 @@ static const char * const pf_family_names[] = {
        [PF_QIPCRTR]    = "PF_QIPCRTR",
        [PF_SMC]        = "PF_SMC",
        [PF_XDP]        = "PF_XDP",
+       [PF_MCTP]       = "PF_MCTP",
 };
 
 /*
@@ -1064,9 +1065,13 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
  */
 
 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
+static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
+                           unsigned int cmd, struct ifreq *ifr,
+                           void __user *uarg);
 
-void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
+void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
+                            unsigned int cmd, struct ifreq *ifr,
+                            void __user *uarg))
 {
        mutex_lock(&br_ioctl_mutex);
        br_ioctl_hook = hook;
@@ -1074,6 +1079,22 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 }
 EXPORT_SYMBOL(brioctl_set);
 
+int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
+                 struct ifreq *ifr, void __user *uarg)
+{
+       int err = -ENOPKG;
+
+       if (!br_ioctl_hook)
+               request_module("bridge");
+
+       mutex_lock(&br_ioctl_mutex);
+       if (br_ioctl_hook)
+               err = br_ioctl_hook(net, br, cmd, ifr, uarg);
+       mutex_unlock(&br_ioctl_mutex);
+
+       return err;
+}
+
 static DEFINE_MUTEX(vlan_ioctl_mutex);
 static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
 
@@ -1088,8 +1109,11 @@ EXPORT_SYMBOL(vlan_ioctl_set);
 static long sock_do_ioctl(struct net *net, struct socket *sock,
                          unsigned int cmd, unsigned long arg)
 {
+       struct ifreq ifr;
+       bool need_copyout;
        int err;
        void __user *argp = (void __user *)arg;
+       void __user *data;
 
        err = sock->ops->ioctl(sock, cmd, arg);
 
@@ -1100,27 +1124,16 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
        if (err != -ENOIOCTLCMD)
                return err;
 
-       if (cmd == SIOCGIFCONF) {
-               struct ifconf ifc;
-               if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
-                       return -EFAULT;
-               rtnl_lock();
-               err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
-               rtnl_unlock();
-               if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
-                       err = -EFAULT;
-       } else if (is_socket_ioctl_cmd(cmd)) {
-               struct ifreq ifr;
-               bool need_copyout;
-               if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+       if (!is_socket_ioctl_cmd(cmd))
+               return -ENOTTY;
+
+       if (get_user_ifreq(&ifr, &data, argp))
+               return -EFAULT;
+       err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
+       if (!err && need_copyout)
+               if (put_user_ifreq(&ifr, argp))
                        return -EFAULT;
-               err = dev_ioctl(net, cmd, &ifr, &need_copyout);
-               if (!err && need_copyout)
-                       if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
-                               return -EFAULT;
-       } else {
-               err = -ENOTTY;
-       }
+
        return err;
 }
 
@@ -1142,12 +1155,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
        net = sock_net(sk);
        if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
                struct ifreq ifr;
+               void __user *data;
                bool need_copyout;
-               if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+               if (get_user_ifreq(&ifr, &data, argp))
                        return -EFAULT;
-               err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+               err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
                if (!err && need_copyout)
-                       if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+                       if (put_user_ifreq(&ifr, argp))
                                return -EFAULT;
        } else
 #ifdef CONFIG_WEXT_CORE
@@ -1172,14 +1186,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                case SIOCSIFBR:
                case SIOCBRADDBR:
                case SIOCBRDELBR:
-                       err = -ENOPKG;
-                       if (!br_ioctl_hook)
-                               request_module("bridge");
-
-                       mutex_lock(&br_ioctl_mutex);
-                       if (br_ioctl_hook)
-                               err = br_ioctl_hook(net, cmd, argp);
-                       mutex_unlock(&br_ioctl_mutex);
+                       err = br_ioctl_call(net, NULL, cmd, NULL, argp);
                        break;
                case SIOCGIFVLAN:
                case SIOCSIFVLAN:
@@ -1219,6 +1226,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                                                   cmd == SIOCGSTAMP_NEW,
                                                   false);
                        break;
+
+               case SIOCGIFCONF:
+                       err = dev_ifconf(net, argp);
+                       break;
+
                default:
                        err = sock_do_ioctl(net, sock, cmd, arg);
                        break;
@@ -3128,154 +3140,55 @@ void socket_seq_show(struct seq_file *seq)
 }
 #endif                         /* CONFIG_PROC_FS */
 
-#ifdef CONFIG_COMPAT
-static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+/* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
 {
-       struct compat_ifconf ifc32;
-       struct ifconf ifc;
-       int err;
+       if (in_compat_syscall()) {
+               struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
 
-       if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
-               return -EFAULT;
+               memset(ifr, 0, sizeof(*ifr));
+               if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
+                       return -EFAULT;
 
-       ifc.ifc_len = ifc32.ifc_len;
-       ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
+               if (ifrdata)
+                       *ifrdata = compat_ptr(ifr32->ifr_data);
 
-       rtnl_lock();
-       err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
-       rtnl_unlock();
-       if (err)
-               return err;
+               return 0;
+       }
 
-       ifc32.ifc_len = ifc.ifc_len;
-       if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
+       if (copy_from_user(ifr, arg, sizeof(*ifr)))
                return -EFAULT;
 
+       if (ifrdata)
+               *ifrdata = ifr->ifr_data;
+
        return 0;
 }
+EXPORT_SYMBOL(get_user_ifreq);
 
-static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
+int put_user_ifreq(struct ifreq *ifr, void __user *arg)
 {
-       struct compat_ethtool_rxnfc __user *compat_rxnfc;
-       bool convert_in = false, convert_out = false;
-       size_t buf_size = 0;
-       struct ethtool_rxnfc __user *rxnfc = NULL;
-       struct ifreq ifr;
-       u32 rule_cnt = 0, actual_rule_cnt;
-       u32 ethcmd;
-       u32 data;
-       int ret;
+       size_t size = sizeof(*ifr);
 
-       if (get_user(data, &ifr32->ifr_ifru.ifru_data))
-               return -EFAULT;
-
-       compat_rxnfc = compat_ptr(data);
+       if (in_compat_syscall())
+               size = sizeof(struct compat_ifreq);
 
-       if (get_user(ethcmd, &compat_rxnfc->cmd))
+       if (copy_to_user(arg, ifr, size))
                return -EFAULT;
 
-       /* Most ethtool structures are defined without padding.
-        * Unfortunately struct ethtool_rxnfc is an exception.
-        */
-       switch (ethcmd) {
-       default:
-               break;
-       case ETHTOOL_GRXCLSRLALL:
-               /* Buffer size is variable */
-               if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
-                       return -EFAULT;
-               if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
-                       return -ENOMEM;
-               buf_size += rule_cnt * sizeof(u32);
-               fallthrough;
-       case ETHTOOL_GRXRINGS:
-       case ETHTOOL_GRXCLSRLCNT:
-       case ETHTOOL_GRXCLSRULE:
-       case ETHTOOL_SRXCLSRLINS:
-               convert_out = true;
-               fallthrough;
-       case ETHTOOL_SRXCLSRLDEL:
-               buf_size += sizeof(struct ethtool_rxnfc);
-               convert_in = true;
-               rxnfc = compat_alloc_user_space(buf_size);
-               break;
-       }
-
-       if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
-               return -EFAULT;
-
-       ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
-
-       if (convert_in) {
-               /* We expect there to be holes between fs.m_ext and
-                * fs.ring_cookie and at the end of fs, but nowhere else.
-                */
-               BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
-                            sizeof(compat_rxnfc->fs.m_ext) !=
-                            offsetof(struct ethtool_rxnfc, fs.m_ext) +
-                            sizeof(rxnfc->fs.m_ext));
-               BUILD_BUG_ON(
-                       offsetof(struct compat_ethtool_rxnfc, fs.location) -
-                       offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
-                       offsetof(struct ethtool_rxnfc, fs.location) -
-                       offsetof(struct ethtool_rxnfc, fs.ring_cookie));
-
-               if (copy_in_user(rxnfc, compat_rxnfc,
-                                (void __user *)(&rxnfc->fs.m_ext + 1) -
-                                (void __user *)rxnfc) ||
-                   copy_in_user(&rxnfc->fs.ring_cookie,
-                                &compat_rxnfc->fs.ring_cookie,
-                                (void __user *)(&rxnfc->fs.location + 1) -
-                                (void __user *)&rxnfc->fs.ring_cookie))
-                       return -EFAULT;
-               if (ethcmd == ETHTOOL_GRXCLSRLALL) {
-                       if (put_user(rule_cnt, &rxnfc->rule_cnt))
-                               return -EFAULT;
-               } else if (copy_in_user(&rxnfc->rule_cnt,
-                                       &compat_rxnfc->rule_cnt,
-                                       sizeof(rxnfc->rule_cnt)))
-                       return -EFAULT;
-       }
-
-       ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
-       if (ret)
-               return ret;
-
-       if (convert_out) {
-               if (copy_in_user(compat_rxnfc, rxnfc,
-                                (const void __user *)(&rxnfc->fs.m_ext + 1) -
-                                (const void __user *)rxnfc) ||
-                   copy_in_user(&compat_rxnfc->fs.ring_cookie,
-                                &rxnfc->fs.ring_cookie,
-                                (const void __user *)(&rxnfc->fs.location + 1) -
-                                (const void __user *)&rxnfc->fs.ring_cookie) ||
-                   copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
-                                sizeof(rxnfc->rule_cnt)))
-                       return -EFAULT;
-
-               if (ethcmd == ETHTOOL_GRXCLSRLALL) {
-                       /* As an optimisation, we only copy the actual
-                        * number of rules that the underlying
-                        * function returned.  Since Mallory might
-                        * change the rule count in user memory, we
-                        * check that it is less than the rule count
-                        * originally given (as the user buffer size),
-                        * which has been range-checked.
-                        */
-                       if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
-                               return -EFAULT;
-                       if (actual_rule_cnt < rule_cnt)
-                               rule_cnt = actual_rule_cnt;
-                       if (copy_in_user(&compat_rxnfc->rule_locs[0],
-                                        &rxnfc->rule_locs[0],
-                                        rule_cnt * sizeof(u32)))
-                               return -EFAULT;
-               }
-       }
-
        return 0;
 }
+EXPORT_SYMBOL(put_user_ifreq);
 
+#ifdef CONFIG_COMPAT
 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
 {
        compat_uptr_t uptr32;
@@ -3283,7 +3196,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
        void __user *saved;
        int err;
 
-       if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
+       if (get_user_ifreq(&ifr, NULL, uifr32))
                return -EFAULT;
 
        if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
@@ -3292,10 +3205,10 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
        saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
        ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
 
-       err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+       err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
        if (!err) {
                ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
-               if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+               if (put_user_ifreq(&ifr, uifr32))
                        err = -EFAULT;
        }
        return err;
@@ -3306,99 +3219,15 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
                                 struct compat_ifreq __user *u_ifreq32)
 {
        struct ifreq ifreq;
-       u32 data32;
+       void __user *data;
 
        if (!is_socket_ioctl_cmd(cmd))
                return -ENOTTY;
-       if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
-               return -EFAULT;
-       if (get_user(data32, &u_ifreq32->ifr_data))
-               return -EFAULT;
-       ifreq.ifr_data = compat_ptr(data32);
-
-       return dev_ioctl(net, cmd, &ifreq, NULL);
-}
-
-static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
-                             unsigned int cmd,
-                             struct compat_ifreq __user *uifr32)
-{
-       struct ifreq __user *uifr;
-       int err;
-
-       /* Handle the fact that while struct ifreq has the same *layout* on
-        * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
-        * which are handled elsewhere, it still has different *size* due to
-        * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
-        * resulting in struct ifreq being 32 and 40 bytes respectively).
-        * As a result, if the struct happens to be at the end of a page and
-        * the next page isn't readable/writable, we get a fault. To prevent
-        * that, copy back and forth to the full size.
-        */
-
-       uifr = compat_alloc_user_space(sizeof(*uifr));
-       if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
-               return -EFAULT;
-
-       err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
-       if (!err) {
-               switch (cmd) {
-               case SIOCGIFFLAGS:
-               case SIOCGIFMETRIC:
-               case SIOCGIFMTU:
-               case SIOCGIFMEM:
-               case SIOCGIFHWADDR:
-               case SIOCGIFINDEX:
-               case SIOCGIFADDR:
-               case SIOCGIFBRDADDR:
-               case SIOCGIFDSTADDR:
-               case SIOCGIFNETMASK:
-               case SIOCGIFPFLAGS:
-               case SIOCGIFTXQLEN:
-               case SIOCGMIIPHY:
-               case SIOCGMIIREG:
-               case SIOCGIFNAME:
-                       if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
-                               err = -EFAULT;
-                       break;
-               }
-       }
-       return err;
-}
-
-static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
-                       struct compat_ifreq __user *uifr32)
-{
-       struct ifreq ifr;
-       struct compat_ifmap __user *uifmap32;
-       int err;
-
-       uifmap32 = &uifr32->ifr_ifru.ifru_map;
-       err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
-       err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-       err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-       err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-       err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
-       err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
-       err |= get_user(ifr.ifr_map.port, &uifmap32->port);
-       if (err)
+       if (get_user_ifreq(&ifreq, &data, u_ifreq32))
                return -EFAULT;
+       ifreq.ifr_data = data;
 
-       err = dev_ioctl(net, cmd, &ifr, NULL);
-
-       if (cmd == SIOCGIFMAP && !err) {
-               err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
-               err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-               err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-               err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-               err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
-               err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
-               err |= put_user(ifr.ifr_map.port, &uifmap32->port);
-               if (err)
-                       err = -EFAULT;
-       }
-       return err;
+       return dev_ioctl(net, cmd, &ifreq, data, NULL);
 }
 
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
@@ -3424,21 +3253,14 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
        struct net *net = sock_net(sk);
 
        if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
-               return compat_ifr_data_ioctl(net, cmd, argp);
+               return sock_ioctl(file, cmd, (unsigned long)argp);
 
        switch (cmd) {
        case SIOCSIFBR:
        case SIOCGIFBR:
                return old_bridge_ioctl(argp);
-       case SIOCGIFCONF:
-               return compat_dev_ifconf(net, argp);
-       case SIOCETHTOOL:
-               return ethtool_ioctl(net, argp);
        case SIOCWANDEV:
                return compat_siocwandev(net, argp);
-       case SIOCGIFMAP:
-       case SIOCSIFMAP:
-               return compat_sioc_ifmap(net, cmd, argp);
        case SIOCGSTAMP_OLD:
        case SIOCGSTAMPNS_OLD:
                if (!sock->ops->gettstamp)
@@ -3446,6 +3268,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
                return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
                                            !COMPAT_USE_64BIT_TIME);
 
+       case SIOCETHTOOL:
        case SIOCBONDSLAVEINFOQUERY:
        case SIOCBONDINFOQUERY:
        case SIOCSHWTSTAMP:
@@ -3463,10 +3286,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
        case SIOCGSKNS:
        case SIOCGSTAMP_NEW:
        case SIOCGSTAMPNS_NEW:
+       case SIOCGIFCONF:
                return sock_ioctl(file, cmd, arg);
 
        case SIOCGIFFLAGS:
        case SIOCSIFFLAGS:
+       case SIOCGIFMAP:
+       case SIOCSIFMAP:
        case SIOCGIFMETRIC:
        case SIOCSIFMETRIC:
        case SIOCGIFMTU:
@@ -3503,8 +3329,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
        case SIOCBONDRELEASE:
        case SIOCBONDSETHWADDR:
        case SIOCBONDCHANGEACTIVE:
-               return compat_ifreq_ioctl(net, sock, cmd, argp);
-
        case SIOCSARP:
        case SIOCGARP:
        case SIOCDARP:
index 070698d..0b2c18e 100644 (file)
@@ -378,6 +378,266 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
 
+struct switchdev_nested_priv {
+       bool (*check_cb)(const struct net_device *dev);
+       bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                    const struct net_device *foreign_dev);
+       const struct net_device *dev;
+       struct net_device *lower_dev;
+};
+
+static int switchdev_lower_dev_walk(struct net_device *lower_dev,
+                                   struct netdev_nested_priv *priv)
+{
+       struct switchdev_nested_priv *switchdev_priv = priv->data;
+       bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                    const struct net_device *foreign_dev);
+       bool (*check_cb)(const struct net_device *dev);
+       const struct net_device *dev;
+
+       check_cb = switchdev_priv->check_cb;
+       foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb;
+       dev = switchdev_priv->dev;
+
+       if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) {
+               switchdev_priv->lower_dev = lower_dev;
+               return 1;
+       }
+
+       return 0;
+}
+
+static struct net_device *
+switchdev_lower_dev_find(struct net_device *dev,
+                        bool (*check_cb)(const struct net_device *dev),
+                        bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                                     const struct net_device *foreign_dev))
+{
+       struct switchdev_nested_priv switchdev_priv = {
+               .check_cb = check_cb,
+               .foreign_dev_check_cb = foreign_dev_check_cb,
+               .dev = dev,
+               .lower_dev = NULL,
+       };
+       struct netdev_nested_priv priv = {
+               .data = &switchdev_priv,
+       };
+
+       netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+       return switchdev_priv.lower_dev;
+}
+
+static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
+               const struct net_device *orig_dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*add_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_add_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       const struct switchdev_notifier_info *info = &fdb_info->info;
+       struct net_device *br, *lower_dev;
+       struct list_head *iter;
+       int err = -EOPNOTSUPP;
+
+       if (check_cb(dev))
+               return add_cb(dev, orig_dev, info->ctx, fdb_info);
+
+       if (netif_is_lag_master(dev)) {
+               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+                       goto maybe_bridged_with_us;
+
+               /* This is a LAG interface that we offload */
+               if (!lag_add_cb)
+                       return -EOPNOTSUPP;
+
+               return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+       }
+
+       /* Recurse through lower interfaces in case the FDB entry is pointing
+        * towards a bridge device.
+        */
+       if (netif_is_bridge_master(dev)) {
+               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+                       return 0;
+
+               /* This is a bridge interface that we offload */
+               netdev_for_each_lower_dev(dev, lower_dev, iter) {
+                       /* Do not propagate FDB entries across bridges */
+                       if (netif_is_bridge_master(lower_dev))
+                               continue;
+
+                       /* Bridge ports might be either us, or LAG interfaces
+                        * that we offload.
+                        */
+                       if (!check_cb(lower_dev) &&
+                           !switchdev_lower_dev_find(lower_dev, check_cb,
+                                                     foreign_dev_check_cb))
+                               continue;
+
+                       err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
+                                                                  fdb_info, check_cb,
+                                                                  foreign_dev_check_cb,
+                                                                  add_cb, lag_add_cb);
+                       if (err && err != -EOPNOTSUPP)
+                               return err;
+               }
+
+               return 0;
+       }
+
+maybe_bridged_with_us:
+       /* Event is neither on a bridge nor a LAG. Check whether it is on an
+        * interface that is in a bridge with us.
+        */
+       br = netdev_master_upper_dev_get_rcu(dev);
+       if (!br || !netif_is_bridge_master(br))
+               return 0;
+
+       if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+               return 0;
+
+       return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
+                                                   check_cb, foreign_dev_check_cb,
+                                                   add_cb, lag_add_cb);
+}
+
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*add_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_add_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       int err;
+
+       err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
+                                                  check_cb,
+                                                  foreign_dev_check_cb,
+                                                  add_cb, lag_add_cb);
+       if (err == -EOPNOTSUPP)
+               err = 0;
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
+
+static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
+               const struct net_device *orig_dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*del_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_del_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       const struct switchdev_notifier_info *info = &fdb_info->info;
+       struct net_device *br, *lower_dev;
+       struct list_head *iter;
+       int err = -EOPNOTSUPP;
+
+       if (check_cb(dev))
+               return del_cb(dev, orig_dev, info->ctx, fdb_info);
+
+       if (netif_is_lag_master(dev)) {
+               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+                       goto maybe_bridged_with_us;
+
+               /* This is a LAG interface that we offload */
+               if (!lag_del_cb)
+                       return -EOPNOTSUPP;
+
+               return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
+       }
+
+       /* Recurse through lower interfaces in case the FDB entry is pointing
+        * towards a bridge device.
+        */
+       if (netif_is_bridge_master(dev)) {
+               if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+                       return 0;
+
+               /* This is a bridge interface that we offload */
+               netdev_for_each_lower_dev(dev, lower_dev, iter) {
+                       /* Do not propagate FDB entries across bridges */
+                       if (netif_is_bridge_master(lower_dev))
+                               continue;
+
+                       /* Bridge ports might be either us, or LAG interfaces
+                        * that we offload.
+                        */
+                       if (!check_cb(lower_dev) &&
+                           !switchdev_lower_dev_find(lower_dev, check_cb,
+                                                     foreign_dev_check_cb))
+                               continue;
+
+                       err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
+                                                                  fdb_info, check_cb,
+                                                                  foreign_dev_check_cb,
+                                                                  del_cb, lag_del_cb);
+                       if (err && err != -EOPNOTSUPP)
+                               return err;
+               }
+
+               return 0;
+       }
+
+maybe_bridged_with_us:
+       /* Event is neither on a bridge nor a LAG. Check whether it is on an
+        * interface that is in a bridge with us.
+        */
+       br = netdev_master_upper_dev_get_rcu(dev);
+       if (!br || !netif_is_bridge_master(br))
+               return 0;
+
+       if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+               return 0;
+
+       return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
+                                                   check_cb, foreign_dev_check_cb,
+                                                   del_cb, lag_del_cb);
+}
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+               const struct switchdev_notifier_fdb_info *fdb_info,
+               bool (*check_cb)(const struct net_device *dev),
+               bool (*foreign_dev_check_cb)(const struct net_device *dev,
+                                            const struct net_device *foreign_dev),
+               int (*del_cb)(struct net_device *dev,
+                             const struct net_device *orig_dev, const void *ctx,
+                             const struct switchdev_notifier_fdb_info *fdb_info),
+               int (*lag_del_cb)(struct net_device *dev,
+                                 const struct net_device *orig_dev, const void *ctx,
+                                 const struct switchdev_notifier_fdb_info *fdb_info))
+{
+       int err;
+
+       err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
+                                                  check_cb,
+                                                  foreign_dev_check_cb,
+                                                  del_cb, lag_del_cb);
+       if (err == -EOPNOTSUPP)
+               err = 0;
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+
 static int __switchdev_handle_port_obj_add(struct net_device *dev,
                        struct switchdev_notifier_port_obj_info *port_obj_info,
                        bool (*check_cb)(const struct net_device *dev),
@@ -549,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
        return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .dev = dev,
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+                       .tx_fwd_offload = tx_fwd_offload,
+               },
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+                                               brport_dev, &brport_info.info,
+                                               extack);
+       return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+               },
+       };
+
+       ASSERT_RTNL();
+
+       call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+                                         brport_dev, &brport_info.info,
+                                         NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
index 8754bd8..e3105ba 100644 (file)
@@ -1886,6 +1886,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
        bool connected = !tipc_sk_type_connectionless(sk);
        struct tipc_sock *tsk = tipc_sk(sk);
        int rc, err, hlen, dlen, copy;
+       struct tipc_skb_cb *skb_cb;
        struct sk_buff_head xmitq;
        struct tipc_msg *hdr;
        struct sk_buff *skb;
@@ -1909,6 +1910,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
                if (unlikely(rc))
                        goto exit;
                skb = skb_peek(&sk->sk_receive_queue);
+               skb_cb = TIPC_SKB_CB(skb);
                hdr = buf_msg(skb);
                dlen = msg_data_sz(hdr);
                hlen = msg_hdr_sz(hdr);
@@ -1928,18 +1930,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 
        /* Capture data if non-error msg, otherwise just set return value */
        if (likely(!err)) {
-               copy = min_t(int, dlen, buflen);
-               if (unlikely(copy != dlen))
-                       m->msg_flags |= MSG_TRUNC;
-               rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+               int offset = skb_cb->bytes_read;
+
+               copy = min_t(int, dlen - offset, buflen);
+               rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+               if (unlikely(rc))
+                       goto exit;
+               if (unlikely(offset + copy < dlen)) {
+                       if (flags & MSG_EOR) {
+                               if (!(flags & MSG_PEEK))
+                                       skb_cb->bytes_read = offset + copy;
+                       } else {
+                               m->msg_flags |= MSG_TRUNC;
+                               skb_cb->bytes_read = 0;
+                       }
+               } else {
+                       if (flags & MSG_EOR)
+                               m->msg_flags |= MSG_EOR;
+                       skb_cb->bytes_read = 0;
+               }
        } else {
                copy = 0;
                rc = 0;
-               if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+               if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
                        rc = -ECONNRESET;
+                       goto exit;
+               }
        }
-       if (unlikely(rc))
-               goto exit;
 
        /* Mark message as group event if applicable */
        if (unlikely(grp_evt)) {
@@ -1962,6 +1979,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
                tipc_node_distr_xmit(sock_net(sk), &xmitq);
        }
 
+       if (skb_cb->bytes_read)
+               goto exit;
+
        tsk_advance_rx_queue(sk);
 
        if (likely(!connected))
index b6c4282..b7f8112 100644 (file)
@@ -25,6 +25,11 @@ config UNIX_SCM
        depends on UNIX
        default y
 
+config AF_UNIX_OOB
+       bool
+       depends on UNIX
+       default y
+
 config UNIX_DIAG
        tristate "UNIX: socket monitoring interface"
        depends on UNIX
index 54e58cc..2049182 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX)      += unix.o
 
 unix-y                 := af_unix.o garbage.o
 unix-$(CONFIG_SYSCTL)  += sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
 
 obj-$(CONFIG_UNIX_DIAG)        += unix_diag.o
 unix_diag-y            := diag.o
index ba7ced9..eb47b9d 100644 (file)
 #include <linux/security.h>
 #include <linux/freezer.h>
 #include <linux/file.h>
+#include <linux/btf_ids.h>
 
 #include "scm.h"
 
@@ -494,6 +495,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
                        sk_error_report(other);
                }
        }
+       other->sk_state = TCP_CLOSE;
 }
 
 static void unix_sock_destructor(struct sock *sk)
@@ -502,6 +504,12 @@ static void unix_sock_destructor(struct sock *sk)
 
        skb_queue_purge(&sk->sk_receive_queue);
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       if (u->oob_skb) {
+               kfree_skb(u->oob_skb);
+               u->oob_skb = NULL;
+       }
+#endif
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
@@ -669,6 +677,10 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       unsigned int flags);
 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+                         sk_read_actor_t recv_actor);
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+                                sk_read_actor_t recv_actor);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -722,6 +734,7 @@ static const struct proto_ops unix_stream_ops = {
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
+       .read_sock =    unix_stream_read_sock,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
@@ -746,6 +759,7 @@ static const struct proto_ops unix_dgram_ops = {
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_dgram_sendmsg,
+       .read_sock =    unix_read_sock,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
@@ -777,13 +791,42 @@ static const struct proto_ops unix_seqpacket_ops = {
        .show_fdinfo =  unix_show_fdinfo,
 };
 
-static struct proto unix_proto = {
-       .name                   = "UNIX",
+static void unix_close(struct sock *sk, long timeout)
+{
+       /* Nothing to do here, unix socket does not need a ->close().
+        * This is merely for sockmap.
+        */
+}
+
+static void unix_unhash(struct sock *sk)
+{
+       /* Nothing to do here, unix socket does not need a ->unhash().
+        * This is merely for sockmap.
+        */
+}
+
+struct proto unix_dgram_proto = {
+       .name                   = "UNIX-DGRAM",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
+       .close                  = unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+       .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
+#endif
 };
 
-static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+struct proto unix_stream_proto = {
+       .name                   = "UNIX-STREAM",
+       .owner                  = THIS_MODULE,
+       .obj_size               = sizeof(struct unix_sock),
+       .close                  = unix_close,
+       .unhash                 = unix_unhash,
+#ifdef CONFIG_BPF_SYSCALL
+       .psock_update_sk_prot   = unix_stream_bpf_update_proto,
+#endif
+};
+
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 {
        struct sock *sk = NULL;
        struct unix_sock *u;
@@ -792,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;
 
-       sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
+       if (type == SOCK_STREAM)
+               sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
+       else /*dgram and  seqpacket */
+               sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
+
        if (!sk)
                goto out;
 
@@ -854,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
                return -ESOCKTNOSUPPORT;
        }
 
-       return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
+       return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
 }
 
 static int unix_release(struct socket *sock)
@@ -864,6 +911,7 @@ static int unix_release(struct socket *sock)
        if (!sk)
                return 0;
 
+       sk->sk_prot->close(sk, 0);
        unix_release_sock(sk, 0);
        sock->sk = NULL;
 
@@ -1174,6 +1222,7 @@ restart:
                if (err)
                        goto out_unlock;
 
+               sk->sk_state = other->sk_state = TCP_ESTABLISHED;
        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
@@ -1187,7 +1236,10 @@ restart:
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);
+
                unix_peer(sk) = other;
+               if (!other)
+                       sk->sk_state = TCP_CLOSE;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
 
                unix_state_double_unlock(sk, other);
@@ -1199,6 +1251,7 @@ restart:
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }
+
        return 0;
 
 out_unlock:
@@ -1264,7 +1317,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
        err = -ENOMEM;
 
        /* create new sock for complete connection */
-       newsk = unix_create1(sock_net(sk), NULL, 0);
+       newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
        if (newsk == NULL)
                goto out;
 
@@ -1431,12 +1484,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
        init_peercred(ska);
        init_peercred(skb);
 
-       if (ska->sk_type != SOCK_DGRAM) {
-               ska->sk_state = TCP_ESTABLISHED;
-               skb->sk_state = TCP_ESTABLISHED;
-               socka->state  = SS_CONNECTED;
-               sockb->state  = SS_CONNECTED;
-       }
+       ska->sk_state = TCP_ESTABLISHED;
+       skb->sk_state = TCP_ESTABLISHED;
+       socka->state  = SS_CONNECTED;
+       sockb->state  = SS_CONNECTED;
        return 0;
 }
 
@@ -1782,6 +1833,7 @@ restart_locked:
 
                        unix_state_unlock(sk);
 
+                       sk->sk_state = TCP_CLOSE;
                        unix_dgram_disconnected(sk, other);
                        sock_put(other);
                        err = -ECONNREFUSED;
@@ -1872,6 +1924,53 @@ out:
  */
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+       struct unix_sock *ousk = unix_sk(other);
+       struct sk_buff *skb;
+       int err = 0;
+
+       skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+       if (!skb)
+               return err;
+
+       skb_put(skb, 1);
+       err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+       if (err) {
+               kfree_skb(skb);
+               return err;
+       }
+
+       unix_state_lock(other);
+
+       if (sock_flag(other, SOCK_DEAD) ||
+           (other->sk_shutdown & RCV_SHUTDOWN)) {
+               unix_state_unlock(other);
+               kfree_skb(skb);
+               return -EPIPE;
+       }
+
+       maybe_add_creds(skb, sock, other);
+       skb_get(skb);
+
+       if (ousk->oob_skb)
+               consume_skb(ousk->oob_skb);
+
+       ousk->oob_skb = skb;
+
+       scm_stat_add(other, skb);
+       skb_queue_tail(&other->sk_receive_queue, skb);
+       sk_send_sigurg(other);
+       unix_state_unlock(other);
+       other->sk_data_ready(other);
+
+       return err;
+}
+#endif
+
 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
 {
@@ -1890,8 +1989,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                return err;
 
        err = -EOPNOTSUPP;
-       if (msg->msg_flags&MSG_OOB)
-               goto out_err;
+       if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+               if (len)
+                       len--;
+               else
+#endif
+                       goto out_err;
+       }
 
        if (msg->msg_namelen) {
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1956,6 +2061,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                sent += size;
        }
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+       if (msg->msg_flags & MSG_OOB) {
+               err = queue_oob(sock, msg, other);
+               if (err)
+                       goto out_err;
+               sent++;
+       }
+#endif
+
        scm_destroy(&scm);
 
        return sent;
@@ -2128,11 +2242,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
        }
 }
 
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
-                             size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+                        int flags)
 {
        struct scm_cookie scm;
-       struct sock *sk = sock->sk;
+       struct socket *sock = sk->sk_socket;
        struct unix_sock *u = unix_sk(sk);
        struct sk_buff *skb, *last;
        long timeo;
@@ -2235,6 +2349,55 @@ out:
        return err;
 }
 
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+                             int flags)
+{
+       struct sock *sk = sock->sk;
+
+#ifdef CONFIG_BPF_SYSCALL
+       const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+       if (prot != &unix_dgram_proto)
+               return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+                                           flags & ~MSG_DONTWAIT, NULL);
+#endif
+       return __unix_dgram_recvmsg(sk, msg, size, flags);
+}
+
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+                         sk_read_actor_t recv_actor)
+{
+       int copied = 0;
+
+       while (1) {
+               struct unix_sock *u = unix_sk(sk);
+               struct sk_buff *skb;
+               int used, err;
+
+               mutex_lock(&u->iolock);
+               skb = skb_recv_datagram(sk, 0, 1, &err);
+               mutex_unlock(&u->iolock);
+               if (!skb)
+                       return err;
+
+               used = recv_actor(desc, skb, 0, skb->len);
+               if (used <= 0) {
+                       if (!copied)
+                               copied = used;
+                       kfree_skb(skb);
+                       break;
+               } else if (used <= skb->len) {
+                       copied += used;
+               }
+
+               kfree_skb(skb);
+               if (!desc->count)
+                       break;
+       }
+
+       return copied;
+}
+
 /*
  *     Sleep until more data has arrived. But check for races..
  */
@@ -2294,6 +2457,86 @@ struct unix_stream_read_state {
        unsigned int splice_flags;
 };
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+       struct socket *sock = state->socket;
+       struct sock *sk = sock->sk;
+       struct unix_sock *u = unix_sk(sk);
+       int chunk = 1;
+       struct sk_buff *oob_skb;
+
+       mutex_lock(&u->iolock);
+       unix_state_lock(sk);
+
+       if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+               unix_state_unlock(sk);
+               mutex_unlock(&u->iolock);
+               return -EINVAL;
+       }
+
+       oob_skb = u->oob_skb;
+
+       if (!(state->flags & MSG_PEEK)) {
+               u->oob_skb = NULL;
+       }
+
+       unix_state_unlock(sk);
+
+       chunk = state->recv_actor(oob_skb, 0, chunk, state);
+
+       if (!(state->flags & MSG_PEEK)) {
+               UNIXCB(oob_skb).consumed += 1;
+               kfree_skb(oob_skb);
+       }
+
+       mutex_unlock(&u->iolock);
+
+       if (chunk < 0)
+               return -EFAULT;
+
+       state->msg->msg_flags |= MSG_OOB;
+       return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+                                 int flags, int copied)
+{
+       struct unix_sock *u = unix_sk(sk);
+
+       if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+               skb_unlink(skb, &sk->sk_receive_queue);
+               consume_skb(skb);
+               skb = NULL;
+       } else {
+               if (skb == u->oob_skb) {
+                       if (copied) {
+                               skb = NULL;
+                       } else if (sock_flag(sk, SOCK_URGINLINE)) {
+                               if (!(flags & MSG_PEEK)) {
+                                       u->oob_skb = NULL;
+                                       consume_skb(skb);
+                               }
+                       } else if (!(flags & MSG_PEEK)) {
+                               skb_unlink(skb, &sk->sk_receive_queue);
+                               consume_skb(skb);
+                               skb = skb_peek(&sk->sk_receive_queue);
+                       }
+               }
+       }
+       return skb;
+}
+#endif
+
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+                                sk_read_actor_t recv_actor)
+{
+       if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+               return -ENOTCONN;
+
+       return unix_read_sock(sk, desc, recv_actor);
+}
+
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
 {
@@ -2319,6 +2562,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 
        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               err = unix_stream_recv_urg(state);
+#endif
                goto out;
        }
 
@@ -2347,6 +2593,18 @@ redo:
                }
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               if (skb) {
+                       skb = manage_oob(skb, sk, flags, copied);
+                       if (!skb) {
+                               unix_state_unlock(sk);
+                               if (copied)
+                                       break;
+                               goto redo;
+                       }
+               }
+#endif
 again:
                if (skb == NULL) {
                        if (copied >= target)
@@ -2504,6 +2762,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
        return ret ?: chunk;
 }
 
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+                         size_t size, int flags)
+{
+       struct unix_stream_read_state state = {
+               .recv_actor = unix_stream_read_actor,
+               .socket = sk->sk_socket,
+               .msg = msg,
+               .size = size,
+               .flags = flags
+       };
+
+       return unix_stream_read_generic(&state, true);
+}
+
 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                               size_t size, int flags)
 {
@@ -2515,6 +2787,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                .flags = flags
        };
 
+#ifdef CONFIG_BPF_SYSCALL
+       struct sock *sk = sock->sk;
+       const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+       if (prot != &unix_stream_proto)
+               return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+                                           flags & ~MSG_DONTWAIT, NULL);
+#endif
        return unix_stream_read_generic(&state, true);
 }
 
@@ -2575,7 +2855,10 @@ static int unix_shutdown(struct socket *sock, int mode)
                (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
 
                int peer_mode = 0;
+               const struct proto *prot = READ_ONCE(other->sk_prot);
 
+               if (prot->unhash)
+                       prot->unhash(other);
                if (mode&RCV_SHUTDOWN)
                        peer_mode |= SEND_SHUTDOWN;
                if (mode&SEND_SHUTDOWN)
@@ -2584,10 +2867,12 @@ static int unix_shutdown(struct socket *sock, int mode)
                other->sk_shutdown |= peer_mode;
                unix_state_unlock(other);
                other->sk_state_change(other);
-               if (peer_mode == SHUTDOWN_MASK)
+               if (peer_mode == SHUTDOWN_MASK) {
                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
-               else if (peer_mode & RCV_SHUTDOWN)
+                       other->sk_state = TCP_CLOSE;
+               } else if (peer_mode & RCV_SHUTDOWN) {
                        sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
+               }
        }
        if (other)
                sock_put(other);
@@ -2682,6 +2967,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCUNIXFILE:
                err = unix_open_file(sk);
                break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       case SIOCATMARK:
+               {
+                       struct sk_buff *skb;
+                       struct unix_sock *u = unix_sk(sk);
+                       int answ = 0;
+
+                       skb = skb_peek(&sk->sk_receive_queue);
+                       if (skb && skb == u->oob_skb)
+                               answ = 1;
+                       err = put_user(answ, (int __user *)arg);
+               }
+               break;
+#endif
        default:
                err = -ENOIOCTLCMD;
                break;
@@ -2918,6 +3217,64 @@ static const struct seq_operations unix_seq_ops = {
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
 };
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__unix {
+       __bpf_md_ptr(struct bpf_iter_meta *, meta);
+       __bpf_md_ptr(struct unix_sock *, unix_sk);
+       uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+                             struct unix_sock *unix_sk, uid_t uid)
+{
+       struct bpf_iter__unix ctx;
+
+       meta->seq_num--;  /* skip SEQ_START_TOKEN */
+       ctx.meta = meta;
+       ctx.unix_sk = unix_sk;
+       ctx.uid = uid;
+       return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+       struct bpf_iter_meta meta;
+       struct bpf_prog *prog;
+       struct sock *sk = v;
+       uid_t uid;
+
+       if (v == SEQ_START_TOKEN)
+               return 0;
+
+       uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+       meta.seq = seq;
+       prog = bpf_iter_get_info(&meta, false);
+       return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
+{
+       struct bpf_iter_meta meta;
+       struct bpf_prog *prog;
+
+       if (!v) {
+               meta.seq = seq;
+               prog = bpf_iter_get_info(&meta, true);
+               if (prog)
+                       (void)unix_prog_seq_show(prog, &meta, v, 0);
+       }
+
+       unix_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+       .start  = unix_seq_start,
+       .next   = unix_seq_next,
+       .stop   = bpf_iter_unix_seq_stop,
+       .show   = bpf_iter_unix_seq_show,
+};
+#endif
 #endif
 
 static const struct net_proto_family unix_family_ops = {
@@ -2958,13 +3315,48 @@ static struct pernet_operations unix_net_ops = {
        .exit = unix_net_exit,
 };
 
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+                    struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+       .seq_ops                = &bpf_iter_unix_seq_ops,
+       .init_seq_private       = bpf_iter_init_seq_net,
+       .fini_seq_private       = bpf_iter_fini_seq_net,
+       .seq_priv_size          = sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+       .target                 = "unix",
+       .ctx_arg_info_size      = 1,
+       .ctx_arg_info           = {
+               { offsetof(struct bpf_iter__unix, unix_sk),
+                 PTR_TO_BTF_ID_OR_NULL },
+       },
+       .seq_info               = &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+       unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+       if (bpf_iter_reg_target(&unix_reg_info))
+               pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
 static int __init af_unix_init(void)
 {
        int rc = -1;
 
        BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
 
-       rc = proto_register(&unix_proto, 1);
+       rc = proto_register(&unix_dgram_proto, 1);
+       if (rc != 0) {
+               pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
+               goto out;
+       }
+
+       rc = proto_register(&unix_stream_proto, 1);
        if (rc != 0) {
                pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
                goto out;
@@ -2972,6 +3364,12 @@ static int __init af_unix_init(void)
 
        sock_register(&unix_family_ops);
        register_pernet_subsys(&unix_net_ops);
+       unix_bpf_build_proto();
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+       bpf_iter_register();
+#endif
+
 out:
        return rc;
 }
@@ -2979,7 +3377,8 @@ out:
 static void __exit af_unix_exit(void)
 {
        sock_unregister(PF_UNIX);
-       proto_unregister(&unix_proto);
+       proto_unregister(&unix_dgram_proto);
+       proto_unregister(&unix_stream_proto);
        unregister_pernet_subsys(&unix_net_ops);
 }
 
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644 (file)
index 0000000..b927e2b
--- /dev/null
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+#define unix_sk_has_data(__sk, __psock)                                        \
+               ({      !skb_queue_empty(&__sk->sk_receive_queue) ||    \
+                       !skb_queue_empty(&__psock->ingress_skb) ||      \
+                       !list_empty(&__psock->ingress_msg);             \
+               })
+
+static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+                             long timeo)
+{
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
+       struct unix_sock *u = unix_sk(sk);
+       int ret = 0;
+
+       if (sk->sk_shutdown & RCV_SHUTDOWN)
+               return 1;
+
+       if (!timeo)
+               return ret;
+
+       add_wait_queue(sk_sleep(sk), &wait);
+       sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+       if (!unix_sk_has_data(sk, psock)) {
+               mutex_unlock(&u->iolock);
+               wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+               mutex_lock(&u->iolock);
+               ret = unix_sk_has_data(sk, psock);
+       }
+       sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+       remove_wait_queue(sk_sleep(sk), &wait);
+       return ret;
+}
+
+static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
+                         size_t len, int flags)
+{
+       if (sk->sk_type == SOCK_DGRAM)
+               return __unix_dgram_recvmsg(sk, msg, len, flags);
+       else
+               return __unix_stream_recvmsg(sk, msg, len, flags);
+}
+
+static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+                           size_t len, int nonblock, int flags,
+                           int *addr_len)
+{
+       struct unix_sock *u = unix_sk(sk);
+       struct sk_psock *psock;
+       int copied;
+
+       psock = sk_psock_get(sk);
+       if (unlikely(!psock))
+               return __unix_recvmsg(sk, msg, len, flags);
+
+       mutex_lock(&u->iolock);
+       if (!skb_queue_empty(&sk->sk_receive_queue) &&
+           sk_psock_queue_empty(psock)) {
+               mutex_unlock(&u->iolock);
+               sk_psock_put(sk, psock);
+               return __unix_recvmsg(sk, msg, len, flags);
+       }
+
+msg_bytes_ready:
+       copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+       if (!copied) {
+               long timeo;
+               int data;
+
+               timeo = sock_rcvtimeo(sk, nonblock);
+               data = unix_msg_wait_data(sk, psock, timeo);
+               if (data) {
+                       if (!sk_psock_queue_empty(psock))
+                               goto msg_bytes_ready;
+                       mutex_unlock(&u->iolock);
+                       sk_psock_put(sk, psock);
+                       return __unix_recvmsg(sk, msg, len, flags);
+               }
+               copied = -EAGAIN;
+       }
+       mutex_unlock(&u->iolock);
+       sk_psock_put(sk, psock);
+       return copied;
+}
+
+static struct proto *unix_dgram_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_dgram_prot_lock);
+static struct proto unix_dgram_bpf_prot;
+
+static struct proto *unix_stream_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_stream_prot_lock);
+static struct proto unix_stream_bpf_prot;
+
+static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+       *prot        = *base;
+       prot->close  = sock_map_close;
+       prot->recvmsg = unix_bpf_recvmsg;
+}
+
+static void unix_stream_bpf_rebuild_protos(struct proto *prot,
+                                          const struct proto *base)
+{
+       *prot        = *base;
+       prot->close  = sock_map_close;
+       prot->recvmsg = unix_bpf_recvmsg;
+       prot->unhash  = sock_map_unhash;
+}
+
+static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
+{
+       if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
+               spin_lock_bh(&unix_dgram_prot_lock);
+               if (likely(ops != unix_dgram_prot_saved)) {
+                       unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
+                       smp_store_release(&unix_dgram_prot_saved, ops);
+               }
+               spin_unlock_bh(&unix_dgram_prot_lock);
+       }
+}
+
+static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
+{
+       if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
+               spin_lock_bh(&unix_stream_prot_lock);
+               if (likely(ops != unix_stream_prot_saved)) {
+                       unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
+                       smp_store_release(&unix_stream_prot_saved, ops);
+               }
+               spin_unlock_bh(&unix_stream_prot_lock);
+       }
+}
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+       if (sk->sk_type != SOCK_DGRAM)
+               return -EOPNOTSUPP;
+
+       if (restore) {
+               sk->sk_write_space = psock->saved_write_space;
+               WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+               return 0;
+       }
+
+       unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
+       WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
+       return 0;
+}
+
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+       if (restore) {
+               sk->sk_write_space = psock->saved_write_space;
+               WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+               return 0;
+       }
+
+       unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
+       WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
+       return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+       unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
+       unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
+
+}
index 16c88be..bf7cd47 100644 (file)
@@ -759,6 +759,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
        [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy),
        [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG },
+       [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 },
+       [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 },
+       [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 },
+       [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy),
 };
 
 /* policy for the key attributes */
@@ -6527,8 +6531,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
        err = rdev_change_station(rdev, dev, mac_addr, &params);
 
  out_put_vlan:
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
 
        return err;
 }
@@ -6763,8 +6766,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 
        err = rdev_add_station(rdev, dev, mac_addr, &params);
 
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
        return err;
 }
 
@@ -8489,8 +8491,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
                goto out_free;
 
        nl80211_send_scan_start(rdev, wdev);
-       if (wdev->netdev)
-               dev_hold(wdev->netdev);
+       dev_hold(wdev->netdev);
 
        return 0;
 
@@ -14803,6 +14804,106 @@ bad_tid_conf:
        return ret;
 }
 
+static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
+{
+       struct cfg80211_registered_device *rdev = info->user_ptr[0];
+       struct cfg80211_color_change_settings params = {};
+       struct net_device *dev = info->user_ptr[1];
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       struct nlattr **tb;
+       u16 offset;
+       int err;
+
+       if (!rdev->ops->color_change)
+               return -EOPNOTSUPP;
+
+       if (!wiphy_ext_feature_isset(&rdev->wiphy,
+                                    NL80211_EXT_FEATURE_BSS_COLOR))
+               return -EOPNOTSUPP;
+
+       if (wdev->iftype != NL80211_IFTYPE_AP)
+               return -EOPNOTSUPP;
+
+       if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] ||
+           !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] ||
+           !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS])
+               return -EINVAL;
+
+       params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]);
+       params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]);
+
+       err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next);
+       if (err)
+               return err;
+
+       tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL);
+       if (!tb)
+               return -ENOMEM;
+
+       err = nla_parse_nested(tb, NL80211_ATTR_MAX,
+                              info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS],
+                              nl80211_policy, info->extack);
+       if (err)
+               goto out;
+
+       err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change);
+       if (err)
+               goto out;
+
+       if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
+       if (offset >= params.beacon_color_change.tail_len) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (params.beacon_color_change.tail[offset] != params.count) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       params.counter_offset_beacon = offset;
+
+       if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
+               if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) !=
+                   sizeof(u16)) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
+               if (offset >= params.beacon_color_change.probe_resp_len) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               if (params.beacon_color_change.probe_resp[offset] !=
+                   params.count) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
+               params.counter_offset_presp = offset;
+       }
+
+       wdev_lock(wdev);
+       err = rdev_color_change(rdev, dev, &params);
+       wdev_unlock(wdev);
+
+out:
+       kfree(tb);
+       return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY                0x01
 #define NL80211_FLAG_NEED_NETDEV       0x02
 #define NL80211_FLAG_NEED_RTNL         0x04
@@ -14860,9 +14961,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                        return -ENETDOWN;
                }
 
-               if (dev)
-                       dev_hold(dev);
-
+               dev_hold(dev);
                info->user_ptr[0] = rdev;
        }
 
@@ -14884,8 +14983,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
                        struct wireless_dev *wdev = info->user_ptr[1];
 
-                       if (wdev->netdev)
-                               dev_put(wdev->netdev);
+                       dev_put(wdev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
@@ -15801,6 +15899,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
                .internal_flags = NL80211_FLAG_NEED_WIPHY |
                                  NL80211_FLAG_NEED_RTNL,
        },
+       {
+               .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = nl80211_color_change,
+               .flags = GENL_UNS_ADMIN_PERM,
+               .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+                                 NL80211_FLAG_NEED_RTNL,
+       },
 };
 
 static struct genl_family nl80211_fam __ro_after_init = {
@@ -17430,6 +17536,51 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
 }
 EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
 
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+                             enum nl80211_commands cmd, u8 count,
+                             u64 color_bitmap)
+{
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       struct wiphy *wiphy = wdev->wiphy;
+       struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+       struct sk_buff *msg;
+       void *hdr;
+
+       ASSERT_WDEV_LOCK(wdev);
+
+       trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+       if (!msg)
+               return -ENOMEM;
+
+       hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+       if (!hdr)
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+               goto nla_put_failure;
+
+       if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
+           nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
+               goto nla_put_failure;
+
+       if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION &&
+           nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP,
+                             color_bitmap, NL80211_ATTR_PAD))
+               goto nla_put_failure;
+
+       genlmsg_end(msg, hdr);
+
+       return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+                                      msg, 0, NL80211_MCGRP_MLME, gfp);
+
+nla_put_failure:
+       nlmsg_free(msg);
+       return -EINVAL;
+}
+EXPORT_SYMBOL(cfg80211_bss_color_notify);
+
 void
 nl80211_radar_notify(struct cfg80211_registered_device *rdev,
                     const struct cfg80211_chan_def *chandef,
index 36f1b59..ae2e1a8 100644 (file)
@@ -115,23 +115,22 @@ int ieee80211_radiotap_iterator_init(
        iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len);
        iterator->_arg_index = 0;
        iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present);
-       iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header);
+       iterator->_arg = (uint8_t *)radiotap_header->it_optional;
        iterator->_reset_on_ext = 0;
-       iterator->_next_bitmap = &radiotap_header->it_present;
-       iterator->_next_bitmap++;
+       iterator->_next_bitmap = radiotap_header->it_optional;
        iterator->_vns = vns;
        iterator->current_namespace = &radiotap_ns;
        iterator->is_radiotap_ns = 1;
 
        /* find payload start allowing for extended bitmap(s) */
 
-       if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) {
+       if (iterator->_bitmap_shifter & (BIT(IEEE80211_RADIOTAP_EXT))) {
                if ((unsigned long)iterator->_arg -
                    (unsigned long)iterator->_rtheader + sizeof(uint32_t) >
                    (unsigned long)iterator->_max_length)
                        return -EINVAL;
                while (get_unaligned_le32(iterator->_arg) &
-                                       (1 << IEEE80211_RADIOTAP_EXT)) {
+                                       (BIT(IEEE80211_RADIOTAP_EXT))) {
                        iterator->_arg += sizeof(uint32_t);
 
                        /*
index b1d37f5..ce6bf21 100644 (file)
@@ -1368,4 +1368,17 @@ static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev,
        return ret;
 }
 
+static inline int rdev_color_change(struct cfg80211_registered_device *rdev,
+                                   struct net_device *dev,
+                                   struct cfg80211_color_change_settings *params)
+{
+       int ret;
+
+       trace_rdev_color_change(&rdev->wiphy, dev, params);
+       ret = rdev->ops->color_change(&rdev->wiphy, dev, params);
+       trace_rdev_return_int(&rdev->wiphy, ret);
+
+       return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
index c2d0ff7..df87c7f 100644 (file)
@@ -171,9 +171,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
 {
        const struct ieee80211_regdomain *regd = NULL;
        const struct ieee80211_regdomain *wiphy_regd = NULL;
+       enum nl80211_dfs_regions dfs_region;
 
        rcu_read_lock();
        regd = get_cfg80211_regdom();
+       dfs_region = regd->dfs_region;
 
        if (!wiphy)
                goto out;
@@ -182,6 +184,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
        if (!wiphy_regd)
                goto out;
 
+       if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
+               dfs_region = wiphy_regd->dfs_region;
+               goto out;
+       }
+
        if (wiphy_regd->dfs_region == regd->dfs_region)
                goto out;
 
@@ -193,7 +200,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
 out:
        rcu_read_unlock();
 
-       return regd->dfs_region;
+       return dfs_region;
 }
 
 static void rcu_free_regdom(const struct ieee80211_regdomain *r)
index 7897b14..11c68b1 100644 (file)
@@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
        }
 #endif
 
-       if (wdev->netdev)
-               dev_put(wdev->netdev);
+       dev_put(wdev->netdev);
 
        kfree(rdev->int_scan_req);
        rdev->int_scan_req = NULL;
index 440bce5..19b78d4 100644 (file)
@@ -3597,6 +3597,52 @@ TRACE_EVENT(rdev_set_sar_specs,
                  WIPHY_PR_ARG, __entry->type, __entry->num)
 );
 
+TRACE_EVENT(rdev_color_change,
+       TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+                struct cfg80211_color_change_settings *params),
+       TP_ARGS(wiphy, netdev, params),
+       TP_STRUCT__entry(
+               WIPHY_ENTRY
+               NETDEV_ENTRY
+               __field(u8, count)
+               __field(u16, bcn_ofs)
+               __field(u16, pres_ofs)
+       ),
+       TP_fast_assign(
+               WIPHY_ASSIGN;
+               NETDEV_ASSIGN;
+               __entry->count = params->count;
+               __entry->bcn_ofs = params->counter_offset_beacon;
+               __entry->pres_ofs = params->counter_offset_presp;
+       ),
+       TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+                 ", count: %u",
+                 WIPHY_PR_ARG, NETDEV_PR_ARG,
+                 __entry->count)
+);
+
+TRACE_EVENT(cfg80211_bss_color_notify,
+       TP_PROTO(struct net_device *netdev,
+                enum nl80211_commands cmd,
+                u8 count, u64 color_bitmap),
+       TP_ARGS(netdev, cmd, count, color_bitmap),
+       TP_STRUCT__entry(
+               NETDEV_ENTRY
+               __field(u32, cmd)
+               __field(u8, count)
+               __field(u64, color_bitmap)
+       ),
+       TP_fast_assign(
+               NETDEV_ASSIGN;
+               __entry->cmd = cmd;
+               __entry->count = count;
+               __entry->color_bitmap = color_bitmap;
+       ),
+       TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx",
+                 NETDEV_PR_ARG, __entry->cmd, __entry->count,
+                 __entry->color_bitmap)
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
index 7f881f5..37d17a7 100644 (file)
@@ -3157,6 +3157,11 @@ ok:
        return dst;
 
 nopol:
+       if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+           !xfrm_default_allow(net, dir)) {
+               err = -EPERM;
+               goto error;
+       }
        if (!(flags & XFRM_LOOKUP_ICMP)) {
                dst = dst_orig;
                goto ok;
@@ -3545,6 +3550,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        }
 
        if (!pol) {
+               if (!xfrm_default_allow(net, dir)) {
+                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+                       return 0;
+               }
+
                if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
                        xfrm_secpath_reject(xerr_idx, skb, &fl);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3599,6 +3609,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
                                tpp[ti++] = &pols[pi]->xfrm_vec[i];
                }
                xfrm_nr = ti;
+
+               if (!xfrm_default_allow(net, dir) && !xfrm_nr) {
+                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+                       goto reject;
+               }
+
                if (npols > 1) {
                        xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
                        tpp = stp;
index 7aff641..03b66d1 100644 (file)
@@ -1961,6 +1961,59 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
        return skb;
 }
 
+static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct nlattr **attrs)
+{
+       struct net *net = sock_net(skb->sk);
+       struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
+       u8 dirmask;
+       u8 old_default = net->xfrm.policy_default;
+
+       if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
+               return -EINVAL;
+
+       dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+
+       net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
+                                   | (up->action << up->dirmask);
+
+       rt_genid_bump_all(net);
+
+       return 0;
+}
+
+static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+                           struct nlattr **attrs)
+{
+       struct sk_buff *r_skb;
+       struct nlmsghdr *r_nlh;
+       struct net *net = sock_net(skb->sk);
+       struct xfrm_userpolicy_default *r_up, *up;
+       int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
+       u32 portid = NETLINK_CB(skb).portid;
+       u32 seq = nlh->nlmsg_seq;
+
+       up = nlmsg_data(nlh);
+
+       r_skb = nlmsg_new(len, GFP_ATOMIC);
+       if (!r_skb)
+               return -ENOMEM;
+
+       r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0);
+       if (!r_nlh) {
+               kfree_skb(r_skb);
+               return -EMSGSIZE;
+       }
+
+       r_up = nlmsg_data(r_nlh);
+
+       r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
+       r_up->dirmask = up->dirmask;
+       nlmsg_end(r_skb, r_nlh);
+
+       return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+}
+
 static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                struct nlattr **attrs)
 {
@@ -2664,6 +2717,8 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
        [XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = sizeof(u32),
        [XFRM_MSG_NEWSPDINFO  - XFRM_MSG_BASE] = sizeof(u32),
        [XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = sizeof(u32),
+       [XFRM_MSG_SETDEFAULT  - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
+       [XFRM_MSG_GETDEFAULT  - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
 };
 EXPORT_SYMBOL_GPL(xfrm_msg_min);
 
@@ -2743,6 +2798,8 @@ static const struct xfrm_link {
                                                   .nla_pol = xfrma_spd_policy,
                                                   .nla_max = XFRMA_SPD_MAX },
        [XFRM_MSG_GETSPDINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo   },
+       [XFRM_MSG_SETDEFAULT  - XFRM_MSG_BASE] = { .doit = xfrm_set_default   },
+       [XFRM_MSG_GETDEFAULT  - XFRM_MSG_BASE] = { .doit = xfrm_get_default   },
 };
 
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
index 0b9548e..fcba217 100644 (file)
@@ -45,11 +45,13 @@ xdp_monitor
 xdp_redirect
 xdp_redirect_cpu
 xdp_redirect_map
+xdp_redirect_map_multi
 xdp_router_ipv4
 xdp_rxq_info
 xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock
+xdpsock_ctrl_proc
 xsk_fwd
 testfile.img
 hbm_out.log
index 036998d..4dc20be 100644 (file)
@@ -39,11 +39,6 @@ tprogs-y += lwt_len_hist
 tprogs-y += xdp_tx_iptunnel
 tprogs-y += test_map_in_map
 tprogs-y += per_socket_stats_example
-tprogs-y += xdp_redirect
-tprogs-y += xdp_redirect_map
-tprogs-y += xdp_redirect_map_multi
-tprogs-y += xdp_redirect_cpu
-tprogs-y += xdp_monitor
 tprogs-y += xdp_rxq_info
 tprogs-y += syscall_tp
 tprogs-y += cpustat
@@ -57,11 +52,18 @@ tprogs-y += xdp_sample_pkts
 tprogs-y += ibumad
 tprogs-y += hbm
 
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_redirect_map_multi
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect
+tprogs-y += xdp_monitor
+
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
 
 CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
 TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+XDP_SAMPLE := xdp_sample_user.o
 
 fds_example-objs := fds_example.o
 sockex1-objs := sockex1_user.o
@@ -98,11 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
 test_map_in_map-objs := test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_redirect-objs := xdp_redirect_user.o
-xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
-xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
-xdp_monitor-objs := xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
 syscall_tp-objs := syscall_tp_user.o
 cpustat-objs := cpustat_user.o
@@ -116,6 +113,12 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o
 ibumad-objs := ibumad_user.o
 hbm-objs := hbm.o $(CGROUP_HELPERS)
 
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE)
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
+xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
+xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
+xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
+
 # Tell kbuild to always build the programs
 always-y := $(tprogs-y)
 always-y += sockex1_kern.o
@@ -160,11 +163,6 @@ always-y += tcp_clamp_kern.o
 always-y += tcp_basertt_kern.o
 always-y += tcp_tos_reflect_kern.o
 always-y += tcp_dumpstats_kern.o
-always-y += xdp_redirect_kern.o
-always-y += xdp_redirect_map_kern.o
-always-y += xdp_redirect_map_multi_kern.o
-always-y += xdp_redirect_cpu_kern.o
-always-y += xdp_monitor_kern.o
 always-y += xdp_rxq_info_kern.o
 always-y += xdp2skb_meta_kern.o
 always-y += syscall_tp_kern.o
@@ -276,6 +274,11 @@ $(LIBBPF): FORCE
        $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
                LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
 
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL := $(BPFTOOLDIR)/bpftool
+$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)
+           $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../
+
 $(obj)/syscall_nrs.h:  $(obj)/syscall_nrs.s FORCE
        $(call filechk,offsets,__SYSCALL_NRS_H__)
 
@@ -306,6 +309,12 @@ verify_target_bpf: verify_cmds
 $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
 $(src)/*.c: verify_target_bpf $(LIBBPF)
 
+$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
+$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
+$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
+$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
+$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
+
 $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
 $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 $(obj)/hbm.o: $(src)/hbm.h
@@ -313,6 +322,76 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
 
 -include $(BPF_SAMPLES_PATH)/Makefile.target
 
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)                           \
+                    $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)    \
+                    ../../../../vmlinux                                \
+                    /sys/kernel/btf/vmlinux                            \
+                    /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+       $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+       $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+        | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
+       @echo "  CLANG-BPF " $@
+       $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+               -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+               -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+               -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \
+               -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
+               xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h
+clean-files += $(LINKED_SKELS)
+
+xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
+xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o
+xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
+xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
+xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
+
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+       @echo "  BPF GEN-OBJ " $(@:.skel.h=)
+       $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+       @echo "  BPF GEN-SKEL" $(@:.skel.h=)
+       $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
+
 # asm/sysreg.h - inline assembly used by it is incompatible with llvm.
 # But, there is no easy way to fix it, so just exclude it since it is
 # useless for BPF samples.
index 7621f55..5a368af 100644 (file)
@@ -73,3 +73,14 @@ quiet_cmd_tprog-cobjs        = CC  $@
       cmd_tprog-cobjs  = $(CC) $(tprogc_flags) -c -o $@ $<
 $(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
        $(call if_changed_dep,tprog-cobjs)
+
+# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+# TPROGS_CFLAGS causes conflicts
+XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \
+                    -I./tools/include \
+                    -I./tools/include/uapi \
+                    -I./tools/lib \
+                    -I./tools/testing/selftests/bpf
+$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \
+       $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+       $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $<
index cc3bce8..5495880 100644 (file)
@@ -167,7 +167,7 @@ static void prog_load(void)
 static void prog_attach_iptables(char *file)
 {
        int ret;
-       char rules[100];
+       char rules[256];
 
        if (bpf_obj_pin(prog_fd, file))
                error(1, errno, "bpf_obj_pin");
@@ -175,8 +175,13 @@ static void prog_attach_iptables(char *file)
                printf("file path too long: %s\n", file);
                exit(1);
        }
-       sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
-               file);
+       ret = snprintf(rules, sizeof(rules),
+                      "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
+                      file);
+       if (ret < 0 || ret >= sizeof(rules)) {
+               printf("error constructing iptables command\n");
+               exit(1);
+       }
        ret = system(rules);
        if (ret < 0) {
                printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
index 14b7929..4866afd 100644 (file)
@@ -20,6 +20,7 @@
        })
 
 #define MINBLOCK_US    1
+#define MAX_ENTRIES    10000
 
 struct key_t {
        char waker[TASK_COMM_LEN];
@@ -32,14 +33,14 @@ struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, struct key_t);
        __type(value, u64);
-       __uint(max_entries, 10000);
+       __uint(max_entries, MAX_ENTRIES);
 } counts SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, u32);
        __type(value, u64);
-       __uint(max_entries, 10000);
+       __uint(max_entries, MAX_ENTRIES);
 } start SEC(".maps");
 
 struct wokeby_t {
@@ -51,14 +52,14 @@ struct {
        __uint(type, BPF_MAP_TYPE_HASH);
        __type(key, u32);
        __type(value, struct wokeby_t);
-       __uint(max_entries, 10000);
+       __uint(max_entries, MAX_ENTRIES);
 } wokeby SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
        __uint(key_size, sizeof(u32));
        __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
-       __uint(max_entries, 10000);
+       __uint(max_entries, MAX_ENTRIES);
 } stackmap SEC(".maps");
 
 #define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
index e68b9ee..35db26f 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
 
+rm -r tmpmnt
 rm -f testfile.img
 dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
 DEVICE=$(losetup --show -f testfile.img)
index cea3994..566e644 100644 (file)
@@ -32,7 +32,7 @@ static void print_old_objects(int fd)
        __u64 key, next_key;
        struct pair v;
 
-       key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+       key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */
 
        key = -1;
        while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
index fdcd658..8be7ce1 100644 (file)
@@ -14,6 +14,11 @@ int main(int argc, char **argv)
        int ret = 0;
        FILE *f;
 
+       if (!argv[1]) {
+               fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+               return 0;
+       }
+
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
        obj = bpf_object__open_file(filename, NULL);
        if (libbpf_get_error(obj)) {
index 34b6439..f0c5d95 100644 (file)
@@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
index c787f4b..d8a64ab 100644 (file)
@@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c
new file mode 100644 (file)
index 0000000..cfb41e2
--- /dev/null
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * XDP monitor tool, based on tracepoints
+ */
+#include "xdp_sample.bpf.h"
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
deleted file mode 100644 (file)
index 5c955b8..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *  Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * XDP monitor tool, based on tracepoints
- */
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, u64);
-       __uint(max_entries, 2);
-       /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-#define XDP_UNKNOWN    XDP_REDIRECT + 1
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, u64);
-       __uint(max_entries, XDP_UNKNOWN + 1);
-} exception_cnt SEC(".maps");
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int prog_id;            //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12  size:4; signed:0;
-       int ifindex;            //      offset:16  size:4; signed:1;
-       int err;                //      offset:20  size:4; signed:1;
-       int to_ifindex;         //      offset:24  size:4; signed:1;
-       u32 map_id;             //      offset:28  size:4; signed:0;
-       int map_index;          //      offset:32  size:4; signed:1;
-};                             //      offset:36
-
-enum {
-       XDP_REDIRECT_SUCCESS = 0,
-       XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
-       u32 key = XDP_REDIRECT_ERROR;
-       int err = ctx->err;
-       u64 *cnt;
-
-       if (!err)
-               key = XDP_REDIRECT_SUCCESS;
-
-       cnt  = bpf_map_lookup_elem(&redirect_err_cnt, &key);
-       if (!cnt)
-               return 1;
-       *cnt += 1;
-
-       return 0; /* Indicate event was filtered (no further processing)*/
-       /*
-        * Returning 1 here would allow e.g. a perf-record tracepoint
-        * to see and record these events, but it doesn't work well
-        * in-practice as stopping perf-record also unload this
-        * bpf_prog.  Plus, there is additional overhead of doing so.
-        */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect")
-int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect_map")
-int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
-       u64 __pad;      // First 8 bytes are not accessible by bpf code
-       int prog_id;    //      offset:8;  size:4; signed:1;
-       u32 act;        //      offset:12; size:4; signed:0;
-       int ifindex;    //      offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
-       u64 *cnt;
-       u32 key;
-
-       key = ctx->act;
-       if (key > XDP_REDIRECT)
-               key = XDP_UNKNOWN;
-
-       cnt = bpf_map_lookup_elem(&exception_cnt, &key);
-       if (!cnt)
-               return 1;
-       *cnt += 1;
-
-       return 0;
-}
-
-/* Common stats data record shared with _user.c */
-struct datarec {
-       u64 processed;
-       u64 dropped;
-       u64 info;
-       u64 err;
-};
-#define MAX_CPUS 64
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int cpu;                //      offset:16; size:4; signed:1;
-       unsigned int drops;     //      offset:20; size:4; signed:0;
-       unsigned int processed; //      offset:24; size:4; signed:0;
-       int to_cpu;             //      offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
-       u32 to_cpu = ctx->to_cpu;
-       struct datarec *rec;
-
-       if (to_cpu >= MAX_CPUS)
-               return 1;
-
-       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-
-       /* Record bulk events, then userspace can calc average bulk size */
-       if (ctx->processed > 0)
-               rec->info += 1;
-
-       return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int cpu;                //      offset:16; size:4; signed:1;
-       unsigned int drops;     //      offset:20; size:4; signed:0;
-       unsigned int processed; //      offset:24; size:4; signed:0;
-       int sched;              //      offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-
-       /* Count times kthread yielded CPU via schedule call */
-       if (ctx->sched)
-               rec->info++;
-
-       return 0;
-}
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} devmap_xmit_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct devmap_xmit_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int from_ifindex;       //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int to_ifindex;         //      offset:16; size:4; signed:1;
-       int drops;              //      offset:20; size:4; signed:1;
-       int sent;               //      offset:24; size:4; signed:1;
-       int err;                //      offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_devmap_xmit")
-int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->sent;
-       rec->dropped   += ctx->drops;
-
-       /* Record bulk events, then userspace can calc average bulk size */
-       rec->info += 1;
-
-       /* Record error cases, where no frame were sent */
-       if (ctx->err)
-               rec->err++;
-
-       /* Catch API error of drv ndo_xdp_xmit sent more than count */
-       if (ctx->drops < 0)
-               rec->err++;
-
-       return 1;
-}
index 49ebc49..fb9391a 100644 (file)
@@ -1,15 +1,12 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
 static const char *__doc__=
- "XDP monitor tool, based on tracepoints\n"
-;
+"XDP monitor tool, based on tracepoints\n";
 
 static const char *__doc_err_only__=
- " NOTICE: Only tracking XDP redirect errors\n"
- "         Enable TX success stats via '--stats'\n"
- "         (which comes with a per packet processing overhead)\n"
-;
+" NOTICE: Only tracking XDP redirect errors\n"
+"         Enable redirect success stats via '-s/--stats'\n"
+"         (which comes with a per packet processing overhead)\n";
 
 #include <errno.h>
 #include <stdio.h>
@@ -20,768 +17,103 @@ static const char *__doc_err_only__=
 #include <ctype.h>
 #include <unistd.h>
 #include <locale.h>
-
 #include <sys/resource.h>
 #include <getopt.h>
 #include <net/if.h>
 #include <time.h>
-
 #include <signal.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 #include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_monitor.skel.h"
 
-enum map_type {
-       REDIRECT_ERR_CNT,
-       EXCEPTION_CNT,
-       CPUMAP_ENQUEUE_CNT,
-       CPUMAP_KTHREAD_CNT,
-       DEVMAP_XMIT_CNT,
-};
+static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT |
+                 SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT |
+                 SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
 
-static const char *const map_type_strings[] = {
-       [REDIRECT_ERR_CNT] = "redirect_err_cnt",
-       [EXCEPTION_CNT] = "exception_cnt",
-       [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
-       [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
-       [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
-};
-
-#define NUM_MAP 5
-#define NUM_TP 8
-
-static int tp_cnt;
-static int map_cnt;
-static int verbose = 1;
-static bool debug = false;
-struct bpf_map *map_data[NUM_MAP] = {};
-struct bpf_link *tp_links[NUM_TP] = {};
-struct bpf_object *obj;
+DEFINE_SAMPLE_INIT(xdp_monitor);
 
 static const struct option long_options[] = {
-       {"help",        no_argument,            NULL, 'h' },
-       {"debug",       no_argument,            NULL, 'D' },
-       {"stats",       no_argument,            NULL, 'S' },
-       {"sec",         required_argument,      NULL, 's' },
-       {0, 0, NULL,  0 }
-};
-
-static void int_exit(int sig)
-{
-       /* Detach tracepoints */
-       while (tp_cnt)
-               bpf_link__destroy(tp_links[--tp_cnt]);
-
-       bpf_object__close(obj);
-       exit(0);
-}
-
-/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
-#define EXIT_FAIL_MEM  5
-
-static void usage(char *argv[])
-{
-       int i;
-       printf("\nDOCUMENTATION:\n%s\n", __doc__);
-       printf("\n");
-       printf(" Usage: %s (options-see-below)\n",
-              argv[0]);
-       printf(" Listing options:\n");
-       for (i = 0; long_options[i].name != 0; i++) {
-               printf(" --%-15s", long_options[i].name);
-               if (long_options[i].flag != NULL)
-                       printf(" flag (internal value:%d)",
-                              *long_options[i].flag);
-               else
-                       printf("short-option: -%c",
-                              long_options[i].val);
-               printf("\n");
-       }
-       printf("\n");
-}
-
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
-       struct timespec t;
-       int res;
-
-       res = clock_gettime(CLOCK_MONOTONIC, &t);
-       if (res < 0) {
-               fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
-               exit(EXIT_FAILURE);
-       }
-       return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-enum {
-       REDIR_SUCCESS = 0,
-       REDIR_ERROR = 1,
-};
-#define REDIR_RES_MAX 2
-static const char *redir_names[REDIR_RES_MAX] = {
-       [REDIR_SUCCESS] = "Success",
-       [REDIR_ERROR]   = "Error",
-};
-static const char *err2str(int err)
-{
-       if (err < REDIR_RES_MAX)
-               return redir_names[err];
-       return NULL;
-}
-/* enum xdp_action */
-#define XDP_UNKNOWN    XDP_REDIRECT + 1
-#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
-static const char *xdp_action_names[XDP_ACTION_MAX] = {
-       [XDP_ABORTED]   = "XDP_ABORTED",
-       [XDP_DROP]      = "XDP_DROP",
-       [XDP_PASS]      = "XDP_PASS",
-       [XDP_TX]        = "XDP_TX",
-       [XDP_REDIRECT]  = "XDP_REDIRECT",
-       [XDP_UNKNOWN]   = "XDP_UNKNOWN",
-};
-static const char *action2str(int action)
-{
-       if (action < XDP_ACTION_MAX)
-               return xdp_action_names[action];
-       return NULL;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
-       __u64 processed;
-       __u64 dropped;
-       __u64 info;
-       __u64 err;
-};
-#define MAX_CPUS 64
-
-/* Userspace structs for collection of stats from maps */
-struct record {
-       __u64 timestamp;
-       struct datarec total;
-       struct datarec *cpu;
+       { "help", no_argument, NULL, 'h' },
+       { "stats", no_argument, NULL, 's' },
+       { "interval", required_argument, NULL, 'i' },
+       { "verbose", no_argument, NULL, 'v' },
+       {}
 };
-struct u64rec {
-       __u64 processed;
-};
-struct record_u64 {
-       /* record for _kern side __u64 values */
-       __u64 timestamp;
-       struct u64rec total;
-       struct u64rec *cpu;
-};
-
-struct stats_record {
-       struct record_u64 xdp_redirect[REDIR_RES_MAX];
-       struct record_u64 xdp_exception[XDP_ACTION_MAX];
-       struct record xdp_cpumap_kthread;
-       struct record xdp_cpumap_enqueue[MAX_CPUS];
-       struct record xdp_devmap_xmit;
-};
-
-static bool map_collect_record(int fd, __u32 key, struct record *rec)
-{
-       /* For percpu maps, userspace gets a value per possible CPU */
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       struct datarec values[nr_cpus];
-       __u64 sum_processed = 0;
-       __u64 sum_dropped = 0;
-       __u64 sum_info = 0;
-       __u64 sum_err = 0;
-       int i;
-
-       if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
-               fprintf(stderr,
-                       "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
-               return false;
-       }
-       /* Get time as close as possible to reading map contents */
-       rec->timestamp = gettime();
-
-       /* Record and sum values from each CPU */
-       for (i = 0; i < nr_cpus; i++) {
-               rec->cpu[i].processed = values[i].processed;
-               sum_processed        += values[i].processed;
-               rec->cpu[i].dropped = values[i].dropped;
-               sum_dropped        += values[i].dropped;
-               rec->cpu[i].info = values[i].info;
-               sum_info        += values[i].info;
-               rec->cpu[i].err = values[i].err;
-               sum_err        += values[i].err;
-       }
-       rec->total.processed = sum_processed;
-       rec->total.dropped   = sum_dropped;
-       rec->total.info      = sum_info;
-       rec->total.err       = sum_err;
-       return true;
-}
-
-static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
-{
-       /* For percpu maps, userspace gets a value per possible CPU */
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       struct u64rec values[nr_cpus];
-       __u64 sum_total = 0;
-       int i;
-
-       if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
-               fprintf(stderr,
-                       "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
-               return false;
-       }
-       /* Get time as close as possible to reading map contents */
-       rec->timestamp = gettime();
-
-       /* Record and sum values from each CPU */
-       for (i = 0; i < nr_cpus; i++) {
-               rec->cpu[i].processed = values[i].processed;
-               sum_total            += values[i].processed;
-       }
-       rec->total.processed = sum_total;
-       return true;
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
-       double period_ = 0;
-       __u64 period = 0;
-
-       period = r->timestamp - p->timestamp;
-       if (period > 0)
-               period_ = ((double) period / NANOSEC_PER_SEC);
-
-       return period_;
-}
-
-static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
-{
-       double period_ = 0;
-       __u64 period = 0;
-
-       period = r->timestamp - p->timestamp;
-       if (period > 0)
-               period_ = ((double) period / NANOSEC_PER_SEC);
-
-       return period_;
-}
-
-static double calc_pps(struct datarec *r, struct datarec *p, double period)
-{
-       __u64 packets = 0;
-       double pps = 0;
-
-       if (period > 0) {
-               packets = r->processed - p->processed;
-               pps = packets / period;
-       }
-       return pps;
-}
-
-static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
-{
-       __u64 packets = 0;
-       double pps = 0;
-
-       if (period > 0) {
-               packets = r->processed - p->processed;
-               pps = packets / period;
-       }
-       return pps;
-}
-
-static double calc_drop(struct datarec *r, struct datarec *p, double period)
-{
-       __u64 packets = 0;
-       double pps = 0;
-
-       if (period > 0) {
-               packets = r->dropped - p->dropped;
-               pps = packets / period;
-       }
-       return pps;
-}
-
-static double calc_info(struct datarec *r, struct datarec *p, double period)
-{
-       __u64 packets = 0;
-       double pps = 0;
-
-       if (period > 0) {
-               packets = r->info - p->info;
-               pps = packets / period;
-       }
-       return pps;
-}
-
-static double calc_err(struct datarec *r, struct datarec *p, double period)
-{
-       __u64 packets = 0;
-       double pps = 0;
-
-       if (period > 0) {
-               packets = r->err - p->err;
-               pps = packets / period;
-       }
-       return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
-                       struct stats_record *stats_prev,
-                       bool err_only)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       int rec_i = 0, i, to_cpu;
-       double t = 0, pps = 0;
-
-       /* Header */
-       printf("%-15s %-7s %-12s %-12s %-9s\n",
-              "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
-
-       /* tracepoint: xdp:xdp_redirect_* */
-       if (err_only)
-               rec_i = REDIR_ERROR;
-
-       for (; rec_i < REDIR_RES_MAX; rec_i++) {
-               struct record_u64 *rec, *prev;
-               char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
-               char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
-               rec  =  &stats_rec->xdp_redirect[rec_i];
-               prev = &stats_prev->xdp_redirect[rec_i];
-               t = calc_period_u64(rec, prev);
-
-               for (i = 0; i < nr_cpus; i++) {
-                       struct u64rec *r = &rec->cpu[i];
-                       struct u64rec *p = &prev->cpu[i];
-
-                       pps = calc_pps_u64(r, p, t);
-                       if (pps > 0)
-                               printf(fmt1, "XDP_REDIRECT", i,
-                                      rec_i ? 0.0: pps, rec_i ? pps : 0.0,
-                                      err2str(rec_i));
-               }
-               pps = calc_pps_u64(&rec->total, &prev->total, t);
-               printf(fmt2, "XDP_REDIRECT", "total",
-                      rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
-       }
-
-       /* tracepoint: xdp:xdp_exception */
-       for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
-               struct record_u64 *rec, *prev;
-               char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
-               char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
-               rec  =  &stats_rec->xdp_exception[rec_i];
-               prev = &stats_prev->xdp_exception[rec_i];
-               t = calc_period_u64(rec, prev);
-
-               for (i = 0; i < nr_cpus; i++) {
-                       struct u64rec *r = &rec->cpu[i];
-                       struct u64rec *p = &prev->cpu[i];
-
-                       pps = calc_pps_u64(r, p, t);
-                       if (pps > 0)
-                               printf(fmt1, "Exception", i,
-                                      0.0, pps, action2str(rec_i));
-               }
-               pps = calc_pps_u64(&rec->total, &prev->total, t);
-               if (pps > 0)
-                       printf(fmt2, "Exception", "total",
-                              0.0, pps, action2str(rec_i));
-       }
-
-       /* cpumap enqueue stats */
-       for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
-               char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
-               char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
-               struct record *rec, *prev;
-               char *info_str = "";
-               double drop, info;
-
-               rec  =  &stats_rec->xdp_cpumap_enqueue[to_cpu];
-               prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop(r, p, t);
-                       info = calc_info(r, p, t);
-                       if (info > 0) {
-                               info_str = "bulk-average";
-                               info = pps / info; /* calc average bulk size */
-                       }
-                       if (pps > 0)
-                               printf(fmt1, "cpumap-enqueue",
-                                      i, to_cpu, pps, drop, info, info_str);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               if (pps > 0) {
-                       drop = calc_drop(&rec->total, &prev->total, t);
-                       info = calc_info(&rec->total, &prev->total, t);
-                       if (info > 0) {
-                               info_str = "bulk-average";
-                               info = pps / info; /* calc average bulk size */
-                       }
-                       printf(fmt2, "cpumap-enqueue",
-                              "sum", to_cpu, pps, drop, info, info_str);
-               }
-       }
-
-       /* cpumap kthread stats */
-       {
-               char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
-               char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
-               struct record *rec, *prev;
-               double drop, info;
-               char *i_str = "";
-
-               rec  =  &stats_rec->xdp_cpumap_kthread;
-               prev = &stats_prev->xdp_cpumap_kthread;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop(r, p, t);
-                       info = calc_info(r, p, t);
-                       if (info > 0)
-                               i_str = "sched";
-                       if (pps > 0 || drop > 0)
-                               printf(fmt1, "cpumap-kthread",
-                                      i, pps, drop, info, i_str);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop(&rec->total, &prev->total, t);
-               info = calc_info(&rec->total, &prev->total, t);
-               if (info > 0)
-                       i_str = "sched-sum";
-               printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
-       }
-
-       /* devmap ndo_xdp_xmit stats */
-       {
-               char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
-               char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
-               struct record *rec, *prev;
-               double drop, info, err;
-               char *i_str = "";
-               char *err_str = "";
-
-               rec  =  &stats_rec->xdp_devmap_xmit;
-               prev = &stats_prev->xdp_devmap_xmit;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop(r, p, t);
-                       info = calc_info(r, p, t);
-                       err  = calc_err(r, p, t);
-                       if (info > 0) {
-                               i_str = "bulk-average";
-                               info = (pps+drop) / info; /* calc avg bulk */
-                       }
-                       if (err > 0)
-                               err_str = "drv-err";
-                       if (pps > 0 || drop > 0)
-                               printf(fmt1, "devmap-xmit",
-                                      i, pps, drop, info, i_str, err_str);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop(&rec->total, &prev->total, t);
-               info = calc_info(&rec->total, &prev->total, t);
-               err  = calc_err(&rec->total, &prev->total, t);
-               if (info > 0) {
-                       i_str = "bulk-average";
-                       info = (pps+drop) / info; /* calc avg bulk */
-               }
-               if (err > 0)
-                       err_str = "drv-err";
-               printf(fmt2, "devmap-xmit", "total", pps, drop,
-                      info, i_str, err_str);
-       }
-
-       printf("\n");
-}
-
-static bool stats_collect(struct stats_record *rec)
-{
-       int fd;
-       int i;
-
-       /* TODO: Detect if someone unloaded the perf event_fd's, as
-        * this can happen by someone running perf-record -e
-        */
-
-       fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
-       for (i = 0; i < REDIR_RES_MAX; i++)
-               map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
-
-       fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
-       for (i = 0; i < XDP_ACTION_MAX; i++) {
-               map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
-       }
-
-       fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
-       for (i = 0; i < MAX_CPUS; i++)
-               map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
-
-       fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
-       map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
-
-       fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
-       map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
-
-       return true;
-}
-
-static void *alloc_rec_per_cpu(int record_size)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       void *array;
-
-       array = calloc(nr_cpus, record_size);
-       if (!array) {
-               fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
-               exit(EXIT_FAIL_MEM);
-       }
-       return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
-       struct stats_record *rec;
-       int rec_sz;
-       int i;
-
-       /* Alloc main stats_record structure */
-       rec = calloc(1, sizeof(*rec));
-       if (!rec) {
-               fprintf(stderr, "Mem alloc error\n");
-               exit(EXIT_FAIL_MEM);
-       }
-
-       /* Alloc stats stored per CPU for each record */
-       rec_sz = sizeof(struct u64rec);
-       for (i = 0; i < REDIR_RES_MAX; i++)
-               rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
-
-       for (i = 0; i < XDP_ACTION_MAX; i++)
-               rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
-
-       rec_sz = sizeof(struct datarec);
-       rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
-       rec->xdp_devmap_xmit.cpu    = alloc_rec_per_cpu(rec_sz);
-
-       for (i = 0; i < MAX_CPUS; i++)
-               rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
-
-       return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
-       int i;
-
-       for (i = 0; i < REDIR_RES_MAX; i++)
-               free(r->xdp_redirect[i].cpu);
-
-       for (i = 0; i < XDP_ACTION_MAX; i++)
-               free(r->xdp_exception[i].cpu);
-
-       free(r->xdp_cpumap_kthread.cpu);
-       free(r->xdp_devmap_xmit.cpu);
-
-       for (i = 0; i < MAX_CPUS; i++)
-               free(r->xdp_cpumap_enqueue[i].cpu);
-
-       free(r);
-}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
-       struct stats_record *tmp;
-
-       tmp = *a;
-       *a = *b;
-       *b = tmp;
-}
-
-static void stats_poll(int interval, bool err_only)
-{
-       struct stats_record *rec, *prev;
-
-       rec  = alloc_stats_record();
-       prev = alloc_stats_record();
-       stats_collect(rec);
-
-       if (err_only)
-               printf("\n%s\n", __doc_err_only__);
-
-       /* Trick to pretty printf with thousands separators use %' */
-       setlocale(LC_NUMERIC, "en_US");
-
-       /* Header */
-       if (verbose)
-               printf("\n%s", __doc__);
-
-       /* TODO Need more advanced stats on error types */
-       if (verbose) {
-               printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
-               printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
-               printf("\n");
-       }
-       fflush(stdout);
-
-       while (1) {
-               swap(&prev, &rec);
-               stats_collect(rec);
-               stats_print(rec, prev, err_only);
-               fflush(stdout);
-               sleep(interval);
-       }
-
-       free_stats_record(rec);
-       free_stats_record(prev);
-}
-
-static void print_bpf_prog_info(void)
-{
-       struct bpf_program *prog;
-       struct bpf_map *map;
-       int i = 0;
-
-       /* Prog info */
-       printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
-       bpf_object__for_each_program(prog, obj) {
-               printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
-               i++;
-       }
-
-       i = 0;
-       /* Maps info */
-       printf("Loaded BPF prog have %d map(s)\n", map_cnt);
-       bpf_object__for_each_map(map, obj) {
-               const char *name = bpf_map__name(map);
-               int fd           = bpf_map__fd(map);
-
-               printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
-               i++;
-       }
-
-       /* Event info */
-       printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
-       for (i = 0; i < tp_cnt; i++) {
-               int fd = bpf_link__fd(tp_links[i]);
-
-               if (fd != -1)
-                       printf(" - event_fd[%d] = fd(%d)\n", i, fd);
-       }
-}
 
 int main(int argc, char **argv)
 {
-       struct bpf_program *prog;
-       int longindex = 0, opt;
-       int ret = EXIT_FAILURE;
-       enum map_type type;
-       char filename[256];
-
-       /* Default settings: */
+       unsigned long interval = 2;
+       int ret = EXIT_FAIL_OPTION;
+       struct xdp_monitor *skel;
        bool errors_only = true;
-       int interval = 2;
+       int longindex = 0, opt;
+       bool error = true;
 
        /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hDSs:",
+       while ((opt = getopt_long(argc, argv, "si:vh",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
-               case 'D':
-                       debug = true;
-                       break;
-               case 'S':
+               case 's':
                        errors_only = false;
+                       mask |= SAMPLE_REDIRECT_CNT;
                        break;
-               case 's':
-                       interval = atoi(optarg);
+               case 'i':
+                       interval = strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       sample_switch_mode();
                        break;
                case 'h':
+                       error = false;
                default:
-                       usage(argv);
+                       sample_usage(argv, long_options, __doc__, mask, error);
                        return ret;
                }
        }
 
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-       /* Remove tracepoint program when program is interrupted or killed */
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
-
-       obj = bpf_object__open_file(filename, NULL);
-       if (libbpf_get_error(obj)) {
-               printf("ERROR: opening BPF object file failed\n");
-               obj = NULL;
-               goto cleanup;
-       }
-
-       /* load BPF program */
-       if (bpf_object__load(obj)) {
-               printf("ERROR: loading BPF object file failed\n");
-               goto cleanup;
+       skel = xdp_monitor__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to xdp_monitor__open: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end;
        }
 
-       for (type = 0; type < NUM_MAP; type++) {
-               map_data[type] =
-                       bpf_object__find_map_by_name(obj, map_type_strings[type]);
-
-               if (libbpf_get_error(map_data[type])) {
-                       printf("ERROR: finding a map in obj file failed\n");
-                       goto cleanup;
-               }
-               map_cnt++;
+       ret = sample_init_pre_load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       bpf_object__for_each_program(prog, obj) {
-               tp_links[tp_cnt] = bpf_program__attach(prog);
-               if (libbpf_get_error(tp_links[tp_cnt])) {
-                       printf("ERROR: bpf_program__attach failed\n");
-                       tp_links[tp_cnt] = NULL;
-                       goto cleanup;
-               }
-               tp_cnt++;
+       ret = xdp_monitor__load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       if (debug) {
-               print_bpf_prog_info();
+       ret = sample_init(skel, mask);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       /* Unload/stop tracepoint event by closing bpf_link's */
-       if (errors_only) {
-               /* The bpf_link[i] depend on the order of
-                * the functions was defined in _kern.c
-                */
-               bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
-               tp_links[2] = NULL;
+       if (errors_only)
+               printf("%s", __doc_err_only__);
 
-               bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
-               tp_links[3] = NULL;
+       ret = sample_run(interval, NULL, NULL);
+       if (ret < 0) {
+               fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
        }
-
-       stats_poll(interval, errors_only);
-
-       ret = EXIT_SUCCESS;
-
-cleanup:
-       /* Detach tracepoints */
-       while (tp_cnt)
-               bpf_link__destroy(tp_links[--tp_cnt]);
-
-       bpf_object__close(obj);
-       return ret;
+       ret = EXIT_OK;
+end_destroy:
+       xdp_monitor__destroy(skel);
+end:
+       sample_exit(ret);
 }
diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c
new file mode 100644 (file)
index 0000000..7c02bac
--- /dev/null
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+const volatile int ifindex_out;
+
+SEC("xdp")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       swap_src_dst_mac(data);
+       return bpf_redirect(ifindex_out, 0);
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c
new file mode 100644 (file)
index 0000000..f10fe3c
--- /dev/null
@@ -0,0 +1,541 @@
+/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
+ *
+ *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+#include "hash_func01.h"
+
+/* Special map type that can XDP_REDIRECT frames to another CPU */
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+} cpu_map SEC(".maps");
+
+/* Set of maps controlling available CPU, and for iterating through
+ * selectable redirect CPUs.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+} cpus_available SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+       __uint(max_entries, 1);
+} cpus_count SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+       __uint(max_entries, 1);
+} cpus_iterator SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 1);
+} tx_port SEC(".maps");
+
+char tx_mac_addr[ETH_ALEN];
+
+/* Helper parse functions */
+
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+              u16 *eth_proto, u64 *l3_offset)
+{
+       u16 eth_type;
+       u64 offset;
+
+       offset = sizeof(*eth);
+       if ((void *)eth + offset > data_end)
+               return false;
+
+       eth_type = eth->h_proto;
+
+       /* Skip non 802.3 Ethertypes */
+       if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
+               return false;
+
+       /* Handle VLAN tagged packet */
+       if (eth_type == bpf_htons(ETH_P_8021Q) ||
+           eth_type == bpf_htons(ETH_P_8021AD)) {
+               struct vlan_hdr *vlan_hdr;
+
+               vlan_hdr = (void *)eth + offset;
+               offset += sizeof(*vlan_hdr);
+               if ((void *)eth + offset > data_end)
+                       return false;
+               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+       }
+       /* Handle double VLAN tagged packet */
+       if (eth_type == bpf_htons(ETH_P_8021Q) ||
+           eth_type == bpf_htons(ETH_P_8021AD)) {
+               struct vlan_hdr *vlan_hdr;
+
+               vlan_hdr = (void *)eth + offset;
+               offset += sizeof(*vlan_hdr);
+               if ((void *)eth + offset > data_end)
+                       return false;
+               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+       }
+
+       *eth_proto = bpf_ntohs(eth_type);
+       *l3_offset = offset;
+       return true;
+}
+
+static __always_inline
+u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+       struct udphdr *udph;
+       u16 dport;
+
+       if (iph + 1 > data_end)
+               return 0;
+       if (!(iph->protocol == IPPROTO_UDP))
+               return 0;
+
+       udph = (void *)(iph + 1);
+       if (udph + 1 > data_end)
+               return 0;
+
+       dport = bpf_ntohs(udph->dest);
+       return dport;
+}
+
+static __always_inline
+int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+
+       if (iph + 1 > data_end)
+               return 0;
+       return iph->protocol;
+}
+
+static __always_inline
+int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct ipv6hdr *ip6h = data + nh_off;
+
+       if (ip6h + 1 > data_end)
+               return 0;
+       return ip6h->nexthdr;
+}
+
+SEC("xdp")
+int  xdp_prognum0_no_touch(struct xdp_md *ctx)
+{
+       u32 key = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 *cpu_selected;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+
+       /* Only use first entry in cpus_available */
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum1_touch_data(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u32 *cpu_selected;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+       u16 eth_type;
+
+       /* Only use first entry in cpus_available */
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       /* Validate packet length is minimum Eth header size */
+       if (eth + 1 > data_end)
+               return XDP_ABORTED;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       /* Read packet data, and use it (drop non 802.3 Ethertypes) */
+       eth_type = eth->h_proto;
+       if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+               NO_TEAR_INC(rec->dropped);
+               return XDP_DROP;
+       }
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum2_round_robin(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+
+       u32 *cpu_selected;
+       u32 *cpu_iterator;
+       u32 *cpu_max;
+       u32 cpu_idx;
+
+       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+       if (!cpu_max)
+               return XDP_ABORTED;
+
+       cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
+       if (!cpu_iterator)
+               return XDP_ABORTED;
+       cpu_idx = *cpu_iterator;
+
+       *cpu_iterator += 1;
+       if (*cpu_iterator == *cpu_max)
+               *cpu_iterator = 0;
+
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       u8 ip_proto = IPPROTO_UDP;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 *cpu_lookup;
+       u32 cpu_idx = 0;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Extract L4 protocol */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               ip_proto = get_proto_ipv4(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               ip_proto = get_proto_ipv6(ctx, l3_offset);
+               break;
+       case ETH_P_ARP:
+               cpu_idx = 0; /* ARP packet handled on separate CPU */
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       /* Choose CPU based on L4 protocol */
+       switch (ip_proto) {
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6:
+               cpu_idx = 2;
+               break;
+       case IPPROTO_TCP:
+               cpu_idx = 0;
+               break;
+       case IPPROTO_UDP:
+               cpu_idx = 1;
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       u8 ip_proto = IPPROTO_UDP;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 *cpu_lookup;
+       u32 cpu_idx = 0;
+       u16 dest_port;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Extract L4 protocol */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               ip_proto = get_proto_ipv4(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               ip_proto = get_proto_ipv6(ctx, l3_offset);
+               break;
+       case ETH_P_ARP:
+               cpu_idx = 0; /* ARP packet handled on separate CPU */
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       /* Choose CPU based on L4 protocol */
+       switch (ip_proto) {
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6:
+               cpu_idx = 2;
+               break;
+       case IPPROTO_TCP:
+               cpu_idx = 0;
+               break;
+       case IPPROTO_UDP:
+               cpu_idx = 1;
+               /* DDoS filter UDP port 9 (pktgen) */
+               dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
+               if (dest_port == 9) {
+                       NO_TEAR_INC(rec->dropped);
+                       return XDP_DROP;
+               }
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Hashing initval */
+#define INITVAL 15485863
+
+static __always_inline
+u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+       u32 cpu_hash;
+
+       if (iph + 1 > data_end)
+               return 0;
+
+       cpu_hash = iph->saddr + iph->daddr;
+       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
+
+       return cpu_hash;
+}
+
+static __always_inline
+u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct ipv6hdr *ip6h = data + nh_off;
+       u32 cpu_hash;
+
+       if (ip6h + 1 > data_end)
+               return 0;
+
+       cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
+       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
+
+       return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
+ * hashing scheme is symmetric, meaning swapping IP src/dest still hit
+ * same CPU.
+ */
+SEC("xdp")
+int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 cpu_idx = 0;
+       u32 *cpu_lookup;
+       u32 key0 = 0;
+       u32 *cpu_max;
+       u32 cpu_hash;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+       if (!cpu_max)
+               return XDP_ABORTED;
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Hash for IPv4 and IPv6 */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+               break;
+       case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+       default:
+               cpu_hash = 0;
+       }
+
+       /* Choose CPU based on hash */
+       cpu_idx = cpu_hash % *cpu_max;
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpumap/redirect")
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       swap_src_dst_mac(data);
+       return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+SEC("xdp_cpumap/pass")
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp_cpumap/drop")
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
+{
+       return XDP_DROP;
+}
+
+SEC("xdp_devmap/egress")
+int xdp_redirect_egress_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
deleted file mode 100644 (file)
index 8255025..0000000
+++ /dev/null
@@ -1,730 +0,0 @@
-/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
- *
- *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/if_vlan.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
-
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "hash_func01.h"
-
-#define MAX_CPUS NR_CPUS
-
-/* Special map type that can XDP_REDIRECT frames to another CPU */
-struct {
-       __uint(type, BPF_MAP_TYPE_CPUMAP);
-       __uint(key_size, sizeof(u32));
-       __uint(value_size, sizeof(struct bpf_cpumap_val));
-       __uint(max_entries, MAX_CPUS);
-} cpu_map SEC(".maps");
-
-/* Common stats data record to keep userspace more simple */
-struct datarec {
-       __u64 processed;
-       __u64 dropped;
-       __u64 issue;
-       __u64 xdp_pass;
-       __u64 xdp_drop;
-       __u64 xdp_redirect;
-};
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback.  Redirect TX errors can be caught via a tracepoint.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} rx_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 2);
-       /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
-/* Set of maps controlling available CPU, and for iterating through
- * selectable redirect CPUs.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, MAX_CPUS);
-} cpus_available SEC(".maps");
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, 1);
-} cpus_count SEC(".maps");
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, 1);
-} cpus_iterator SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} exception_cnt SEC(".maps");
-
-/* Helper parse functions */
-
-/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
- *
- * Returns false on error and non-supported ether-type
- */
-struct vlan_hdr {
-       __be16 h_vlan_TCI;
-       __be16 h_vlan_encapsulated_proto;
-};
-
-static __always_inline
-bool parse_eth(struct ethhdr *eth, void *data_end,
-              u16 *eth_proto, u64 *l3_offset)
-{
-       u16 eth_type;
-       u64 offset;
-
-       offset = sizeof(*eth);
-       if ((void *)eth + offset > data_end)
-               return false;
-
-       eth_type = eth->h_proto;
-
-       /* Skip non 802.3 Ethertypes */
-       if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
-               return false;
-
-       /* Handle VLAN tagged packet */
-       if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
-               struct vlan_hdr *vlan_hdr;
-
-               vlan_hdr = (void *)eth + offset;
-               offset += sizeof(*vlan_hdr);
-               if ((void *)eth + offset > data_end)
-                       return false;
-               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
-       }
-       /* Handle double VLAN tagged packet */
-       if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
-               struct vlan_hdr *vlan_hdr;
-
-               vlan_hdr = (void *)eth + offset;
-               offset += sizeof(*vlan_hdr);
-               if ((void *)eth + offset > data_end)
-                       return false;
-               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
-       }
-
-       *eth_proto = ntohs(eth_type);
-       *l3_offset = offset;
-       return true;
-}
-
-static __always_inline
-u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-       struct udphdr *udph;
-       u16 dport;
-
-       if (iph + 1 > data_end)
-               return 0;
-       if (!(iph->protocol == IPPROTO_UDP))
-               return 0;
-
-       udph = (void *)(iph + 1);
-       if (udph + 1 > data_end)
-               return 0;
-
-       dport = ntohs(udph->dest);
-       return dport;
-}
-
-static __always_inline
-int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-
-       if (iph + 1 > data_end)
-               return 0;
-       return iph->protocol;
-}
-
-static __always_inline
-int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ipv6hdr *ip6h = data + nh_off;
-
-       if (ip6h + 1 > data_end)
-               return 0;
-       return ip6h->nexthdr;
-}
-
-SEC("xdp_cpu_map0")
-int  xdp_prognum0_no_touch(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct datarec *rec;
-       u32 *cpu_selected;
-       u32 cpu_dest;
-       u32 key = 0;
-
-       /* Only use first entry in cpus_available */
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map1_touch_data")
-int  xdp_prognum1_touch_data(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       struct datarec *rec;
-       u32 *cpu_selected;
-       u32 cpu_dest;
-       u16 eth_type;
-       u32 key = 0;
-
-       /* Only use first entry in cpus_available */
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Validate packet length is minimum Eth header size */
-       if (eth + 1 > data_end)
-               return XDP_ABORTED;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       /* Read packet data, and use it (drop non 802.3 Ethertypes) */
-       eth_type = eth->h_proto;
-       if (ntohs(eth_type) < ETH_P_802_3_MIN) {
-               rec->dropped++;
-               return XDP_DROP;
-       }
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map2_round_robin")
-int  xdp_prognum2_round_robin(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       struct datarec *rec;
-       u32 cpu_dest;
-       u32 *cpu_lookup;
-       u32 key0 = 0;
-
-       u32 *cpu_selected;
-       u32 *cpu_iterator;
-       u32 *cpu_max;
-       u32 cpu_idx;
-
-       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
-       if (!cpu_max)
-               return XDP_ABORTED;
-
-       cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
-       if (!cpu_iterator)
-               return XDP_ABORTED;
-       cpu_idx = *cpu_iterator;
-
-       *cpu_iterator += 1;
-       if (*cpu_iterator == *cpu_max)
-               *cpu_iterator = 0;
-
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key0);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map3_proto_separate")
-int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u32 *cpu_lookup;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Extract L4 protocol */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               ip_proto = get_proto_ipv4(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               ip_proto = get_proto_ipv6(ctx, l3_offset);
-               break;
-       case ETH_P_ARP:
-               cpu_idx = 0; /* ARP packet handled on separate CPU */
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       /* Choose CPU based on L4 protocol */
-       switch (ip_proto) {
-       case IPPROTO_ICMP:
-       case IPPROTO_ICMPV6:
-               cpu_idx = 2;
-               break;
-       case IPPROTO_TCP:
-               cpu_idx = 0;
-               break;
-       case IPPROTO_UDP:
-               cpu_idx = 1;
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map4_ddos_filter_pktgen")
-int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u16 dest_port;
-       u32 *cpu_lookup;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Extract L4 protocol */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               ip_proto = get_proto_ipv4(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               ip_proto = get_proto_ipv6(ctx, l3_offset);
-               break;
-       case ETH_P_ARP:
-               cpu_idx = 0; /* ARP packet handled on separate CPU */
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       /* Choose CPU based on L4 protocol */
-       switch (ip_proto) {
-       case IPPROTO_ICMP:
-       case IPPROTO_ICMPV6:
-               cpu_idx = 2;
-               break;
-       case IPPROTO_TCP:
-               cpu_idx = 0;
-               break;
-       case IPPROTO_UDP:
-               cpu_idx = 1;
-               /* DDoS filter UDP port 9 (pktgen) */
-               dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
-               if (dest_port == 9) {
-                       if (rec)
-                               rec->dropped++;
-                       return XDP_DROP;
-               }
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-/* Hashing initval */
-#define INITVAL 15485863
-
-static __always_inline
-u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-       u32 cpu_hash;
-
-       if (iph + 1 > data_end)
-               return 0;
-
-       cpu_hash = iph->saddr + iph->daddr;
-       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
-
-       return cpu_hash;
-}
-
-static __always_inline
-u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ipv6hdr *ip6h = data + nh_off;
-       u32 cpu_hash;
-
-       if (ip6h + 1 > data_end)
-               return 0;
-
-       cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
-       cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
-       cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
-       cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
-       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
-
-       return cpu_hash;
-}
-
-/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
- * hashing scheme is symmetric, meaning swapping IP src/dest still hit
- * same CPU.
- */
-SEC("xdp_cpu_map5_lb_hash_ip_pairs")
-int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u32 *cpu_lookup;
-       u32 *cpu_max;
-       u32 cpu_hash;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
-       if (!cpu_max)
-               return XDP_ABORTED;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Hash for IPv4 and IPv6 */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
-               break;
-       case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
-       default:
-               cpu_hash = 0;
-       }
-
-       /* Choose CPU based on hash */
-       cpu_idx = cpu_hash % *cpu_max;
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-char _license[] SEC("license") = "GPL";
-
-/*** Trace point code ***/
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
-       u64 __pad;      // First 8 bytes are not accessible by bpf code
-       int prog_id;    //      offset:8;  size:4; signed:1;
-       u32 act;        //      offset:12  size:4; signed:0;
-       int ifindex;    //      offset:16  size:4; signed:1;
-       int err;        //      offset:20  size:4; signed:1;
-       int to_ifindex; //      offset:24  size:4; signed:1;
-       u32 map_id;     //      offset:28  size:4; signed:0;
-       int map_index;  //      offset:32  size:4; signed:1;
-};                     //      offset:36
-
-enum {
-       XDP_REDIRECT_SUCCESS = 0,
-       XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
-       u32 key = XDP_REDIRECT_ERROR;
-       struct datarec *rec;
-       int err = ctx->err;
-
-       if (!err)
-               key = XDP_REDIRECT_SUCCESS;
-
-       rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->dropped += 1;
-
-       return 0; /* Indicate event was filtered (no further processing)*/
-       /*
-        * Returning 1 here would allow e.g. a perf-record tracepoint
-        * to see and record these events, but it doesn't work well
-        * in-practice as stopping perf-record also unload this
-        * bpf_prog.  Plus, there is additional overhead of doing so.
-        */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
-       u64 __pad;      // First 8 bytes are not accessible by bpf code
-       int prog_id;    //      offset:8;  size:4; signed:1;
-       u32 act;        //      offset:12; size:4; signed:0;
-       int ifindex;    //      offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&exception_cnt, &key);
-       if (!rec)
-               return 1;
-       rec->dropped += 1;
-
-       return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int cpu;                //      offset:16; size:4; signed:1;
-       unsigned int drops;     //      offset:20; size:4; signed:0;
-       unsigned int processed; //      offset:24; size:4; signed:0;
-       int to_cpu;             //      offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
-       u32 to_cpu = ctx->to_cpu;
-       struct datarec *rec;
-
-       if (to_cpu >= MAX_CPUS)
-               return 1;
-
-       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-
-       /* Record bulk events, then userspace can calc average bulk size */
-       if (ctx->processed > 0)
-               rec->issue += 1;
-
-       /* Inception: It's possible to detect overload situations, via
-        * this tracepoint.  This can be used for creating a feedback
-        * loop to XDP, which can take appropriate actions to mitigate
-        * this overload situation.
-        */
-       return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
-       u64 __pad;                      // First 8 bytes are not accessible
-       int map_id;                     //      offset:8;  size:4; signed:1;
-       u32 act;                        //      offset:12; size:4; signed:0;
-       int cpu;                        //      offset:16; size:4; signed:1;
-       unsigned int drops;             //      offset:20; size:4; signed:0;
-       unsigned int processed;         //      offset:24; size:4; signed:0;
-       int sched;                      //      offset:28; size:4; signed:1;
-       unsigned int xdp_pass;          //      offset:32; size:4; signed:0;
-       unsigned int xdp_drop;          //      offset:36; size:4; signed:0;
-       unsigned int xdp_redirect;      //      offset:40; size:4; signed:0;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-       rec->xdp_pass  += ctx->xdp_pass;
-       rec->xdp_drop  += ctx->xdp_drop;
-       rec->xdp_redirect  += ctx->xdp_redirect;
-
-       /* Count times kthread yielded CPU via schedule call */
-       if (ctx->sched)
-               rec->issue++;
-
-       return 0;
-}
index 5764116..6e25fba 100644 (file)
@@ -2,7 +2,16 @@
 /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
  */
 static const char *__doc__ =
-       " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
+"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
+"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
+"Valid specification for CPUMAP BPF program:\n"
+"  --mprog-name/-e pass (use built-in XDP_PASS program)\n"
+"  --mprog-name/-e drop (use built-in XDP_DROP program)\n"
+"  --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
+"  Custom CPUMAP BPF program:\n"
+"    --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
+"    Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
+"    to configure DEVMAP in BPF object <filename>\n";
 
 #include <errno.h>
 #include <signal.h>
@@ -18,558 +27,62 @@ static const char *__doc__ =
 #include <net/if.h>
 #include <time.h>
 #include <linux/limits.h>
-
 #include <arpa/inet.h>
 #include <linux/if_link.h>
-
-/* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 6
-
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
-
 #include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_cpu.skel.h"
 
-static int ifindex = -1;
-static char ifname_buf[IF_NAMESIZE];
-static char *ifname;
-static __u32 prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int n_cpus;
-
-enum map_type {
-       CPU_MAP,
-       RX_CNT,
-       REDIRECT_ERR_CNT,
-       CPUMAP_ENQUEUE_CNT,
-       CPUMAP_KTHREAD_CNT,
-       CPUS_AVAILABLE,
-       CPUS_COUNT,
-       CPUS_ITERATOR,
-       EXCEPTION_CNT,
-};
-
-static const char *const map_type_strings[] = {
-       [CPU_MAP] = "cpu_map",
-       [RX_CNT] = "rx_cnt",
-       [REDIRECT_ERR_CNT] = "redirect_err_cnt",
-       [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
-       [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
-       [CPUS_AVAILABLE] = "cpus_available",
-       [CPUS_COUNT] = "cpus_count",
-       [CPUS_ITERATOR] = "cpus_iterator",
-       [EXCEPTION_CNT] = "exception_cnt",
-};
+static int map_fd;
+static int avail_fd;
+static int count_fd;
 
-#define NUM_TP 5
-#define NUM_MAP 9
-struct bpf_link *tp_links[NUM_TP] = {};
-static int map_fds[NUM_MAP];
-static int tp_cnt = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+                 SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
+                 SAMPLE_EXCEPTION_CNT;
 
-/* Exit return codes */
-#define EXIT_OK                0
-#define EXIT_FAIL              1
-#define EXIT_FAIL_OPTION       2
-#define EXIT_FAIL_XDP          3
-#define EXIT_FAIL_BPF          4
-#define EXIT_FAIL_MEM          5
+DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
 
 static const struct option long_options[] = {
-       {"help",        no_argument,            NULL, 'h' },
-       {"dev",         required_argument,      NULL, 'd' },
-       {"skb-mode",    no_argument,            NULL, 'S' },
-       {"sec",         required_argument,      NULL, 's' },
-       {"progname",    required_argument,      NULL, 'p' },
-       {"qsize",       required_argument,      NULL, 'q' },
-       {"cpu",         required_argument,      NULL, 'c' },
-       {"stress-mode", no_argument,            NULL, 'x' },
-       {"no-separators", no_argument,          NULL, 'z' },
-       {"force",       no_argument,            NULL, 'F' },
-       {"mprog-disable", no_argument,          NULL, 'n' },
-       {"mprog-name",  required_argument,      NULL, 'e' },
-       {"mprog-filename", required_argument,   NULL, 'f' },
-       {"redirect-device", required_argument,  NULL, 'r' },
-       {"redirect-map", required_argument,     NULL, 'm' },
-       {0, 0, NULL,  0 }
+       { "help", no_argument, NULL, 'h' },
+       { "dev", required_argument, NULL, 'd' },
+       { "skb-mode", no_argument, NULL, 'S' },
+       { "progname", required_argument, NULL, 'p' },
+       { "qsize", required_argument, NULL, 'q' },
+       { "cpu", required_argument, NULL, 'c' },
+       { "stress-mode", no_argument, NULL, 'x' },
+       { "force", no_argument, NULL, 'F' },
+       { "interval", required_argument, NULL, 'i' },
+       { "verbose", no_argument, NULL, 'v' },
+       { "stats", no_argument, NULL, 's' },
+       { "mprog-name", required_argument, NULL, 'e' },
+       { "mprog-filename", required_argument, NULL, 'f' },
+       { "redirect-device", required_argument, NULL, 'r' },
+       { "redirect-map", required_argument, NULL, 'm' },
+       {}
 };
 
-static void int_exit(int sig)
-{
-       __u32 curr_prog_id = 0;
-
-       if (ifindex > -1) {
-               if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
-                       exit(EXIT_FAIL);
-               }
-               if (prog_id == curr_prog_id) {
-                       fprintf(stderr,
-                               "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
-                               ifindex, ifname);
-                       bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
-               } else if (!curr_prog_id) {
-                       printf("couldn't find a prog id on a given iface\n");
-               } else {
-                       printf("program on interface changed, not removing\n");
-               }
-       }
-       /* Detach tracepoints */
-       while (tp_cnt)
-               bpf_link__destroy(tp_links[--tp_cnt]);
-
-       exit(EXIT_OK);
-}
-
 static void print_avail_progs(struct bpf_object *obj)
 {
        struct bpf_program *pos;
 
+       printf(" Programs to be used for -p/--progname:\n");
        bpf_object__for_each_program(pos, obj) {
-               if (bpf_program__is_xdp(pos))
-                       printf(" %s\n", bpf_program__section_name(pos));
-       }
-}
-
-static void usage(char *argv[], struct bpf_object *obj)
-{
-       int i;
-
-       printf("\nDOCUMENTATION:\n%s\n", __doc__);
-       printf("\n");
-       printf(" Usage: %s (options-see-below)\n", argv[0]);
-       printf(" Listing options:\n");
-       for (i = 0; long_options[i].name != 0; i++) {
-               printf(" --%-12s", long_options[i].name);
-               if (long_options[i].flag != NULL)
-                       printf(" flag (internal value:%d)",
-                               *long_options[i].flag);
-               else
-                       printf(" short-option: -%c",
-                               long_options[i].val);
-               printf("\n");
-       }
-       printf("\n Programs to be used for --progname:\n");
-       print_avail_progs(obj);
-       printf("\n");
-}
-
-/* gettime returns the current time of day in nanoseconds.
- * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
- *       clock_gettime (ns) =>  9ns (CLOCK_MONOTONIC_COARSE)
- */
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
-       struct timespec t;
-       int res;
-
-       res = clock_gettime(CLOCK_MONOTONIC, &t);
-       if (res < 0) {
-               fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
-               exit(EXIT_FAIL);
-       }
-       return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
-       __u64 processed;
-       __u64 dropped;
-       __u64 issue;
-       __u64 xdp_pass;
-       __u64 xdp_drop;
-       __u64 xdp_redirect;
-};
-struct record {
-       __u64 timestamp;
-       struct datarec total;
-       struct datarec *cpu;
-};
-struct stats_record {
-       struct record rx_cnt;
-       struct record redir_err;
-       struct record kthread;
-       struct record exception;
-       struct record enq[];
-};
-
-static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
-{
-       /* For percpu maps, userspace gets a value per possible CPU */
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       struct datarec values[nr_cpus];
-       __u64 sum_xdp_redirect = 0;
-       __u64 sum_xdp_pass = 0;
-       __u64 sum_xdp_drop = 0;
-       __u64 sum_processed = 0;
-       __u64 sum_dropped = 0;
-       __u64 sum_issue = 0;
-       int i;
-
-       if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
-               fprintf(stderr,
-                       "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
-               return false;
-       }
-       /* Get time as close as possible to reading map contents */
-       rec->timestamp = gettime();
-
-       /* Record and sum values from each CPU */
-       for (i = 0; i < nr_cpus; i++) {
-               rec->cpu[i].processed = values[i].processed;
-               sum_processed        += values[i].processed;
-               rec->cpu[i].dropped = values[i].dropped;
-               sum_dropped        += values[i].dropped;
-               rec->cpu[i].issue = values[i].issue;
-               sum_issue        += values[i].issue;
-               rec->cpu[i].xdp_pass = values[i].xdp_pass;
-               sum_xdp_pass += values[i].xdp_pass;
-               rec->cpu[i].xdp_drop = values[i].xdp_drop;
-               sum_xdp_drop += values[i].xdp_drop;
-               rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
-               sum_xdp_redirect += values[i].xdp_redirect;
-       }
-       rec->total.processed = sum_processed;
-       rec->total.dropped   = sum_dropped;
-       rec->total.issue     = sum_issue;
-       rec->total.xdp_pass  = sum_xdp_pass;
-       rec->total.xdp_drop  = sum_xdp_drop;
-       rec->total.xdp_redirect = sum_xdp_redirect;
-       return true;
-}
-
-static struct datarec *alloc_record_per_cpu(void)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       struct datarec *array;
-
-       array = calloc(nr_cpus, sizeof(struct datarec));
-       if (!array) {
-               fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
-               exit(EXIT_FAIL_MEM);
-       }
-       return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
-       struct stats_record *rec;
-       int i, size;
-
-       size = sizeof(*rec) + n_cpus * sizeof(struct record);
-       rec = malloc(size);
-       if (!rec) {
-               fprintf(stderr, "Mem alloc error\n");
-               exit(EXIT_FAIL_MEM);
-       }
-       memset(rec, 0, size);
-       rec->rx_cnt.cpu    = alloc_record_per_cpu();
-       rec->redir_err.cpu = alloc_record_per_cpu();
-       rec->kthread.cpu   = alloc_record_per_cpu();
-       rec->exception.cpu = alloc_record_per_cpu();
-       for (i = 0; i < n_cpus; i++)
-               rec->enq[i].cpu = alloc_record_per_cpu();
-
-       return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
-       int i;
-
-       for (i = 0; i < n_cpus; i++)
-               free(r->enq[i].cpu);
-       free(r->exception.cpu);
-       free(r->kthread.cpu);
-       free(r->redir_err.cpu);
-       free(r->rx_cnt.cpu);
-       free(r);
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
-       double period_ = 0;
-       __u64 period = 0;
-
-       period = r->timestamp - p->timestamp;
-       if (period > 0)
-               period_ = ((double) period / NANOSEC_PER_SEC);
-
-       return period_;
-}
-
-static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
-{
-       __u64 packets = 0;
-       __u64 pps = 0;
-
-       if (period_ > 0) {
-               packets = r->processed - p->processed;
-               pps = packets / period_;
-       }
-       return pps;
-}
-
-static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
-{
-       __u64 packets = 0;
-       __u64 pps = 0;
-
-       if (period_ > 0) {
-               packets = r->dropped - p->dropped;
-               pps = packets / period_;
-       }
-       return pps;
-}
-
-static __u64 calc_errs_pps(struct datarec *r,
-                           struct datarec *p, double period_)
-{
-       __u64 packets = 0;
-       __u64 pps = 0;
-
-       if (period_ > 0) {
-               packets = r->issue - p->issue;
-               pps = packets / period_;
-       }
-       return pps;
-}
-
-static void calc_xdp_pps(struct datarec *r, struct datarec *p,
-                        double *xdp_pass, double *xdp_drop,
-                        double *xdp_redirect, double period_)
-{
-       *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
-       if (period_ > 0) {
-               *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
-               *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
-               *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
-       }
-}
-
-static void stats_print(struct stats_record *stats_rec,
-                       struct stats_record *stats_prev,
-                       char *prog_name, char *mprog_name, int mprog_fd)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       double pps = 0, drop = 0, err = 0;
-       bool mprog_enabled = false;
-       struct record *rec, *prev;
-       int to_cpu;
-       double t;
-       int i;
-
-       if (mprog_fd > 0)
-               mprog_enabled = true;
-
-       /* Header */
-       printf("Running XDP/eBPF prog_name:%s\n", prog_name);
-       printf("%-15s %-7s %-14s %-11s %-9s\n",
-              "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
-
-       /* XDP rx_cnt */
-       {
-               char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
-               char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
-               char *errstr = "";
-
-               rec  = &stats_rec->rx_cnt;
-               prev = &stats_prev->rx_cnt;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps = calc_pps(r, p, t);
-                       drop = calc_drop_pps(r, p, t);
-                       err  = calc_errs_pps(r, p, t);
-                       if (err > 0)
-                               errstr = "cpu-dest/err";
-                       if (pps > 0)
-                               printf(fmt_rx, "XDP-RX",
-                                       i, pps, drop, err, errstr);
-               }
-               pps  = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop_pps(&rec->total, &prev->total, t);
-               err  = calc_errs_pps(&rec->total, &prev->total, t);
-               printf(fm2_rx, "XDP-RX", "total", pps, drop);
-       }
-
-       /* cpumap enqueue stats */
-       for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
-               char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
-               char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
-               char *errstr = "";
-
-               rec  =  &stats_rec->enq[to_cpu];
-               prev = &stats_prev->enq[to_cpu];
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop_pps(r, p, t);
-                       err  = calc_errs_pps(r, p, t);
-                       if (err > 0) {
-                               errstr = "bulk-average";
-                               err = pps / err; /* calc average bulk size */
-                       }
-                       if (pps > 0)
-                               printf(fmt, "cpumap-enqueue",
-                                      i, to_cpu, pps, drop, err, errstr);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               if (pps > 0) {
-                       drop = calc_drop_pps(&rec->total, &prev->total, t);
-                       err  = calc_errs_pps(&rec->total, &prev->total, t);
-                       if (err > 0) {
-                               errstr = "bulk-average";
-                               err = pps / err; /* calc average bulk size */
-                       }
-                       printf(fm2, "cpumap-enqueue",
-                              "sum", to_cpu, pps, drop, err, errstr);
-               }
-       }
-
-       /* cpumap kthread stats */
-       {
-               char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
-               char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
-               char *e_str = "";
-
-               rec  = &stats_rec->kthread;
-               prev = &stats_prev->kthread;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop_pps(r, p, t);
-                       err  = calc_errs_pps(r, p, t);
-                       if (err > 0)
-                               e_str = "sched";
-                       if (pps > 0)
-                               printf(fmt_k, "cpumap_kthread",
-                                      i, pps, drop, err, e_str);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop_pps(&rec->total, &prev->total, t);
-               err  = calc_errs_pps(&rec->total, &prev->total, t);
-               if (err > 0)
-                       e_str = "sched-sum";
-               printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
-       }
-
-       /* XDP redirect err tracepoints (very unlikely) */
-       {
-               char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
-               char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
-               rec  = &stats_rec->redir_err;
-               prev = &stats_prev->redir_err;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop_pps(r, p, t);
-                       if (pps > 0)
-                               printf(fmt_err, "redirect_err", i, pps, drop);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop_pps(&rec->total, &prev->total, t);
-               printf(fm2_err, "redirect_err", "total", pps, drop);
-       }
-
-       /* XDP general exception tracepoints */
-       {
-               char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
-               char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
-               rec  = &stats_rec->exception;
-               prev = &stats_prev->exception;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       pps  = calc_pps(r, p, t);
-                       drop = calc_drop_pps(r, p, t);
-                       if (pps > 0)
-                               printf(fmt_err, "xdp_exception", i, pps, drop);
-               }
-               pps = calc_pps(&rec->total, &prev->total, t);
-               drop = calc_drop_pps(&rec->total, &prev->total, t);
-               printf(fm2_err, "xdp_exception", "total", pps, drop);
-       }
-
-       /* CPUMAP attached XDP program that runs on remote/destination CPU */
-       if (mprog_enabled) {
-               char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
-               char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
-               double xdp_pass, xdp_drop, xdp_redirect;
-
-               printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
-               printf("%-15s %-7s %-14s %-11s %-9s\n",
-                      "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
-
-               rec  = &stats_rec->kthread;
-               prev = &stats_prev->kthread;
-               t = calc_period(rec, prev);
-               for (i = 0; i < nr_cpus; i++) {
-                       struct datarec *r = &rec->cpu[i];
-                       struct datarec *p = &prev->cpu[i];
-
-                       calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
-                                    &xdp_redirect, t);
-                       if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
-                               printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
-                                      xdp_redirect);
+               if (bpf_program__is_xdp(pos)) {
+                       if (!strncmp(bpf_program__name(pos), "xdp_prognum",
+                                    sizeof("xdp_prognum") - 1))
+                               printf(" %s\n", bpf_program__name(pos));
                }
-               calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
-                            &xdp_redirect, t);
-               printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
        }
-
-       printf("\n");
-       fflush(stdout);
 }
 
-static void stats_collect(struct stats_record *rec)
+static void usage(char *argv[], const struct option *long_options,
+                 const char *doc, int mask, bool error, struct bpf_object *obj)
 {
-       int fd, i;
-
-       fd = map_fds[RX_CNT];
-       map_collect_percpu(fd, 0, &rec->rx_cnt);
-
-       fd = map_fds[REDIRECT_ERR_CNT];
-       map_collect_percpu(fd, 1, &rec->redir_err);
-
-       fd = map_fds[CPUMAP_ENQUEUE_CNT];
-       for (i = 0; i < n_cpus; i++)
-               map_collect_percpu(fd, i, &rec->enq[i]);
-
-       fd = map_fds[CPUMAP_KTHREAD_CNT];
-       map_collect_percpu(fd, 0, &rec->kthread);
-
-       fd = map_fds[EXCEPTION_CNT];
-       map_collect_percpu(fd, 0, &rec->exception);
-}
-
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
-       struct stats_record *tmp;
-
-       tmp = *a;
-       *a = *b;
-       *b = tmp;
+       sample_usage(argv, long_options, doc, mask, error);
+       print_avail_progs(obj);
 }
 
 static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
@@ -582,39 +95,41 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
        /* Add a CPU entry to cpumap, as this allocate a cpu entry in
         * the kernel for the cpu.
         */
-       ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
-       if (ret) {
-               fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
-               exit(EXIT_FAIL_BPF);
+       ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
+       if (ret < 0) {
+               fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
+               return ret;
        }
 
        /* Inform bpf_prog's that a new CPU is available to select
         * from via some control maps.
         */
-       ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
-       if (ret) {
-               fprintf(stderr, "Add to avail CPUs failed\n");
-               exit(EXIT_FAIL_BPF);
+       ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
+       if (ret < 0) {
+               fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
+               return ret;
        }
 
        /* When not replacing/updating existing entry, bump the count */
-       ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
-       if (ret) {
-               fprintf(stderr, "Failed reading curr cpus_count\n");
-               exit(EXIT_FAIL_BPF);
+       ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
+       if (ret < 0) {
+               fprintf(stderr, "Failed reading curr cpus_count: %s\n",
+                       strerror(errno));
+               return ret;
        }
        if (new) {
                curr_cpus_count++;
-               ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
+               ret = bpf_map_update_elem(count_fd, &key,
                                          &curr_cpus_count, 0);
-               if (ret) {
-                       fprintf(stderr, "Failed write curr cpus_count\n");
-                       exit(EXIT_FAIL_BPF);
+               if (ret < 0) {
+                       fprintf(stderr, "Failed write curr cpus_count: %s\n",
+                               strerror(errno));
+                       return ret;
                }
        }
-       /* map_fd[7] = cpus_iterator */
-       printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
-              new ? "Add-new":"Replace", cpu, avail_idx,
+
+       printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
+              new ? "Add new" : "Replace", cpu, avail_idx,
               value->qsize, value->bpf_prog.fd, curr_cpus_count);
 
        return 0;
@@ -623,24 +138,29 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
 /* CPUs are zero-indexed. Thus, add a special sentinel default value
  * in map cpus_available to mark CPU index'es not configured
  */
-static void mark_cpus_unavailable(void)
+static int mark_cpus_unavailable(void)
 {
+       int ret, i, n_cpus = libbpf_num_possible_cpus();
        __u32 invalid_cpu = n_cpus;
-       int ret, i;
 
        for (i = 0; i < n_cpus; i++) {
-               ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
+               ret = bpf_map_update_elem(avail_fd, &i,
                                          &invalid_cpu, 0);
-               if (ret) {
-                       fprintf(stderr, "Failed marking CPU unavailable\n");
-                       exit(EXIT_FAIL_BPF);
+               if (ret < 0) {
+                       fprintf(stderr, "Failed marking CPU unavailable: %s\n",
+                               strerror(errno));
+                       return ret;
                }
        }
+
+       return 0;
 }
 
 /* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(struct bpf_cpumap_val *value)
+static void stress_cpumap(void *ctx)
 {
+       struct bpf_cpumap_val *value = ctx;
+
        /* Changing qsize will cause kernel to free and alloc a new
         * bpf_cpu_map_entry, with an associated/complicated tear-down
         * procedure.
@@ -653,220 +173,263 @@ static void stress_cpumap(struct bpf_cpumap_val *value)
        create_cpu_entry(1, value, 0, false);
 }
 
-static void stats_poll(int interval, bool use_separators, char *prog_name,
-                      char *mprog_name, struct bpf_cpumap_val *value,
-                      bool stress_mode)
+static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
+                          const char *redir_interface, const char *redir_map,
+                          const char *mprog_filename, const char *mprog_name)
 {
-       struct stats_record *record, *prev;
-       int mprog_fd;
-
-       record = alloc_stats_record();
-       prev   = alloc_stats_record();
-       stats_collect(record);
-
-       /* Trick to pretty printf with thousands separators use %' */
-       if (use_separators)
-               setlocale(LC_NUMERIC, "en_US");
-
-       while (1) {
-               swap(&prev, &record);
-               mprog_fd = value->bpf_prog.fd;
-               stats_collect(record);
-               stats_print(record, prev, prog_name, mprog_name, mprog_fd);
-               sleep(interval);
-               if (stress_mode)
-                       stress_cpumap(value);
-       }
-
-       free_stats_record(record);
-       free_stats_record(prev);
-}
-
-static int init_tracepoints(struct bpf_object *obj)
-{
-       struct bpf_program *prog;
-
-       bpf_object__for_each_program(prog, obj) {
-               if (bpf_program__is_tracepoint(prog) != true)
-                       continue;
-
-               tp_links[tp_cnt] = bpf_program__attach(prog);
-               if (libbpf_get_error(tp_links[tp_cnt])) {
-                       tp_links[tp_cnt] = NULL;
-                       return -EINVAL;
+       if (mprog_filename) {
+               struct bpf_program *prog;
+               struct bpf_object *obj;
+               int ret;
+
+               if (!mprog_name) {
+                       fprintf(stderr, "BPF program not specified for file %s\n",
+                               mprog_filename);
+                       goto end;
+               }
+               if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
+                       fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
+                               redir_interface ? "device" : "map", redir_interface ? "map" : "device");
+                       goto end;
                }
-               tp_cnt++;
-       }
-
-       return 0;
-}
-
-static int init_map_fds(struct bpf_object *obj)
-{
-       enum map_type type;
-
-       for (type = 0; type < NUM_MAP; type++) {
-               map_fds[type] =
-                       bpf_object__find_map_fd_by_name(obj,
-                                                       map_type_strings[type]);
-
-               if (map_fds[type] < 0)
-                       return -ENOENT;
-       }
-
-       return 0;
-}
 
-static int load_cpumap_prog(char *file_name, char *prog_name,
-                           char *redir_interface, char *redir_map)
-{
-       struct bpf_prog_load_attr prog_load_attr = {
-               .prog_type              = BPF_PROG_TYPE_XDP,
-               .expected_attach_type   = BPF_XDP_CPUMAP,
-               .file = file_name,
-       };
-       struct bpf_program *prog;
-       struct bpf_object *obj;
-       int fd;
+               /* Custom BPF program */
+               obj = bpf_object__open_file(mprog_filename, NULL);
+               if (!obj) {
+                       ret = -errno;
+                       fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
+                               strerror(errno));
+                       return ret;
+               }
 
-       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
-               return -1;
+               ret = bpf_object__load(obj);
+               if (ret < 0) {
+                       ret = -errno;
+                       fprintf(stderr, "Failed to bpf_object__load: %s\n",
+                               strerror(errno));
+                       return ret;
+               }
 
-       if (fd < 0) {
-               fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
-                       strerror(errno));
-               return fd;
-       }
+               if (redir_map) {
+                       int err, redir_map_fd, ifindex_out, key = 0;
 
-       if (redir_interface && redir_map) {
-               int err, map_fd, ifindex_out, key = 0;
+                       redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+                       if (redir_map_fd < 0) {
+                               fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
+                                       strerror(errno));
+                               return redir_map_fd;
+                       }
 
-               map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
-               if (map_fd < 0)
-                       return map_fd;
+                       ifindex_out = if_nametoindex(redir_interface);
+                       if (!ifindex_out)
+                               ifindex_out = strtoul(redir_interface, NULL, 0);
+                       if (!ifindex_out) {
+                               fprintf(stderr, "Bad interface name or index\n");
+                               return -EINVAL;
+                       }
 
-               ifindex_out = if_nametoindex(redir_interface);
-               if (!ifindex_out)
-                       return -1;
+                       err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
+                       if (err < 0)
+                               return err;
+               }
 
-               err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
-               if (err < 0)
-                       return err;
-       }
+               prog = bpf_object__find_program_by_name(obj, mprog_name);
+               if (!prog) {
+                       ret = -errno;
+                       fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
+                               strerror(errno));
+                       return ret;
+               }
 
-       prog = bpf_object__find_program_by_title(obj, prog_name);
-       if (!prog) {
-               fprintf(stderr, "bpf_object__find_program_by_title failed\n");
-               return EXIT_FAIL;
+               return bpf_program__fd(prog);
+       } else {
+               if (mprog_name) {
+                       if (redir_interface || redir_map) {
+                               fprintf(stderr, "Need to specify --mprog-filename/-f\n");
+                               goto end;
+                       }
+                       if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
+                               /* Use built-in pass/drop programs */
+                               return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
+                                       : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
+                       } else {
+                               fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
+                                       mprog_name);
+                               goto end;
+                       }
+               } else {
+                       if (redir_map) {
+                               fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
+                                       " --redirect-device with --redirect-map\n");
+                               goto end;
+                       }
+                       if (redir_interface) {
+                               /* Use built-in devmap redirect */
+                               struct bpf_devmap_val val = {};
+                               int ifindex_out, err;
+                               __u32 key = 0;
+
+                               if (!redir_interface)
+                                       return 0;
+
+                               ifindex_out = if_nametoindex(redir_interface);
+                               if (!ifindex_out)
+                                       ifindex_out = strtoul(redir_interface, NULL, 0);
+                               if (!ifindex_out) {
+                                       fprintf(stderr, "Bad interface name or index\n");
+                                       return -EINVAL;
+                               }
+
+                               if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
+                                       printf("Get interface %d mac failed\n", ifindex_out);
+                                       return -EINVAL;
+                               }
+
+                               val.ifindex = ifindex_out;
+                               val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
+                               err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
+                               if (err < 0)
+                                       return -errno;
+
+                               return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
+                       }
+               }
        }
 
-       return bpf_program__fd(prog);
+       /* Disabled */
+       return 0;
+end:
+       fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
+       return -EINVAL;
 }
 
 int main(int argc, char **argv)
 {
-       char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
-       char *mprog_filename = "xdp_redirect_kern.o";
-       char *redir_interface = NULL, *redir_map = NULL;
-       char *mprog_name = "xdp_redirect_dummy";
-       bool mprog_disable = false;
-       struct bpf_prog_load_attr prog_load_attr = {
-               .prog_type      = BPF_PROG_TYPE_UNSPEC,
-       };
-       struct bpf_prog_info info = {};
-       __u32 info_len = sizeof(info);
+       const char *redir_interface = NULL, *redir_map = NULL;
+       const char *mprog_filename = NULL, *mprog_name = NULL;
+       struct xdp_redirect_cpu *skel;
+       struct bpf_map_info info = {};
+       char ifname_buf[IF_NAMESIZE];
        struct bpf_cpumap_val value;
-       bool use_separators = true;
+       __u32 infosz = sizeof(info);
+       int ret = EXIT_FAIL_OPTION;
+       unsigned long interval = 2;
        bool stress_mode = false;
        struct bpf_program *prog;
-       struct bpf_object *obj;
-       int err = EXIT_FAIL;
-       char filename[256];
+       const char *prog_name;
+       bool generic = false;
+       bool force = false;
        int added_cpus = 0;
+       bool error = true;
        int longindex = 0;
-       int interval = 2;
        int add_cpu = -1;
-       int opt, prog_fd;
-       int *cpu, i;
+       int ifindex = -1;
+       int *cpu, i, opt;
+       char *ifname;
        __u32 qsize;
-
-       n_cpus = get_nprocs_conf();
-
-       /* Notice: choosing he queue size is very important with the
-        * ixgbe driver, because it's driver page recycling trick is
-        * dependend on pages being returned quickly.  The number of
-        * out-standing packets in the system must be less-than 2x
-        * RX-ring size.
+       int n_cpus;
+
+       n_cpus = libbpf_num_possible_cpus();
+
+       /* Notice: Choosing the queue size is very important when CPU is
+        * configured with power-saving states.
+        *
+        * If deepest state take 133 usec to wakeup from (133/10^6). When link
+        * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can
+        * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) =
+        * 166250 bytes. With MTU size packets this is 110 packets, and with
+        * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets.
+        *
+        * Setting default cpumap queue to 2048 as worst-case (small packet)
+        * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
+        * worst-case is 2043 packets.
+        *
+        * Sysadm can configured system to avoid deep-sleep via:
+        *   tuned-adm profile network-latency
         */
-       qsize = 128+64;
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-       prog_load_attr.file = filename;
-
-       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
-               return err;
+       qsize = 2048;
 
-       if (prog_fd < 0) {
-               fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+       skel = xdp_redirect_cpu__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
                        strerror(errno));
-               return err;
+               ret = EXIT_FAIL_BPF;
+               goto end;
+       }
+
+       ret = sample_init_pre_load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       if (init_tracepoints(obj) < 0) {
-               fprintf(stderr, "ERR: bpf_program__attach failed\n");
-               return err;
+       if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
+               fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       if (init_map_fds(obj) < 0) {
-               fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
-               return err;
+       if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
+               fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
-       mark_cpus_unavailable();
 
-       cpu = malloc(n_cpus * sizeof(int));
+       cpu = calloc(n_cpus, sizeof(int));
        if (!cpu) {
-               fprintf(stderr, "failed to allocate cpu array\n");
-               return err;
+               fprintf(stderr, "Failed to allocate cpu array\n");
+               goto end_destroy;
        }
-       memset(cpu, 0, n_cpus * sizeof(int));
 
-       /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
+       prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
+       while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
                case 'd':
                        if (strlen(optarg) >= IF_NAMESIZE) {
-                               fprintf(stderr, "ERR: --dev name too long\n");
-                               goto error;
+                               fprintf(stderr, "-d/--dev name too long\n");
+                               goto end_cpu;
                        }
                        ifname = (char *)&ifname_buf;
-                       strncpy(ifname, optarg, IF_NAMESIZE);
+                       safe_strncpy(ifname, optarg, sizeof(ifname));
                        ifindex = if_nametoindex(ifname);
-                       if (ifindex == 0) {
-                               fprintf(stderr,
-                                       "ERR: --dev name unknown err(%d):%s\n",
+                       if (!ifindex)
+                               ifindex = strtoul(optarg, NULL, 0);
+                       if (!ifindex) {
+                               fprintf(stderr, "Bad interface index or name (%d): %s\n",
                                        errno, strerror(errno));
-                               goto error;
+                               usage(argv, long_options, __doc__, mask, true, skel->obj);
+                               goto end_cpu;
                        }
                        break;
                case 's':
-                       interval = atoi(optarg);
+                       mask |= SAMPLE_REDIRECT_MAP_CNT;
+                       break;
+               case 'i':
+                       interval = strtoul(optarg, NULL, 0);
                        break;
                case 'S':
-                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       generic = true;
                        break;
                case 'x':
                        stress_mode = true;
                        break;
-               case 'z':
-                       use_separators = false;
-                       break;
                case 'p':
                        /* Selecting eBPF prog to load */
                        prog_name = optarg;
-                       break;
-               case 'n':
-                       mprog_disable = true;
+                       prog = bpf_object__find_program_by_name(skel->obj,
+                                                               prog_name);
+                       if (!prog) {
+                               fprintf(stderr,
+                                       "Failed to find program %s specified by"
+                                       " option -p/--progname\n",
+                                       prog_name);
+                               print_avail_progs(skel->obj);
+                               goto end_cpu;
+                       }
                        break;
                case 'f':
                        mprog_filename = optarg;
@@ -876,6 +439,7 @@ int main(int argc, char **argv)
                        break;
                case 'r':
                        redir_interface = optarg;
+                       mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
                        break;
                case 'm':
                        redir_map = optarg;
@@ -885,93 +449,115 @@ int main(int argc, char **argv)
                        add_cpu = strtoul(optarg, NULL, 0);
                        if (add_cpu >= n_cpus) {
                                fprintf(stderr,
-                               "--cpu nr too large for cpumap err(%d):%s\n",
+                               "--cpu nr too large for cpumap err (%d):%s\n",
                                        errno, strerror(errno));
-                               goto error;
+                               usage(argv, long_options, __doc__, mask, true, skel->obj);
+                               goto end_cpu;
                        }
                        cpu[added_cpus++] = add_cpu;
                        break;
                case 'q':
-                       qsize = atoi(optarg);
+                       qsize = strtoul(optarg, NULL, 0);
                        break;
                case 'F':
-                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       force = true;
+                       break;
+               case 'v':
+                       sample_switch_mode();
                        break;
                case 'h':
-               error:
+                       error = false;
                default:
-                       free(cpu);
-                       usage(argv, obj);
-                       return EXIT_FAIL_OPTION;
+                       usage(argv, long_options, __doc__, mask, error, skel->obj);
+                       goto end_cpu;
                }
        }
 
-       if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
-               xdp_flags |= XDP_FLAGS_DRV_MODE;
-
-       /* Required option */
+       ret = EXIT_FAIL_OPTION;
        if (ifindex == -1) {
-               fprintf(stderr, "ERR: required option --dev missing\n");
-               usage(argv, obj);
-               err = EXIT_FAIL_OPTION;
-               goto out;
+               fprintf(stderr, "Required option --dev missing\n");
+               usage(argv, long_options, __doc__, mask, true, skel->obj);
+               goto end_cpu;
        }
-       /* Required option */
+
        if (add_cpu == -1) {
-               fprintf(stderr, "ERR: required option --cpu missing\n");
-               fprintf(stderr, " Specify multiple --cpu option to add more\n");
-               usage(argv, obj);
-               err = EXIT_FAIL_OPTION;
-               goto out;
+               fprintf(stderr, "Required option --cpu missing\n"
+                               "Specify multiple --cpu option to add more\n");
+               usage(argv, long_options, __doc__, mask, true, skel->obj);
+               goto end_cpu;
        }
 
-       value.bpf_prog.fd = 0;
-       if (!mprog_disable)
-               value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
-                                                    redir_interface, redir_map);
-       if (value.bpf_prog.fd < 0) {
-               err = value.bpf_prog.fd;
-               goto out;
+       skel->rodata->from_match[0] = ifindex;
+       if (redir_interface)
+               skel->rodata->to_match[0] = if_nametoindex(redir_interface);
+
+       ret = xdp_redirect_cpu__load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
+                       strerror(errno));
+               goto end_cpu;
        }
-       value.qsize = qsize;
 
-       for (i = 0; i < added_cpus; i++)
-               create_cpu_entry(cpu[i], &value, i, true);
+       ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
+       if (ret < 0) {
+               fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
+                       strerror(errno));
+               goto end_cpu;
+       }
 
-       /* Remove XDP program when program is interrupted or killed */
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
+       skel->bss->cpumap_map_id = info.id;
 
-       prog = bpf_object__find_program_by_title(obj, prog_name);
-       if (!prog) {
-               fprintf(stderr, "bpf_object__find_program_by_title failed\n");
-               goto out;
+       map_fd = bpf_map__fd(skel->maps.cpu_map);
+       avail_fd = bpf_map__fd(skel->maps.cpus_available);
+       count_fd = bpf_map__fd(skel->maps.cpus_count);
+
+       ret = mark_cpus_unavailable();
+       if (ret < 0) {
+               fprintf(stderr, "Unable to mark CPUs as unavailable\n");
+               goto end_cpu;
        }
 
-       prog_fd = bpf_program__fd(prog);
-       if (prog_fd < 0) {
-               fprintf(stderr, "bpf_program__fd failed\n");
-               goto out;
+       ret = sample_init(skel, mask);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_cpu;
        }
 
-       if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
-               fprintf(stderr, "link set xdp fd failed\n");
-               err = EXIT_FAIL_XDP;
-               goto out;
+       value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
+                                           mprog_filename, mprog_name);
+       if (value.bpf_prog.fd < 0) {
+               fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
+                       strerror(-value.bpf_prog.fd));
+               usage(argv, long_options, __doc__, mask, true, skel->obj);
+               ret = EXIT_FAIL_BPF;
+               goto end_cpu;
        }
+       value.qsize = qsize;
 
-       err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (err) {
-               printf("can't get prog info - %s\n", strerror(errno));
-               goto out;
+       for (i = 0; i < added_cpus; i++) {
+               if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
+                       fprintf(stderr, "Cannot proceed, exiting\n");
+                       usage(argv, long_options, __doc__, mask, true, skel->obj);
+                       goto end_cpu;
+               }
        }
-       prog_id = info.id;
 
-       stats_poll(interval, use_separators, prog_name, mprog_name,
-                  &value, stress_mode);
+       ret = EXIT_FAIL_XDP;
+       if (sample_install_xdp(prog, ifindex, generic, force) < 0)
+               goto end_cpu;
 
-       err = EXIT_OK;
-out:
+       ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
+       if (ret < 0) {
+               fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_cpu;
+       }
+       ret = EXIT_OK;
+end_cpu:
        free(cpu);
-       return err;
+end_destroy:
+       xdp_redirect_cpu__destroy(skel);
+end:
+       sample_exit(ret);
 }
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
deleted file mode 100644 (file)
index d26ec3a..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, int);
-       __type(value, int);
-       __uint(max_entries, 1);
-} tx_port SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback.  Redirect TX errors can be caught via a tracepoint.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, long);
-       __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
-       unsigned short *p = data;
-       unsigned short dst[3];
-
-       dst[0] = p[0];
-       dst[1] = p[1];
-       dst[2] = p[2];
-       p[0] = p[3];
-       p[1] = p[4];
-       p[2] = p[5];
-       p[3] = dst[0];
-       p[4] = dst[1];
-       p[5] = dst[2];
-}
-
-SEC("xdp_redirect")
-int xdp_redirect_prog(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       int rc = XDP_DROP;
-       int *ifindex, port = 0;
-       long *value;
-       u32 key = 0;
-       u64 nh_off;
-
-       nh_off = sizeof(*eth);
-       if (data + nh_off > data_end)
-               return rc;
-
-       ifindex = bpf_map_lookup_elem(&tx_port, &port);
-       if (!ifindex)
-               return rc;
-
-       value = bpf_map_lookup_elem(&rxcnt, &key);
-       if (value)
-               *value += 1;
-
-       swap_src_dst_mac(data);
-       return bpf_redirect(*ifindex, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
-       return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map.bpf.c b/samples/bpf/xdp_redirect_map.bpf.c
new file mode 100644 (file)
index 0000000..59efd65
--- /dev/null
@@ -0,0 +1,95 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+/* The 2nd xdp prog on egress does not support skb mode, so we define two
+ * maps, tx_port_general and tx_port_native.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 1);
+} tx_port_general SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 1);
+} tx_port_native SEC(".maps");
+
+/* store egress interface mac address */
+const volatile char tx_mac_addr[ETH_ALEN];
+
+static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+       swap_src_dst_mac(data);
+       return bpf_redirect_map(redirect_map, 0, 0);
+}
+
+SEC("xdp")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &tx_port_general);
+}
+
+SEC("xdp")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &tx_port_native);
+}
+
+SEC("xdp_devmap/egress")
+int xdp_redirect_map_egress(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
deleted file mode 100644 (file)
index a92b8e5..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-/* The 2nd xdp prog on egress does not support skb mode, so we define two
- * maps, tx_port_general and tx_port_native.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_DEVMAP);
-       __uint(key_size, sizeof(int));
-       __uint(value_size, sizeof(int));
-       __uint(max_entries, 100);
-} tx_port_general SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_DEVMAP);
-       __uint(key_size, sizeof(int));
-       __uint(value_size, sizeof(struct bpf_devmap_val));
-       __uint(max_entries, 100);
-} tx_port_native SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback.  Redirect TX errors can be caught via a tracepoint.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, long);
-       __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interface mac address */
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, __be64);
-       __uint(max_entries, 1);
-} tx_mac SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
-       unsigned short *p = data;
-       unsigned short dst[3];
-
-       dst[0] = p[0];
-       dst[1] = p[1];
-       dst[2] = p[2];
-       p[0] = p[3];
-       p[1] = p[4];
-       p[2] = p[5];
-       p[3] = dst[0];
-       p[4] = dst[1];
-       p[5] = dst[2];
-}
-
-static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       int rc = XDP_DROP;
-       long *value;
-       u32 key = 0;
-       u64 nh_off;
-       int vport;
-
-       nh_off = sizeof(*eth);
-       if (data + nh_off > data_end)
-               return rc;
-
-       /* constant virtual port */
-       vport = 0;
-
-       /* count packet in global counter */
-       value = bpf_map_lookup_elem(&rxcnt, &key);
-       if (value)
-               *value += 1;
-
-       swap_src_dst_mac(data);
-
-       /* send packet out physical port */
-       return bpf_redirect_map(redirect_map, vport, 0);
-}
-
-SEC("xdp_redirect_general")
-int xdp_redirect_map_general(struct xdp_md *ctx)
-{
-       return xdp_redirect_map(ctx, &tx_port_general);
-}
-
-SEC("xdp_redirect_native")
-int xdp_redirect_map_native(struct xdp_md *ctx)
-{
-       return xdp_redirect_map(ctx, &tx_port_native);
-}
-
-SEC("xdp_devmap/map_prog")
-int xdp_redirect_map_egress(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       __be64 *mac;
-       u32 key = 0;
-       u64 nh_off;
-
-       nh_off = sizeof(*eth);
-       if (data + nh_off > data_end)
-               return XDP_DROP;
-
-       mac = bpf_map_lookup_elem(&tx_mac, &key);
-       if (mac)
-               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
-
-       return XDP_PASS;
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
-       return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi.bpf.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
new file mode 100644 (file)
index 0000000..8f59d43
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, u32);
+       __type(value, __be64);
+       __uint(max_entries, 32);
+} mac_map SEC(".maps");
+
+static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
+{
+       u32 key = bpf_get_smp_processor_id();
+       struct datarec *rec;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       return bpf_redirect_map(forward_map, 0,
+                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_general);
+}
+
+SEC("xdp")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_native);
+}
+
+SEC("xdp_devmap/egress")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       u32 key = ctx->egress_ifindex;
+       struct ethhdr *eth = data;
+       __be64 *mac;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       mac = bpf_map_lookup_elem(&mac_map, &key);
+       if (mac)
+               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi_kern.c
deleted file mode 100644 (file)
index 71aa23d..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
-       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
-       __uint(key_size, sizeof(int));
-       __uint(value_size, sizeof(int));
-       __uint(max_entries, 32);
-} forward_map_general SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
-       __uint(key_size, sizeof(int));
-       __uint(value_size, sizeof(struct bpf_devmap_val));
-       __uint(max_entries, 32);
-} forward_map_native SEC(".maps");
-
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, long);
-       __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interfaces mac addresses, set the
- * max_entries to 1 and extend it in user sapce prog.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, __be64);
-       __uint(max_entries, 1);
-} mac_map SEC(".maps");
-
-static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
-{
-       long *value;
-       u32 key = 0;
-
-       /* count packet in global counter */
-       value = bpf_map_lookup_elem(&rxcnt, &key);
-       if (value)
-               *value += 1;
-
-       return bpf_redirect_map(forward_map, key,
-                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
-}
-
-SEC("xdp_redirect_general")
-int xdp_redirect_map_general(struct xdp_md *ctx)
-{
-       return xdp_redirect_map(ctx, &forward_map_general);
-}
-
-SEC("xdp_redirect_native")
-int xdp_redirect_map_native(struct xdp_md *ctx)
-{
-       return xdp_redirect_map(ctx, &forward_map_native);
-}
-
-SEC("xdp_devmap/map_prog")
-int xdp_devmap_prog(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data = (void *)(long)ctx->data;
-       u32 key = ctx->egress_ifindex;
-       struct ethhdr *eth = data;
-       __be64 *mac;
-       u64 nh_off;
-
-       nh_off = sizeof(*eth);
-       if (data + nh_off > data_end)
-               return XDP_DROP;
-
-       mac = bpf_map_lookup_elem(&mac_map, &key);
-       if (mac)
-               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
-
-       return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
index 84cdbbe..3153147 100644 (file)
@@ -1,7 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
+static const char *__doc__ =
+"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n"
+"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n";
+
 #include <linux/bpf.h>
 #include <linux/if_link.h>
 #include <assert.h>
+#include <getopt.h>
 #include <errno.h>
 #include <signal.h>
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <linux/if_ether.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map_multi.skel.h"
 
 #define MAX_IFACE_NUM 32
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int ifaces[MAX_IFACE_NUM] = {};
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
-       __u32 prog_id = 0;
-       int i;
-
-       for (i = 0; ifaces[i] > 0; i++) {
-               if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
-                       exit(1);
-               }
-               if (prog_id)
-                       bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
-       }
-
-       exit(0);
-}
-
-static void poll_stats(int interval)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       __u64 values[nr_cpus], prev[nr_cpus];
-
-       memset(prev, 0, sizeof(prev));
-
-       while (1) {
-               __u64 sum = 0;
-               __u32 key = 0;
-               int i;
 
-               sleep(interval);
-               assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
-               for (i = 0; i < nr_cpus; i++)
-                       sum += (values[i] - prev[i]);
-               if (sum)
-                       printf("Forwarding %10llu pkt/s\n", sum / interval);
-               memcpy(prev, values, sizeof(values));
-       }
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
-       char ifname[IF_NAMESIZE];
-       struct ifreq ifr;
-       int fd, ret = -1;
-
-       fd = socket(AF_INET, SOCK_DGRAM, 0);
-       if (fd < 0)
-               return ret;
-
-       if (!if_indextoname(ifindex, ifname))
-               goto err_out;
-
-       strcpy(ifr.ifr_name, ifname);
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+                 SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
+                 SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
 
-       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
-               goto err_out;
+DEFINE_SAMPLE_INIT(xdp_redirect_map_multi);
 
-       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
-       ret = 0;
+static const struct option long_options[] = {
+       { "help", no_argument, NULL, 'h' },
+       { "skb-mode", no_argument, NULL, 'S' },
+       { "force", no_argument, NULL, 'F' },
+       { "load-egress", no_argument, NULL, 'X' },
+       { "stats", no_argument, NULL, 's' },
+       { "interval", required_argument, NULL, 'i' },
+       { "verbose", no_argument, NULL, 'v' },
+       {}
+};
 
-err_out:
-       close(fd);
-       return ret;
-}
-
-static int update_mac_map(struct bpf_object *obj)
+static int update_mac_map(struct bpf_map *map)
 {
-       int i, ret = -1, mac_map_fd;
+       int mac_map_fd = bpf_map__fd(map);
        unsigned char mac_addr[6];
        unsigned int ifindex;
-
-       mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
-       if (mac_map_fd < 0) {
-               printf("find mac map fd failed\n");
-               return ret;
-       }
+       int i, ret = -1;
 
        for (i = 0; ifaces[i] > 0; i++) {
                ifindex = ifaces[i];
 
                ret = get_mac_addr(ifindex, mac_addr);
                if (ret < 0) {
-                       printf("get interface %d mac failed\n", ifindex);
+                       fprintf(stderr, "get interface %d mac failed\n",
+                               ifindex);
                        return ret;
                }
 
                ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
-               if (ret) {
-                       perror("bpf_update_elem mac_map_fd");
+               if (ret < 0) {
+                       fprintf(stderr, "Failed to update mac address for ifindex %d\n",
+                               ifindex);
                        return ret;
                }
        }
@@ -122,181 +75,159 @@ static int update_mac_map(struct bpf_object *obj)
        return 0;
 }
 
-static void usage(const char *prog)
-{
-       fprintf(stderr,
-               "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
-               "OPTS:\n"
-               "    -S    use skb-mode\n"
-               "    -N    enforce native mode\n"
-               "    -F    force loading prog\n"
-               "    -X    load xdp program on egress\n",
-               prog);
-}
-
 int main(int argc, char **argv)
 {
-       int i, ret, opt, forward_map_fd, max_ifindex = 0;
-       struct bpf_program *ingress_prog, *egress_prog;
-       int ingress_prog_fd, egress_prog_fd = 0;
-       struct bpf_devmap_val devmap_val;
-       bool attach_egress_prog = false;
+       struct bpf_devmap_val devmap_val = {};
+       struct xdp_redirect_map_multi *skel;
+       struct bpf_program *ingress_prog;
+       bool xdp_devmap_attached = false;
+       struct bpf_map *forward_map;
+       int ret = EXIT_FAIL_OPTION;
+       unsigned long interval = 2;
        char ifname[IF_NAMESIZE];
-       struct bpf_map *mac_map;
-       struct bpf_object *obj;
        unsigned int ifindex;
-       char filename[256];
-
-       while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+       bool generic = false;
+       bool force = false;
+       bool tried = false;
+       bool error = true;
+       int i, opt;
+
+       while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+                                 long_options, NULL)) != -1) {
                switch (opt) {
                case 'S':
-                       xdp_flags |= XDP_FLAGS_SKB_MODE;
-                       break;
-               case 'N':
-                       /* default, set below */
+                       generic = true;
+                       /* devmap_xmit tracepoint not available */
+                       mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+                                 SAMPLE_DEVMAP_XMIT_CNT_MULTI);
                        break;
                case 'F':
-                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       force = true;
                        break;
                case 'X':
-                       attach_egress_prog = true;
+                       xdp_devmap_attached = true;
+                       break;
+               case 'i':
+                       interval = strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       sample_switch_mode();
                        break;
+               case 's':
+                       mask |= SAMPLE_REDIRECT_MAP_CNT;
+                       break;
+               case 'h':
+                       error = false;
                default:
-                       usage(basename(argv[0]));
-                       return 1;
+                       sample_usage(argv, long_options, __doc__, mask, error);
+                       return ret;
                }
        }
 
-       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
-               xdp_flags |= XDP_FLAGS_DRV_MODE;
-       } else if (attach_egress_prog) {
-               printf("Load xdp program on egress with SKB mode not supported yet\n");
-               return 1;
+       if (argc <= optind + 1) {
+               sample_usage(argv, long_options, __doc__, mask, error);
+               return ret;
        }
 
-       if (optind == argc) {
-               printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
-               return 1;
+       skel = xdp_redirect_map_multi__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end;
        }
 
-       printf("Get interfaces");
+       ret = sample_init_pre_load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
+       }
+
+       ret = EXIT_FAIL_OPTION;
        for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
                ifaces[i] = if_nametoindex(argv[optind + i]);
                if (!ifaces[i])
                        ifaces[i] = strtoul(argv[optind + i], NULL, 0);
                if (!if_indextoname(ifaces[i], ifname)) {
-                       perror("Invalid interface name or i");
-                       return 1;
+                       fprintf(stderr, "Bad interface index or name\n");
+                       sample_usage(argv, long_options, __doc__, mask, true);
+                       goto end_destroy;
                }
 
-               /* Find the largest index number */
-               if (ifaces[i] > max_ifindex)
-                       max_ifindex = ifaces[i];
-
-               printf(" %d", ifaces[i]);
-       }
-       printf("\n");
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-       obj = bpf_object__open(filename);
-       if (libbpf_get_error(obj)) {
-               printf("ERROR: opening BPF object file failed\n");
-               obj = NULL;
-               goto err_out;
+               skel->rodata->from_match[i] = ifaces[i];
+               skel->rodata->to_match[i] = ifaces[i];
        }
 
-       /* Reset the map size to max ifindex + 1 */
-       if (attach_egress_prog) {
-               mac_map = bpf_object__find_map_by_name(obj, "mac_map");
-               ret = bpf_map__resize(mac_map, max_ifindex + 1);
-               if (ret < 0) {
-                       printf("ERROR: reset mac map size failed\n");
-                       goto err_out;
-               }
+       ret = xdp_redirect_map_multi__load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       /* load BPF program */
-       if (bpf_object__load(obj)) {
-               printf("ERROR: loading BPF object file failed\n");
-               goto err_out;
-       }
-
-       if (xdp_flags & XDP_FLAGS_SKB_MODE) {
-               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
-               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
-       } else {
-               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
-               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
-       }
-       if (!ingress_prog || forward_map_fd < 0) {
-               printf("finding ingress_prog/forward_map in obj file failed\n");
-               goto err_out;
-       }
-
-       ingress_prog_fd = bpf_program__fd(ingress_prog);
-       if (ingress_prog_fd < 0) {
-               printf("find ingress_prog fd failed\n");
-               goto err_out;
-       }
-
-       rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
-       if (rxcnt_map_fd < 0) {
-               printf("bpf_object__find_map_fd_by_name failed\n");
-               goto err_out;
-       }
-
-       if (attach_egress_prog) {
+       if (xdp_devmap_attached) {
                /* Update mac_map with all egress interfaces' mac addr */
-               if (update_mac_map(obj) < 0) {
-                       printf("Error: update mac map failed");
-                       goto err_out;
+               if (update_mac_map(skel->maps.mac_map) < 0) {
+                       fprintf(stderr, "Updating mac address failed\n");
+                       ret = EXIT_FAIL;
+                       goto end_destroy;
                }
+       }
 
-               /* Find egress prog fd */
-               egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
-               if (!egress_prog) {
-                       printf("finding egress_prog in obj file failed\n");
-                       goto err_out;
-               }
-               egress_prog_fd = bpf_program__fd(egress_prog);
-               if (egress_prog_fd < 0) {
-                       printf("find egress_prog fd failed\n");
-                       goto err_out;
-               }
+       ret = sample_init(skel, mask);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
        }
 
-       /* Remove attached program when program is interrupted or killed */
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
+       ingress_prog = skel->progs.xdp_redirect_map_native;
+       forward_map = skel->maps.forward_map_native;
 
-       /* Init forward multicast groups */
        for (i = 0; ifaces[i] > 0; i++) {
                ifindex = ifaces[i];
 
+               ret = EXIT_FAIL_XDP;
+restart:
                /* bind prog_fd to each interface */
-               ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
-               if (ret) {
-                       printf("Set xdp fd failed on %d\n", ifindex);
-                       goto err_out;
+               if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) {
+                       if (generic && !tried) {
+                               fprintf(stderr,
+                                       "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+                               ingress_prog = skel->progs.xdp_redirect_map_general;
+                               forward_map = skel->maps.forward_map_general;
+                               tried = true;
+                               goto restart;
+                       }
+                       goto end_destroy;
                }
 
                /* Add all the interfaces to forward group and attach
-                * egress devmap programe if exist
+                * egress devmap program if exist
                 */
                devmap_val.ifindex = ifindex;
-               devmap_val.bpf_prog.fd = egress_prog_fd;
-               ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
-               if (ret) {
-                       perror("bpf_map_update_elem forward_map");
-                       goto err_out;
+               if (xdp_devmap_attached)
+                       devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
+               ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+               if (ret < 0) {
+                       fprintf(stderr, "Failed to update devmap value: %s\n",
+                               strerror(errno));
+                       ret = EXIT_FAIL_BPF;
+                       goto end_destroy;
                }
        }
 
-       poll_stats(2);
-
-       return 0;
-
-err_out:
-       return 1;
+       ret = sample_run(interval, NULL, NULL);
+       if (ret < 0) {
+               fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
+       }
+       ret = EXIT_OK;
+end_destroy:
+       xdp_redirect_map_multi__destroy(skel);
+end:
+       sample_exit(ret);
 }
index 0e81926..b6e4fc8 100644 (file)
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
  */
+static const char *__doc__ =
+"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n"
+"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
 #include <linux/bpf.h>
 #include <linux/if_link.h>
 #include <assert.h>
 #include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <getopt.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map.skel.h"
 
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static bool xdp_devmap_attached;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
-       __u32 curr_prog_id = 0;
-
-       if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
-               exit(1);
-       }
-       if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
-       else if (!curr_prog_id)
-               printf("couldn't find a prog id on iface IN\n");
-       else
-               printf("program on iface IN changed, not removing\n");
-
-       if (ifindex_out_xdp_dummy_attached) {
-               curr_prog_id = 0;
-               if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
-                                       xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
-                       exit(1);
-               }
-               if (dummy_prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
-               else if (!curr_prog_id)
-                       printf("couldn't find a prog id on iface OUT\n");
-               else
-                       printf("program on iface OUT changed, not removing\n");
-       }
-       exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       __u64 values[nr_cpus], prev[nr_cpus];
-
-       memset(prev, 0, sizeof(prev));
-
-       while (1) {
-               __u64 sum = 0;
-               __u32 key = 0;
-               int i;
-
-               sleep(interval);
-               assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
-               for (i = 0; i < nr_cpus; i++)
-                       sum += (values[i] - prev[i]);
-               if (sum)
-                       printf("ifindex %i: %10llu pkt/s\n",
-                              ifindex, sum / interval);
-               memcpy(prev, values, sizeof(values));
-       }
-}
-
-static int get_mac_addr(unsigned int ifindex_out, void *mac_addr)
-{
-       char ifname[IF_NAMESIZE];
-       struct ifreq ifr;
-       int fd, ret = -1;
-
-       fd = socket(AF_INET, SOCK_DGRAM, 0);
-       if (fd < 0)
-               return ret;
-
-       if (!if_indextoname(ifindex_out, ifname))
-               goto err_out;
-
-       strcpy(ifr.ifr_name, ifname);
-
-       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
-               goto err_out;
-
-       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
-       ret = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+                 SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
 
-err_out:
-       close(fd);
-       return ret;
-}
+DEFINE_SAMPLE_INIT(xdp_redirect_map);
 
-static void usage(const char *prog)
-{
-       fprintf(stderr,
-               "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
-               "OPTS:\n"
-               "    -S    use skb-mode\n"
-               "    -N    enforce native mode\n"
-               "    -F    force loading prog\n"
-               "    -X    load xdp program on egress\n",
-               prog);
-}
+static const struct option long_options[] = {
+       { "help", no_argument, NULL, 'h' },
+       { "skb-mode", no_argument, NULL, 'S' },
+       { "force", no_argument, NULL, 'F' },
+       { "load-egress", no_argument, NULL, 'X' },
+       { "stats", no_argument, NULL, 's' },
+       { "interval", required_argument, NULL, 'i' },
+       { "verbose", no_argument, NULL, 'v' },
+       {}
+};
 
 int main(int argc, char **argv)
 {
-       struct bpf_prog_load_attr prog_load_attr = {
-               .prog_type      = BPF_PROG_TYPE_UNSPEC,
-       };
-       struct bpf_program *prog, *dummy_prog, *devmap_prog;
-       int prog_fd, dummy_prog_fd, devmap_prog_fd = 0;
-       int tx_port_map_fd, tx_mac_map_fd;
-       struct bpf_devmap_val devmap_val;
-       struct bpf_prog_info info = {};
-       __u32 info_len = sizeof(info);
-       const char *optstr = "FSNX";
-       struct bpf_object *obj;
-       int ret, opt, key = 0;
-       char filename[256];
-
-       while ((opt = getopt(argc, argv, optstr)) != -1) {
+       struct bpf_devmap_val devmap_val = {};
+       bool xdp_devmap_attached = false;
+       struct xdp_redirect_map *skel;
+       char str[2 * IF_NAMESIZE + 1];
+       char ifname_out[IF_NAMESIZE];
+       struct bpf_map *tx_port_map;
+       char ifname_in[IF_NAMESIZE];
+       int ifindex_in, ifindex_out;
+       unsigned long interval = 2;
+       int ret = EXIT_FAIL_OPTION;
+       struct bpf_program *prog;
+       bool generic = false;
+       bool force = false;
+       bool tried = false;
+       bool error = true;
+       int opt, key = 0;
+
+       while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+                                 long_options, NULL)) != -1) {
                switch (opt) {
                case 'S':
-                       xdp_flags |= XDP_FLAGS_SKB_MODE;
-                       break;
-               case 'N':
-                       /* default, set below */
+                       generic = true;
+                       /* devmap_xmit tracepoint not available */
+                       mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+                                 SAMPLE_DEVMAP_XMIT_CNT_MULTI);
                        break;
                case 'F':
-                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       force = true;
                        break;
                case 'X':
                        xdp_devmap_attached = true;
                        break;
+               case 'i':
+                       interval = strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       sample_switch_mode();
+                       break;
+               case 's':
+                       mask |= SAMPLE_REDIRECT_MAP_CNT;
+                       break;
+               case 'h':
+                       error = false;
                default:
-                       usage(basename(argv[0]));
-                       return 1;
+                       sample_usage(argv, long_options, __doc__, mask, error);
+                       return ret;
                }
        }
 
-       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
-               xdp_flags |= XDP_FLAGS_DRV_MODE;
-       } else if (xdp_devmap_attached) {
-               printf("Load xdp program on egress with SKB mode not supported yet\n");
-               return 1;
-       }
-
-       if (optind == argc) {
-               printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
-               return 1;
+       if (argc <= optind + 1) {
+               sample_usage(argv, long_options, __doc__, mask, true);
+               goto end;
        }
 
        ifindex_in = if_nametoindex(argv[optind]);
@@ -182,107 +104,116 @@ int main(int argc, char **argv)
        if (!ifindex_out)
                ifindex_out = strtoul(argv[optind + 1], NULL, 0);
 
-       printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-       prog_load_attr.file = filename;
-
-       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
-               return 1;
-
-       if (xdp_flags & XDP_FLAGS_SKB_MODE) {
-               prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
-               tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_general");
-       } else {
-               prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
-               tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_native");
-       }
-       dummy_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_dummy_prog");
-       if (!prog || dummy_prog < 0 || tx_port_map_fd < 0) {
-               printf("finding prog/dummy_prog/tx_port_map in obj file failed\n");
-               goto out;
-       }
-       prog_fd = bpf_program__fd(prog);
-       dummy_prog_fd = bpf_program__fd(dummy_prog);
-       if (prog_fd < 0 || dummy_prog_fd < 0 || tx_port_map_fd < 0) {
-               printf("bpf_prog_load_xattr: %s\n", strerror(errno));
-               return 1;
-       }
-
-       tx_mac_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_mac");
-       rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
-       if (tx_mac_map_fd < 0 || rxcnt_map_fd < 0) {
-               printf("bpf_object__find_map_fd_by_name failed\n");
-               return 1;
+       if (!ifindex_in || !ifindex_out) {
+               fprintf(stderr, "Bad interface index or name\n");
+               sample_usage(argv, long_options, __doc__, mask, true);
+               goto end;
        }
 
-       if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
-               printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
-               return 1;
+       skel = xdp_redirect_map__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end;
        }
 
-       ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (ret) {
-               printf("can't get prog info - %s\n", strerror(errno));
-               return ret;
+       ret = sample_init_pre_load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
-       prog_id = info.id;
-
-       /* Loading dummy XDP prog on out-device */
-       if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
-                           (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
-               printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
-               ifindex_out_xdp_dummy_attached = false;
-       }
-
-       memset(&info, 0, sizeof(info));
-       ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
-       if (ret) {
-               printf("can't get prog info - %s\n", strerror(errno));
-               return ret;
-       }
-       dummy_prog_id = info.id;
 
        /* Load 2nd xdp prog on egress. */
        if (xdp_devmap_attached) {
-               unsigned char mac_addr[6];
-
-               devmap_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_egress");
-               if (!devmap_prog) {
-                       printf("finding devmap_prog in obj file failed\n");
-                       goto out;
-               }
-               devmap_prog_fd = bpf_program__fd(devmap_prog);
-               if (devmap_prog_fd < 0) {
-                       printf("finding devmap_prog fd failed\n");
-                       goto out;
-               }
-
-               if (get_mac_addr(ifindex_out, mac_addr) < 0) {
-                       printf("get interface %d mac failed\n", ifindex_out);
-                       goto out;
+               ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr);
+               if (ret < 0) {
+                       fprintf(stderr, "Failed to get interface %d mac address: %s\n",
+                               ifindex_out, strerror(-ret));
+                       ret = EXIT_FAIL;
+                       goto end_destroy;
                }
+       }
 
-               ret = bpf_map_update_elem(tx_mac_map_fd, &key, mac_addr, 0);
-               if (ret) {
-                       perror("bpf_update_elem tx_mac_map_fd");
-                       goto out;
+       skel->rodata->from_match[0] = ifindex_in;
+       skel->rodata->to_match[0] = ifindex_out;
+
+       ret = xdp_redirect_map__load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
+       }
+
+       ret = sample_init(skel, mask);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
+       }
+
+       prog = skel->progs.xdp_redirect_map_native;
+       tx_port_map = skel->maps.tx_port_native;
+restart:
+       if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) {
+               /* First try with struct bpf_devmap_val as value for generic
+                * mode, then fallback to sizeof(int) for older kernels.
+                */
+               fprintf(stderr,
+                       "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+               if (generic && !tried) {
+                       prog = skel->progs.xdp_redirect_map_general;
+                       tx_port_map = skel->maps.tx_port_general;
+                       tried = true;
+                       goto restart;
                }
+               ret = EXIT_FAIL_XDP;
+               goto end_destroy;
        }
 
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
+       /* Loading dummy XDP prog on out-device */
+       sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force);
 
        devmap_val.ifindex = ifindex_out;
-       devmap_val.bpf_prog.fd = devmap_prog_fd;
-       ret = bpf_map_update_elem(tx_port_map_fd, &key, &devmap_val, 0);
-       if (ret) {
-               perror("bpf_update_elem");
-               goto out;
-       }
-
-       poll_stats(2, ifindex_out);
-
-out:
-       return 0;
+       if (xdp_devmap_attached)
+               devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress);
+       ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to update devmap value: %s\n",
+                       strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
+       }
+
+       ret = EXIT_FAIL;
+       if (!if_indextoname(ifindex_in, ifname_in)) {
+               fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+                       strerror(errno));
+               goto end_destroy;
+       }
+
+       if (!if_indextoname(ifindex_out, ifname_out)) {
+               fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+                       strerror(errno));
+               goto end_destroy;
+       }
+
+       safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+       printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+              ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+       snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
+
+       ret = sample_run(interval, NULL, NULL);
+       if (ret < 0) {
+               fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
+       }
+       ret = EXIT_OK;
+end_destroy:
+       xdp_redirect_map__destroy(skel);
+end:
+       sample_exit(ret);
 }
index 93854e1..7af5b07 100644 (file)
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
  */
+static const char *__doc__ =
+"XDP redirect tool, using bpf_redirect helper\n"
+"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
 #include <linux/bpf.h>
 #include <linux/if_link.h>
 #include <assert.h>
 #include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
+#include <getopt.h>
 #include <sys/resource.h>
-
-#include "bpf_util.h"
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect.skel.h"
 
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
-       __u32 curr_prog_id = 0;
-
-       if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
-               printf("bpf_get_link_xdp_id failed\n");
-               exit(1);
-       }
-       if (prog_id == curr_prog_id)
-               bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
-       else if (!curr_prog_id)
-               printf("couldn't find a prog id on iface IN\n");
-       else
-               printf("program on iface IN changed, not removing\n");
-
-       if (ifindex_out_xdp_dummy_attached) {
-               curr_prog_id = 0;
-               if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
-                                       xdp_flags)) {
-                       printf("bpf_get_link_xdp_id failed\n");
-                       exit(1);
-               }
-               if (dummy_prog_id == curr_prog_id)
-                       bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
-               else if (!curr_prog_id)
-                       printf("couldn't find a prog id on iface OUT\n");
-               else
-                       printf("program on iface OUT changed, not removing\n");
-       }
-       exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
-       unsigned int nr_cpus = bpf_num_possible_cpus();
-       __u64 values[nr_cpus], prev[nr_cpus];
-
-       memset(prev, 0, sizeof(prev));
-
-       while (1) {
-               __u64 sum = 0;
-               __u32 key = 0;
-               int i;
-
-               sleep(interval);
-               assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
-               for (i = 0; i < nr_cpus; i++)
-                       sum += (values[i] - prev[i]);
-               if (sum)
-                       printf("ifindex %i: %10llu pkt/s\n",
-                              ifindex, sum / interval);
-               memcpy(prev, values, sizeof(values));
-       }
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
+                 SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
 
-static void usage(const char *prog)
-{
-       fprintf(stderr,
-               "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
-               "OPTS:\n"
-               "    -S    use skb-mode\n"
-               "    -N    enforce native mode\n"
-               "    -F    force loading prog\n",
-               prog);
-}
+DEFINE_SAMPLE_INIT(xdp_redirect);
 
+static const struct option long_options[] = {
+       {"help",        no_argument,            NULL, 'h' },
+       {"skb-mode",    no_argument,            NULL, 'S' },
+       {"force",       no_argument,            NULL, 'F' },
+       {"stats",       no_argument,            NULL, 's' },
+       {"interval",    required_argument,      NULL, 'i' },
+       {"verbose",     no_argument,            NULL, 'v' },
+       {}
+};
 
 int main(int argc, char **argv)
 {
-       struct bpf_prog_load_attr prog_load_attr = {
-               .prog_type      = BPF_PROG_TYPE_XDP,
-       };
-       struct bpf_program *prog, *dummy_prog;
-       int prog_fd, tx_port_map_fd, opt;
-       struct bpf_prog_info info = {};
-       __u32 info_len = sizeof(info);
-       const char *optstr = "FSN";
-       struct bpf_object *obj;
-       char filename[256];
-       int dummy_prog_fd;
-       int ret, key = 0;
-
-       while ((opt = getopt(argc, argv, optstr)) != -1) {
+       int ifindex_in, ifindex_out, opt;
+       char str[2 * IF_NAMESIZE + 1];
+       char ifname_out[IF_NAMESIZE];
+       char ifname_in[IF_NAMESIZE];
+       int ret = EXIT_FAIL_OPTION;
+       unsigned long interval = 2;
+       struct xdp_redirect *skel;
+       bool generic = false;
+       bool force = false;
+       bool error = true;
+
+       while ((opt = getopt_long(argc, argv, "hSFi:vs",
+                                 long_options, NULL)) != -1) {
                switch (opt) {
                case 'S':
-                       xdp_flags |= XDP_FLAGS_SKB_MODE;
-                       break;
-               case 'N':
-                       /* default, set below */
+                       generic = true;
+                       mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+                                 SAMPLE_DEVMAP_XMIT_CNT_MULTI);
                        break;
                case 'F':
-                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       force = true;
+                       break;
+               case 'i':
+                       interval = strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       sample_switch_mode();
+                       break;
+               case 's':
+                       mask |= SAMPLE_REDIRECT_CNT;
                        break;
+               case 'h':
+                       error = false;
                default:
-                       usage(basename(argv[0]));
-                       return 1;
+                       sample_usage(argv, long_options, __doc__, mask, error);
+                       return ret;
                }
        }
 
-       if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
-               xdp_flags |= XDP_FLAGS_DRV_MODE;
-
-       if (optind + 2 != argc) {
-               printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
-               return 1;
+       if (argc <= optind + 1) {
+               sample_usage(argv, long_options, __doc__, mask, true);
+               return ret;
        }
 
        ifindex_in = if_nametoindex(argv[optind]);
@@ -143,75 +94,80 @@ int main(int argc, char **argv)
        if (!ifindex_out)
                ifindex_out = strtoul(argv[optind + 1], NULL, 0);
 
-       printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-       prog_load_attr.file = filename;
-
-       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
-               return 1;
-
-       prog = bpf_program__next(NULL, obj);
-       dummy_prog = bpf_program__next(prog, obj);
-       if (!prog || !dummy_prog) {
-               printf("finding a prog in obj file failed\n");
-               return 1;
+       if (!ifindex_in || !ifindex_out) {
+               fprintf(stderr, "Bad interface index or name\n");
+               sample_usage(argv, long_options, __doc__, mask, true);
+               goto end;
        }
-       /* bpf_prog_load_xattr gives us the pointer to first prog's fd,
-        * so we're missing only the fd for dummy prog
-        */
-       dummy_prog_fd = bpf_program__fd(dummy_prog);
-       if (prog_fd < 0 || dummy_prog_fd < 0) {
-               printf("bpf_prog_load_xattr: %s\n", strerror(errno));
-               return 1;
+
+       skel = xdp_redirect__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end;
        }
 
-       tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
-       rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
-       if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
-               printf("bpf_object__find_map_fd_by_name failed\n");
-               return 1;
+       ret = sample_init_pre_load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
-               printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
-               return 1;
+       skel->rodata->from_match[0] = ifindex_in;
+       skel->rodata->to_match[0] = ifindex_out;
+       skel->rodata->ifindex_out = ifindex_out;
+
+       ret = xdp_redirect__load(skel);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno));
+               ret = EXIT_FAIL_BPF;
+               goto end_destroy;
        }
 
-       ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (ret) {
-               printf("can't get prog info - %s\n", strerror(errno));
-               return ret;
+       ret = sample_init(skel, mask);
+       if (ret < 0) {
+               fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
        }
-       prog_id = info.id;
+
+       ret = EXIT_FAIL_XDP;
+       if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in,
+                              generic, force) < 0)
+               goto end_destroy;
 
        /* Loading dummy XDP prog on out-device */
-       if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
-                           (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
-               printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
-               ifindex_out_xdp_dummy_attached = false;
+       sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out,
+                          generic, force);
+
+       ret = EXIT_FAIL;
+       if (!if_indextoname(ifindex_in, ifname_in)) {
+               fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+                       strerror(errno));
+               goto end_destroy;
        }
 
-       memset(&info, 0, sizeof(info));
-       ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
-       if (ret) {
-               printf("can't get prog info - %s\n", strerror(errno));
-               return ret;
+       if (!if_indextoname(ifindex_out, ifname_out)) {
+               fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+                       strerror(errno));
+               goto end_destroy;
        }
-       dummy_prog_id = info.id;
 
-       signal(SIGINT, int_exit);
-       signal(SIGTERM, int_exit);
+       safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+       printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+              ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+       snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
 
-       /* bpf redirect port */
-       ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
-       if (ret) {
-               perror("bpf_update_elem");
-               goto out;
+       ret = sample_run(interval, NULL, NULL);
+       if (ret < 0) {
+               fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+               ret = EXIT_FAIL;
+               goto end_destroy;
        }
-
-       poll_stats(2, ifindex_out);
-
-out:
-       return ret;
+       ret = EXIT_OK;
+end_destroy:
+       xdp_redirect__destroy(skel);
+end:
+       sample_exit(ret);
 }
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644 (file)
index 0000000..0eb7e1d
--- /dev/null
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+       __uint(max_entries, 32 * 32);
+       __type(key, u64);
+       __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Find if b is part of set a, but if a is empty set then evaluate to true */
+#define IN_SET(a, b)                                                 \
+       ({                                                           \
+               bool __res = !(a)[0];                                \
+               for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+                       __res = (a)[i] == (b);                       \
+                       if (__res)                                   \
+                               break;                               \
+               }                                                    \
+               __res;                                               \
+       })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+       switch (err) {
+       case 0:
+               return 0;
+       case -EINVAL:
+               return 2;
+       case -ENETDOWN:
+               return 3;
+       case -EMSGSIZE:
+               return 4;
+       case -EOPNOTSUPP:
+               return 5;
+       case -ENOSPC:
+               return 6;
+       default:
+               return 1;
+       }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+       u32 cpu = bpf_get_smp_processor_id();
+       u32 key = XDP_REDIRECT_ERROR;
+       struct datarec *rec;
+       u32 idx;
+
+       if (!IN_SET(from_match, from))
+               return 0;
+
+       key = xdp_get_err_key(err);
+
+       idx = key * nr_cpus + cpu;
+       rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+       if (!rec)
+               return 0;
+       if (key)
+               NO_TEAR_INC(rec->dropped);
+       else
+               NO_TEAR_INC(rec->processed);
+       return 0; /* Indicate event was filtered (no further processing)*/
+       /*
+        * Returning 1 here would allow e.g. a perf-record tracepoint
+        * to see and record these events, but it doesn't work well
+        * in-practice as stopping perf-record also unload this
+        * bpf_prog.  Plus, there is additional overhead of doing so.
+        */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+            const struct bpf_prog *xdp, const void *tgt, int err,
+            const struct bpf_map *map, u32 index)
+{
+       return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+            const struct bpf_prog *xdp, const void *tgt, int err,
+            const struct bpf_map *map, u32 index)
+{
+       return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+            const struct bpf_prog *xdp, const void *tgt, int err,
+            const struct bpf_map *map, u32 index)
+{
+       return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+            const struct bpf_prog *xdp, const void *tgt, int err,
+            const struct bpf_map *map, u32 index)
+{
+       return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+            unsigned int drops, int to_cpu)
+{
+       u32 cpu = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 idx;
+
+       if (cpumap_map_id && cpumap_map_id != map_id)
+               return 0;
+
+       idx = to_cpu * nr_cpus + cpu;
+       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+       if (!rec)
+               return 0;
+       NO_TEAR_ADD(rec->processed, processed);
+       NO_TEAR_ADD(rec->dropped, drops);
+       /* Record bulk events, then userspace can calc average bulk size */
+       if (processed > 0)
+               NO_TEAR_INC(rec->issue);
+       /* Inception: It's possible to detect overload situations, via
+        * this tracepoint.  This can be used for creating a feedback
+        * loop to XDP, which can take appropriate actions to mitigate
+        * this overload situation.
+        */
+       return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+            unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+       struct datarec *rec;
+       u32 cpu;
+
+       if (cpumap_map_id && cpumap_map_id != map_id)
+               return 0;
+
+       cpu = bpf_get_smp_processor_id();
+       rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+       if (!rec)
+               return 0;
+       NO_TEAR_ADD(rec->processed, processed);
+       NO_TEAR_ADD(rec->dropped, drops);
+       NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+       NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+       NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+       /* Count times kthread yielded CPU via schedule call */
+       if (sched)
+               NO_TEAR_INC(rec->issue);
+       return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+            const struct bpf_prog *xdp, u32 act)
+{
+       u32 cpu = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 key = act, idx;
+
+       if (!IN_SET(from_match, dev->ifindex))
+               return 0;
+       if (!IN_SET(to_match, dev->ifindex))
+               return 0;
+
+       if (key > XDP_REDIRECT)
+               key = XDP_REDIRECT + 1;
+
+       idx = key * nr_cpus + cpu;
+       rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+       if (!rec)
+               return 0;
+       NO_TEAR_INC(rec->dropped);
+
+       return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+            const struct net_device *to_dev, int sent, int drops, int err)
+{
+       struct datarec *rec;
+       int idx_in, idx_out;
+       u32 cpu;
+
+       idx_in = from_dev->ifindex;
+       idx_out = to_dev->ifindex;
+
+       if (!IN_SET(from_match, idx_in))
+               return 0;
+       if (!IN_SET(to_match, idx_out))
+               return 0;
+
+       cpu = bpf_get_smp_processor_id();
+       rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+       if (!rec)
+               return 0;
+       NO_TEAR_ADD(rec->processed, sent);
+       NO_TEAR_ADD(rec->dropped, drops);
+       /* Record bulk events, then userspace can calc average bulk size */
+       NO_TEAR_INC(rec->info);
+       /* Record error cases, where no frame were sent */
+       /* Catch API error of drv ndo_xdp_xmit sent more than count */
+       if (err || drops < 0)
+               NO_TEAR_INC(rec->issue);
+       return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+            const struct net_device *to_dev, int sent, int drops, int err)
+{
+       struct datarec empty = {};
+       struct datarec *rec;
+       int idx_in, idx_out;
+       u64 idx;
+
+       idx_in = from_dev->ifindex;
+       idx_out = to_dev->ifindex;
+       idx = idx_in;
+       idx = idx << 32 | idx_out;
+
+       if (!IN_SET(from_match, idx_in))
+               return 0;
+       if (!IN_SET(to_match, idx_out))
+               return 0;
+
+       bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+       rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+       if (!rec)
+               return 0;
+
+       NO_TEAR_ADD(rec->processed, sent);
+       NO_TEAR_ADD(rec->dropped, drops);
+       NO_TEAR_INC(rec->info);
+       if (err || drops < 0)
+               NO_TEAR_INC(rec->issue);
+       return 0;
+}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
new file mode 100644 (file)
index 0000000..25b1dbe
--- /dev/null
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _XDP_SAMPLE_BPF_H
+#define _XDP_SAMPLE_BPF_H
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define EINVAL 22
+#define ENETDOWN 100
+#define EMSGSIZE 90
+#define EOPNOTSUPP 95
+#define ENOSPC 28
+
+typedef struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(map_flags, BPF_F_MMAPABLE);
+       __type(key, unsigned int);
+       __type(value, struct datarec);
+} array_map;
+
+extern array_map rx_cnt;
+extern const volatile int nr_cpus;
+
+enum {
+       XDP_REDIRECT_SUCCESS = 0,
+       XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+       unsigned short *p = data;
+       unsigned short dst[3];
+
+       dst[0] = p[0];
+       dst[1] = p[1];
+       dst[2] = p[2];
+       p[0] = p[3];
+       p[1] = p[4];
+       p[2] = p[5];
+       p[3] = dst[0];
+       p[4] = dst[1];
+       p[5] = dst[2];
+}
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+       __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x)           __builtin_bswap16(x)
+#define bpf_htons(x)           __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+       __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x)           (x)
+#define bpf_htons(x)           (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+/*
+ * Note: including linux/compiler.h or linux/kernel.h for the macros below
+ * conflicts with vmlinux.h include in BPF files, so we define them here.
+ *
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8  __attribute__((__may_alias__))  __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+       switch (size) {
+       case 1: *(__u8_alias_t  *) res = *(volatile __u8_alias_t  *) p; break;
+       case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+       case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+       case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+       default:
+               asm volatile ("" : : : "memory");
+               __builtin_memcpy((void *)res, (const void *)p, size);
+               asm volatile ("" : : : "memory");
+       }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+       switch (size) {
+       case 1: *(volatile  __u8_alias_t *) p = *(__u8_alias_t  *) res; break;
+       case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+       case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+       case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+       default:
+               asm volatile ("" : : : "memory");
+               __builtin_memcpy((void *)p, (const void *)res, size);
+               asm volatile ("" : : : "memory");
+       }
+}
+
+#define READ_ONCE(x)                                   \
+({                                                     \
+       union { typeof(x) __val; char __c[1]; } __u =   \
+               { .__c = { 0 } };                       \
+       __read_once_size(&(x), __u.__c, sizeof(x));     \
+       __u.__val;                                      \
+})
+
+#define WRITE_ONCE(x, val)                             \
+({                                                     \
+       union { typeof(x) __val; char __c[1]; } __u =   \
+               { .__val = (val) };                     \
+       __write_once_size(&(x), __u.__c, sizeof(x));    \
+       __u.__val;                                      \
+})
+
+/* Add a value using relaxed read and relaxed write. Less expensive than
+ * fetch_add when there is no write concurrency.
+ */
+#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val))
+#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h
new file mode 100644 (file)
index 0000000..8a7669a
--- /dev/null
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _XDP_SAMPLE_SHARED_H
+#define _XDP_SAMPLE_SHARED_H
+
+struct datarec {
+       size_t processed;
+       size_t dropped;
+       size_t issue;
+       union {
+               size_t xdp_pass;
+               size_t info;
+       };
+       size_t xdp_drop;
+       size_t xdp_redirect;
+} __attribute__((aligned(64)));
+
+#endif
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
new file mode 100644 (file)
index 0000000..b32d821
--- /dev/null
@@ -0,0 +1,1673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/ethtool.h>
+#include <linux/hashtable.h>
+#include <linux/if_link.h>
+#include <linux/jhash.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+
+#define __sample_print(fmt, cond, ...)                                         \
+       ({                                                                     \
+               if (cond)                                                      \
+                       printf(fmt, ##__VA_ARGS__);                            \
+       })
+
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+#define print_default(fmt, ...)                                                \
+       __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+#define __print_err(err, fmt, ...)                                             \
+       ({                                                                     \
+               __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT,  \
+                              ##__VA_ARGS__);                                 \
+               sample_err_exp = sample_err_exp ? true : err > 0;              \
+       })
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN("llu")
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+#define XDP_REDIRECT_ERR_MAX 7
+
+enum map_type {
+       MAP_RX,
+       MAP_REDIRECT_ERR,
+       MAP_CPUMAP_ENQUEUE,
+       MAP_CPUMAP_KTHREAD,
+       MAP_EXCEPTION,
+       MAP_DEVMAP_XMIT,
+       MAP_DEVMAP_XMIT_MULTI,
+       NUM_MAP,
+};
+
+enum log_level {
+       LL_DEFAULT = 1U << 0,
+       LL_SIMPLE = 1U << 1,
+       LL_DEBUG = 1U << 2,
+};
+
+struct record {
+       __u64 timestamp;
+       struct datarec total;
+       struct datarec *cpu;
+};
+
+struct map_entry {
+       struct hlist_node node;
+       __u64 pair;
+       struct record val;
+};
+
+struct stats_record {
+       struct record rx_cnt;
+       struct record redir_err[XDP_REDIRECT_ERR_MAX];
+       struct record kthread;
+       struct record exception[XDP_ACTION_MAX];
+       struct record devmap_xmit;
+       DECLARE_HASHTABLE(xmit_map, 5);
+       struct record enq[];
+};
+
+struct sample_output {
+       struct {
+               __u64 rx;
+               __u64 redir;
+               __u64 drop;
+               __u64 drop_xmit;
+               __u64 err;
+               __u64 xmit;
+       } totals;
+       struct {
+               __u64 pps;
+               __u64 drop;
+               __u64 err;
+       } rx_cnt;
+       struct {
+               __u64 suc;
+               __u64 err;
+       } redir_cnt;
+       struct {
+               __u64 hits;
+       } except_cnt;
+       struct {
+               __u64 pps;
+               __u64 drop;
+               __u64 err;
+               double bavg;
+       } xmit_cnt;
+};
+
+struct xdp_desc {
+       int ifindex;
+       __u32 prog_id;
+       int flags;
+} sample_xdp_progs[32];
+
+struct datarec *sample_mmap[NUM_MAP];
+struct bpf_map *sample_map[NUM_MAP];
+size_t sample_map_count[NUM_MAP];
+enum log_level sample_log_level;
+struct sample_output sample_out;
+unsigned long sample_interval;
+bool sample_err_exp;
+int sample_xdp_cnt;
+int sample_n_cpus;
+int sample_sig_fd;
+int sample_mask;
+
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+       /* Key=1 keeps unknown errors */
+       "Success",
+       "Unknown",
+       "EINVAL",
+       "ENETDOWN",
+       "EMSGSIZE",
+       "EOPNOTSUPP",
+       "ENOSPC",
+};
+
+/* Keyed from Unknown */
+static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = {
+       "Unknown error",
+       "Invalid redirection",
+       "Device being redirected to is down",
+       "Packet length too large for device",
+       "Operation not supported",
+       "No space in ptr_ring of cpumap kthread",
+};
+
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+       [XDP_ABORTED]  = "XDP_ABORTED",
+       [XDP_DROP]     = "XDP_DROP",
+       [XDP_PASS]     = "XDP_PASS",
+       [XDP_TX]       = "XDP_TX",
+       [XDP_REDIRECT] = "XDP_REDIRECT",
+       [XDP_UNKNOWN]  = "XDP_UNKNOWN",
+};
+
+static __u64 gettime(void)
+{
+       struct timespec t;
+       int res;
+
+       res = clock_gettime(CLOCK_MONOTONIC, &t);
+       if (res < 0) {
+               fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+               return UINT64_MAX;
+       }
+       return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static const char *action2str(int action)
+{
+       if (action < XDP_ACTION_MAX)
+               return xdp_action_names[action];
+       return NULL;
+}
+
+static void sample_print_help(int mask)
+{
+       printf("Output format description\n\n"
+              "By default, redirect success statistics are disabled, use -s to enable.\n"
+              "The terse output mode is default, verbose mode can be activated using -v\n"
+              "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n"
+              "Terse mode displays at most the following fields:\n"
+              "  rx/s        Number of packets received per second\n"
+              "  redir/s     Number of packets successfully redirected per second\n"
+              "  err,drop/s  Aggregated count of errors per second (including dropped packets)\n"
+              "  xmit/s      Number of packets transmitted on the output device per second\n\n"
+              "Output description for verbose mode:\n"
+              "  FIELD                 DESCRIPTION\n");
+
+       if (mask & SAMPLE_RX_CNT) {
+               printf("  receive\t\tDisplays the number of packets received & errors encountered\n"
+                      " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n"
+                      " \t\t\tand drop statistics will be expanded inline in terse mode.\n"
+                      " \t\t\t\tpkt/s     - Packets received per second\n"
+                      " \t\t\t\tdrop/s    - Packets dropped per second\n"
+                      " \t\t\t\terror/s   - Errors encountered per second\n\n");
+       }
+       if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+               printf("  redirect\t\tDisplays the number of packets successfully redirected\n"
+                      "  \t\t\tErrors encountered are expanded under redirect_err field\n"
+                      "  \t\t\tNote that passing -s to enable it has a per packet overhead\n"
+                      "  \t\t\t\tredir/s   - Packets redirected successfully per second\n\n"
+                      "  redirect_err\t\tDisplays the number of packets that failed redirection\n"
+                      "  \t\t\tThe errno is expanded under this field with per CPU count\n"
+                      "  \t\t\tThe recognized errors are:\n");
+
+               for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++)
+                       printf("\t\t\t  %s: %s\n", xdp_redirect_err_names[i],
+                              xdp_redirect_err_help[i - 1]);
+
+               printf("  \n\t\t\t\terror/s   - Packets that failed redirection per second\n\n");
+       }
+
+       if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+               printf("  enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n"
+                      "  \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n"
+                      "  \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n"
+                      "  \t\t\tpackets to.\n"
+                      "  \t\t\t\tpkt/s    - Packets enqueued per second from other CPU to CPU N\n"
+                      "  \t\t\t\tdrop/s   - Packets dropped when trying to enqueue to CPU N\n"
+                      "  \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+       }
+
+       if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+               printf("  kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n"
+                      "  \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n"
+                      "  \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n"
+                      "  \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n"
+                      "  \t\t\t\tpkt/s    - Packets consumed per second from ptr_ring\n"
+                      "  \t\t\t\tdrop/s   - Packets dropped per second in kthread\n"
+                      "  \t\t\t\tsched    - Number of times kthread called schedule()\n\n"
+                      "  \t\t\txdp_stats (also expands to per-CPU counts)\n"
+                      "  \t\t\t\tpass/s  - XDP_PASS count for CPUMAP program execution\n"
+                      "  \t\t\t\tdrop/s  - XDP_DROP count for CPUMAP program execution\n"
+                      "  \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n");
+       }
+
+       if (mask & SAMPLE_EXCEPTION_CNT) {
+               printf("  xdp_exception\t\tDisplays xdp_exception tracepoint events\n"
+                      "  \t\t\tThis can occur due to internal driver errors, unrecognized\n"
+                      "  \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n"
+                      "  \t\t\tEach action is expanded below this field with its count\n"
+                      "  \t\t\t\thit/s     - Number of times the tracepoint was hit per second\n\n");
+       }
+
+       if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+               printf("  devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
+                      "  \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
+                      "  \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
+                      "  \t\t\thence they will be omitted from the output when using SKB mode\n"
+                      "  \t\t\t\txmit/s    - Number of packets that were transmitted per second\n"
+                      "  \t\t\t\tdrop/s    - Number of packets that failed transmissions per second\n"
+                      "  \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
+                      "  \t\t\t\tbulk-avg  - Average number of packets processed for each event\n\n");
+       }
+}
+
+void sample_usage(char *argv[], const struct option *long_options,
+                 const char *doc, int mask, bool error)
+{
+       int i;
+
+       if (!error)
+               sample_print_help(mask);
+
+       printf("\n%s\nOption for %s:\n", doc, argv[0]);
+       for (i = 0; long_options[i].name != 0; i++) {
+               printf(" --%-15s", long_options[i].name);
+               if (long_options[i].flag != NULL)
+                       printf(" flag (internal value: %d)",
+                              *long_options[i].flag);
+               else
+                       printf("\t short-option: -%c", long_options[i].val);
+               printf("\n");
+       }
+       printf("\n");
+}
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+       unsigned int nr_cpus = libbpf_num_possible_cpus();
+       struct datarec *array;
+
+       array = calloc(nr_cpus, sizeof(*array));
+       if (!array) {
+               fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n",
+                       nr_cpus);
+               return NULL;
+       }
+       return array;
+}
+
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+       e->pair = pair;
+       INIT_HLIST_NODE(&e->node);
+       e->val.timestamp = gettime();
+       e->val.cpu = alloc_record_per_cpu();
+       if (!e->val.cpu)
+               return -ENOMEM;
+       return 0;
+}
+
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+       /* For percpu maps, userspace gets a value per possible CPU */
+       unsigned int nr_cpus = libbpf_num_possible_cpus();
+       __u64 sum_xdp_redirect = 0;
+       __u64 sum_processed = 0;
+       __u64 sum_xdp_pass = 0;
+       __u64 sum_xdp_drop = 0;
+       __u64 sum_dropped = 0;
+       __u64 sum_issue = 0;
+       int i;
+
+       /* Get time as close as possible to reading map contents */
+       rec->timestamp = gettime();
+
+       /* Record and sum values from each CPU */
+       for (i = 0; i < nr_cpus; i++) {
+               rec->cpu[i].processed = READ_ONCE(values[i].processed);
+               rec->cpu[i].dropped = READ_ONCE(values[i].dropped);
+               rec->cpu[i].issue = READ_ONCE(values[i].issue);
+               rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass);
+               rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop);
+               rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect);
+
+               sum_processed += rec->cpu[i].processed;
+               sum_dropped += rec->cpu[i].dropped;
+               sum_issue += rec->cpu[i].issue;
+               sum_xdp_pass += rec->cpu[i].xdp_pass;
+               sum_xdp_drop += rec->cpu[i].xdp_drop;
+               sum_xdp_redirect += rec->cpu[i].xdp_redirect;
+       }
+
+       rec->total.processed = sum_processed;
+       rec->total.dropped = sum_dropped;
+       rec->total.issue = sum_issue;
+       rec->total.xdp_pass = sum_xdp_pass;
+       rec->total.xdp_drop = sum_xdp_drop;
+       rec->total.xdp_redirect = sum_xdp_redirect;
+}
+
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       __u32 batch, count = 32;
+       struct datarec *values;
+       bool init = false;
+       __u64 *keys;
+       int i, ret;
+
+       keys = calloc(count, sizeof(__u64));
+       if (!keys)
+               return -ENOMEM;
+       values = calloc(count * nr_cpus, sizeof(struct datarec));
+       if (!values) {
+               free(keys);
+               return -ENOMEM;
+       }
+
+       for (;;) {
+               bool exit = false;
+
+               ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+                                          keys, values, &count, NULL);
+               if (ret < 0 && errno != ENOENT)
+                       break;
+               if (errno == ENOENT)
+                       exit = true;
+
+               init = true;
+               for (i = 0; i < count; i++) {
+                       struct map_entry *e, *x = NULL;
+                       __u64 pair = keys[i];
+                       struct datarec *arr;
+
+                       arr = &values[i * nr_cpus];
+                       hash_for_each_possible(rec->xmit_map, e, node, pair) {
+                               if (e->pair == pair) {
+                                       x = e;
+                                       break;
+                               }
+                       }
+                       if (!x) {
+                               x = calloc(1, sizeof(*x));
+                               if (!x)
+                                       goto cleanup;
+                               if (map_entry_init(x, pair) < 0) {
+                                       free(x);
+                                       goto cleanup;
+                               }
+                               hash_add(rec->xmit_map, &x->node, pair);
+                       }
+                       map_collect_percpu(arr, &x->val);
+               }
+
+               if (exit)
+                       break;
+               count = 32;
+       }
+
+       free(values);
+       free(keys);
+       return 0;
+cleanup:
+       free(values);
+       free(keys);
+       return -ENOMEM;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+       struct stats_record *rec;
+       int i;
+
+       rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+       if (!rec) {
+               fprintf(stderr, "Failed to allocate memory\n");
+               return NULL;
+       }
+
+       if (sample_mask & SAMPLE_RX_CNT) {
+               rec->rx_cnt.cpu = alloc_record_per_cpu();
+               if (!rec->rx_cnt.cpu) {
+                       fprintf(stderr,
+                               "Failed to allocate rx_cnt per-CPU array\n");
+                       goto end_rec;
+               }
+       }
+       if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+               for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+                       rec->redir_err[i].cpu = alloc_record_per_cpu();
+                       if (!rec->redir_err[i].cpu) {
+                               fprintf(stderr,
+                                       "Failed to allocate redir_err per-CPU array for "
+                                       "\"%s\" case\n",
+                                       xdp_redirect_err_names[i]);
+                               while (i--)
+                                       free(rec->redir_err[i].cpu);
+                               goto end_rx_cnt;
+                       }
+               }
+       }
+       if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+               rec->kthread.cpu = alloc_record_per_cpu();
+               if (!rec->kthread.cpu) {
+                       fprintf(stderr,
+                               "Failed to allocate kthread per-CPU array\n");
+                       goto end_redir;
+               }
+       }
+       if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+               for (i = 0; i < XDP_ACTION_MAX; i++) {
+                       rec->exception[i].cpu = alloc_record_per_cpu();
+                       if (!rec->exception[i].cpu) {
+                               fprintf(stderr,
+                                       "Failed to allocate exception per-CPU array for "
+                                       "\"%s\" case\n",
+                                       action2str(i));
+                               while (i--)
+                                       free(rec->exception[i].cpu);
+                               goto end_kthread;
+                       }
+               }
+       }
+       if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+               rec->devmap_xmit.cpu = alloc_record_per_cpu();
+               if (!rec->devmap_xmit.cpu) {
+                       fprintf(stderr,
+                               "Failed to allocate devmap_xmit per-CPU array\n");
+                       goto end_exception;
+               }
+       }
+       if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+               hash_init(rec->xmit_map);
+       if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+               for (i = 0; i < sample_n_cpus; i++) {
+                       rec->enq[i].cpu = alloc_record_per_cpu();
+                       if (!rec->enq[i].cpu) {
+                               fprintf(stderr,
+                                       "Failed to allocate enqueue per-CPU array for "
+                                       "CPU %d\n",
+                                       i);
+                               while (i--)
+                                       free(rec->enq[i].cpu);
+                               goto end_devmap_xmit;
+                       }
+               }
+       }
+
+       return rec;
+
+end_devmap_xmit:
+       free(rec->devmap_xmit.cpu);
+end_exception:
+       for (i = 0; i < XDP_ACTION_MAX; i++)
+               free(rec->exception[i].cpu);
+end_kthread:
+       free(rec->kthread.cpu);
+end_redir:
+       for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+               free(rec->redir_err[i].cpu);
+end_rx_cnt:
+       free(rec->rx_cnt.cpu);
+end_rec:
+       free(rec);
+       return NULL;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+       struct hlist_node *tmp;
+       struct map_entry *e;
+       int i;
+
+       for (i = 0; i < sample_n_cpus; i++)
+               free(r->enq[i].cpu);
+       hash_for_each_safe(r->xmit_map, i, tmp, e, node) {
+               hash_del(&e->node);
+               free(e->val.cpu);
+               free(e);
+       }
+       free(r->devmap_xmit.cpu);
+       for (i = 0; i < XDP_ACTION_MAX; i++)
+               free(r->exception[i].cpu);
+       free(r->kthread.cpu);
+       for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+               free(r->redir_err[i].cpu);
+       free(r->rx_cnt.cpu);
+       free(r);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+       double period_ = 0;
+       __u64 period = 0;
+
+       period = r->timestamp - p->timestamp;
+       if (period > 0)
+               period_ = ((double)period / NANOSEC_PER_SEC);
+
+       return period_;
+}
+
+static double sample_round(double val)
+{
+       if (val - floor(val) < 0.5)
+               return floor(val);
+       return ceil(val);
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+       __u64 packets = 0;
+       __u64 pps = 0;
+
+       if (period_ > 0) {
+               packets = r->processed - p->processed;
+               pps = sample_round(packets / period_);
+       }
+       return pps;
+}
+
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+       __u64 packets = 0;
+       __u64 pps = 0;
+
+       if (period_ > 0) {
+               packets = r->dropped - p->dropped;
+               pps = sample_round(packets / period_);
+       }
+       return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+       __u64 packets = 0;
+       __u64 pps = 0;
+
+       if (period_ > 0) {
+               packets = r->issue - p->issue;
+               pps = sample_round(packets / period_);
+       }
+       return pps;
+}
+
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+       __u64 packets = 0;
+       __u64 pps = 0;
+
+       if (period_ > 0) {
+               packets = r->info - p->info;
+               pps = sample_round(packets / period_);
+       }
+       return pps;
+}
+
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+                        double *xdp_drop, double *xdp_redirect, double period_)
+{
+       *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+       if (period_ > 0) {
+               *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+               *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+               *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+       }
+}
+
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+                            struct stats_record *stats_prev,
+                            unsigned int nr_cpus, struct sample_output *out)
+{
+       struct record *rec, *prev;
+       double t, pps, drop, err;
+       int i;
+
+       rec = &stats_rec->rx_cnt;
+       prev = &stats_prev->rx_cnt;
+       t = calc_period(rec, prev);
+
+       for (i = 0; i < nr_cpus; i++) {
+               struct datarec *r = &rec->cpu[i];
+               struct datarec *p = &prev->cpu[i];
+               char str[64];
+
+               pps = calc_pps(r, p, t);
+               drop = calc_drop_pps(r, p, t);
+               err = calc_errs_pps(r, p, t);
+               if (!pps && !drop && !err)
+                       continue;
+
+               snprintf(str, sizeof(str), "cpu:%d", i);
+               print_default("    %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                             "\n",
+                             str, PPS(pps), DROP(drop), ERR(err));
+       }
+
+       if (out) {
+               pps = calc_pps(&rec->total, &prev->total, t);
+               drop = calc_drop_pps(&rec->total, &prev->total, t);
+               err = calc_errs_pps(&rec->total, &prev->total, t);
+
+               out->rx_cnt.pps = pps;
+               out->rx_cnt.drop = drop;
+               out->rx_cnt.err = err;
+               out->totals.rx += pps;
+               out->totals.drop += drop;
+               out->totals.err += err;
+       }
+}
+
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+                                    struct stats_record *stats_prev,
+                                    unsigned int nr_cpus)
+{
+       struct record *rec, *prev;
+       double t, pps, drop, err;
+       int i, to_cpu;
+
+       /* cpumap enqueue stats */
+       for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+               rec = &stats_rec->enq[to_cpu];
+               prev = &stats_prev->enq[to_cpu];
+               t = calc_period(rec, prev);
+
+               pps = calc_pps(&rec->total, &prev->total, t);
+               drop = calc_drop_pps(&rec->total, &prev->total, t);
+               err = calc_errs_pps(&rec->total, &prev->total, t);
+
+               if (pps > 0 || drop > 0) {
+                       char str[64];
+
+                       snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+                       if (err > 0)
+                               err = pps / err; /* calc average bulk size */
+
+                       print_err(drop,
+                                 "  %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+                                         ".2f") "\n",
+                                 str, PPS(pps), DROP(drop), err, "bulk-avg");
+               }
+
+               for (i = 0; i < nr_cpus; i++) {
+                       struct datarec *r = &rec->cpu[i];
+                       struct datarec *p = &prev->cpu[i];
+                       char str[64];
+
+                       pps = calc_pps(r, p, t);
+                       drop = calc_drop_pps(r, p, t);
+                       err = calc_errs_pps(r, p, t);
+                       if (!pps && !drop && !err)
+                               continue;
+
+                       snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+                       if (err > 0)
+                               err = pps / err; /* calc average bulk size */
+                       print_default(
+                               "    %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+                                       ".2f") "\n",
+                               str, PPS(pps), DROP(drop), err, "bulk-avg");
+               }
+       }
+}
+
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+                                   struct stats_record *stats_prev,
+                                   unsigned int nr_cpus)
+{
+       double xdp_pass, xdp_drop, xdp_redirect;
+       struct record *rec, *prev;
+       double t;
+       int i;
+
+       rec = &stats_rec->kthread;
+       prev = &stats_prev->kthread;
+       t = calc_period(rec, prev);
+
+       calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+                    &xdp_redirect, t);
+       if (xdp_pass || xdp_drop || xdp_redirect) {
+               print_err(xdp_drop,
+                         "    %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+                         "xdp_stats", PASS(xdp_pass), DROP(xdp_drop),
+                         REDIR(xdp_redirect));
+       }
+
+       for (i = 0; i < nr_cpus; i++) {
+               struct datarec *r = &rec->cpu[i];
+               struct datarec *p = &prev->cpu[i];
+               char str[64];
+
+               calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t);
+               if (!xdp_pass && !xdp_drop && !xdp_redirect)
+                       continue;
+
+               snprintf(str, sizeof(str), "cpu:%d", i);
+               print_default("      %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                             "\n",
+                             str, PASS(xdp_pass), DROP(xdp_drop),
+                             REDIR(xdp_redirect));
+       }
+}
+
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+                                    struct stats_record *stats_prev,
+                                    unsigned int nr_cpus)
+{
+       struct record *rec, *prev;
+       double t, pps, drop, err;
+       int i;
+
+       rec = &stats_rec->kthread;
+       prev = &stats_prev->kthread;
+       t = calc_period(rec, prev);
+
+       pps = calc_pps(&rec->total, &prev->total, t);
+       drop = calc_drop_pps(&rec->total, &prev->total, t);
+       err = calc_errs_pps(&rec->total, &prev->total, t);
+
+       print_err(drop, "  %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+                 pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+                 "sched");
+
+       for (i = 0; i < nr_cpus; i++) {
+               struct datarec *r = &rec->cpu[i];
+               struct datarec *p = &prev->cpu[i];
+               char str[64];
+
+               pps = calc_pps(r, p, t);
+               drop = calc_drop_pps(r, p, t);
+               err = calc_errs_pps(r, p, t);
+               if (!pps && !drop && !err)
+                       continue;
+
+               snprintf(str, sizeof(str), "cpu:%d", i);
+               print_default("    %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                             "\n",
+                             str, PPS(pps), DROP(drop), err, "sched");
+       }
+}
+
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+                                  struct stats_record *stats_prev,
+                                  unsigned int nr_cpus,
+                                  struct sample_output *out)
+{
+       struct record *rec, *prev;
+       double t, pps;
+       int i;
+
+       rec = &stats_rec->redir_err[0];
+       prev = &stats_prev->redir_err[0];
+       t = calc_period(rec, prev);
+       for (i = 0; i < nr_cpus; i++) {
+               struct datarec *r = &rec->cpu[i];
+               struct datarec *p = &prev->cpu[i];
+               char str[64];
+
+               pps = calc_pps(r, p, t);
+               if (!pps)
+                       continue;
+
+               snprintf(str, sizeof(str), "cpu:%d", i);
+               print_default("    %-18s " FMT_COLUMNf "\n", str, REDIR(pps));
+       }
+
+       if (out) {
+               pps = calc_pps(&rec->total, &prev->total, t);
+               out->redir_cnt.suc = pps;
+               out->totals.redir += pps;
+       }
+}
+
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+                                      struct stats_record *stats_prev,
+                                      unsigned int nr_cpus,
+                                      struct sample_output *out)
+{
+       struct record *rec, *prev;
+       double t, drop, sum = 0;
+       int rec_i, i;
+
+       for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+               char str[64];
+
+               rec = &stats_rec->redir_err[rec_i];
+               prev = &stats_prev->redir_err[rec_i];
+               t = calc_period(rec, prev);
+
+               drop = calc_drop_pps(&rec->total, &prev->total, t);
+               if (drop > 0 && !out) {
+                       snprintf(str, sizeof(str),
+                                sample_log_level & LL_DEFAULT ? "%s total" :
+                                                                      "%s",
+                                xdp_redirect_err_names[rec_i]);
+                       print_err(drop, "    %-18s " FMT_COLUMNf "\n", str,
+                                 ERR(drop));
+               }
+
+               for (i = 0; i < nr_cpus; i++) {
+                       struct datarec *r = &rec->cpu[i];
+                       struct datarec *p = &prev->cpu[i];
+                       double drop;
+
+                       drop = calc_drop_pps(r, p, t);
+                       if (!drop)
+                               continue;
+
+                       snprintf(str, sizeof(str), "cpu:%d", i);
+                       print_default("       %-16s" FMT_COLUMNf "\n", str,
+                                     ERR(drop));
+               }
+
+               sum += drop;
+       }
+
+       if (out) {
+               out->redir_cnt.err = sum;
+               out->totals.err += sum;
+       }
+}
+
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+                                   struct stats_record *stats_prev,
+                                   unsigned int nr_cpus,
+                                   struct sample_output *out)
+{
+       double t, drop, sum = 0;
+       struct record *rec, *prev;
+       int rec_i, i;
+
+       for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+               rec = &stats_rec->exception[rec_i];
+               prev = &stats_prev->exception[rec_i];
+               t = calc_period(rec, prev);
+
+               drop = calc_drop_pps(&rec->total, &prev->total, t);
+               /* Fold out errors after heading */
+               sum += drop;
+
+               if (drop > 0 && !out) {
+                       print_always("    %-18s " FMT_COLUMNf "\n",
+                                    action2str(rec_i), ERR(drop));
+
+                       for (i = 0; i < nr_cpus; i++) {
+                               struct datarec *r = &rec->cpu[i];
+                               struct datarec *p = &prev->cpu[i];
+                               char str[64];
+                               double drop;
+
+                               drop = calc_drop_pps(r, p, t);
+                               if (!drop)
+                                       continue;
+
+                               snprintf(str, sizeof(str), "cpu:%d", i);
+                               print_default("       %-16s" FMT_COLUMNf "\n",
+                                             str, ERR(drop));
+                       }
+               }
+       }
+
+       if (out) {
+               out->except_cnt.hits = sum;
+               out->totals.err += sum;
+       }
+}
+
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+                                 struct stats_record *stats_prev,
+                                 unsigned int nr_cpus,
+                                 struct sample_output *out)
+{
+       double pps, drop, info, err;
+       struct record *rec, *prev;
+       double t;
+       int i;
+
+       rec = &stats_rec->devmap_xmit;
+       prev = &stats_prev->devmap_xmit;
+       t = calc_period(rec, prev);
+       for (i = 0; i < nr_cpus; i++) {
+               struct datarec *r = &rec->cpu[i];
+               struct datarec *p = &prev->cpu[i];
+               char str[64];
+
+               pps = calc_pps(r, p, t);
+               drop = calc_drop_pps(r, p, t);
+               err = calc_errs_pps(r, p, t);
+
+               if (!pps && !drop && !err)
+                       continue;
+
+               snprintf(str, sizeof(str), "cpu:%d", i);
+               info = calc_info_pps(r, p, t);
+               if (info > 0)
+                       info = (pps + drop) / info; /* calc avg bulk */
+               print_default("     %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                                     __COLUMN(".2f") "\n",
+                             str, XMIT(pps), DROP(drop), err, "drv_err/s",
+                             info, "bulk-avg");
+       }
+       if (out) {
+               pps = calc_pps(&rec->total, &prev->total, t);
+               drop = calc_drop_pps(&rec->total, &prev->total, t);
+               info = calc_info_pps(&rec->total, &prev->total, t);
+               if (info > 0)
+                       info = (pps + drop) / info; /* calc avg bulk */
+               err = calc_errs_pps(&rec->total, &prev->total, t);
+
+               out->xmit_cnt.pps = pps;
+               out->xmit_cnt.drop = drop;
+               out->xmit_cnt.bavg = info;
+               out->xmit_cnt.err = err;
+               out->totals.xmit += pps;
+               out->totals.drop_xmit += drop;
+               out->totals.err += err;
+       }
+}
+
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+                                       struct stats_record *stats_prev,
+                                       unsigned int nr_cpus,
+                                       struct sample_output *out,
+                                       bool xmit_total)
+{
+       double pps, drop, info, err;
+       struct map_entry *entry;
+       struct record *r, *p;
+       double t;
+       int bkt;
+
+       hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+               struct map_entry *e, *x = NULL;
+               char ifname_from[IFNAMSIZ];
+               char ifname_to[IFNAMSIZ];
+               const char *fstr, *tstr;
+               unsigned long prev_time;
+               struct record beg = {};
+               __u32 from_idx, to_idx;
+               char str[128];
+               __u64 pair;
+               int i;
+
+               prev_time = sample_interval * NANOSEC_PER_SEC;
+
+               pair = entry->pair;
+               from_idx = pair >> 32;
+               to_idx = pair & 0xFFFFFFFF;
+
+               r = &entry->val;
+               beg.timestamp = r->timestamp - prev_time;
+
+               /* Find matching entry from stats_prev map */
+               hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+                       if (e->pair == pair) {
+                               x = e;
+                               break;
+                       }
+               }
+               if (x)
+                       p = &x->val;
+               else
+                       p = &beg;
+               t = calc_period(r, p);
+               pps = calc_pps(&r->total, &p->total, t);
+               drop = calc_drop_pps(&r->total, &p->total, t);
+               info = calc_info_pps(&r->total, &p->total, t);
+               if (info > 0)
+                       info = (pps + drop) / info; /* calc avg bulk */
+               err = calc_errs_pps(&r->total, &p->total, t);
+
+               if (out) {
+                       /* We are responsible for filling out totals */
+                       out->totals.xmit += pps;
+                       out->totals.drop_xmit += drop;
+                       out->totals.err += err;
+                       continue;
+               }
+
+               fstr = tstr = NULL;
+               if (if_indextoname(from_idx, ifname_from))
+                       fstr = ifname_from;
+               if (if_indextoname(to_idx, ifname_to))
+                       tstr = ifname_to;
+
+               snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+                        tstr ?: "?");
+               /* Skip idle streams of redirection */
+               if (pps || drop || err) {
+                       print_err(drop,
+                                 "  %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                                 __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+                                 err, "drv_err/s", info, "bulk-avg");
+               }
+
+               for (i = 0; i < nr_cpus; i++) {
+                       struct datarec *rc = &r->cpu[i];
+                       struct datarec *pc, p_beg = {};
+                       char str[64];
+
+                       pc = p == &beg ? &p_beg : &p->cpu[i];
+
+                       pps = calc_pps(rc, pc, t);
+                       drop = calc_drop_pps(rc, pc, t);
+                       err = calc_errs_pps(rc, pc, t);
+
+                       if (!pps && !drop && !err)
+                               continue;
+
+                       snprintf(str, sizeof(str), "cpu:%d", i);
+                       info = calc_info_pps(rc, pc, t);
+                       if (info > 0)
+                               info = (pps + drop) / info; /* calc avg bulk */
+
+                       print_default("     %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+                                     __COLUMN(".2f") "\n", str, XMIT(pps),
+                                     DROP(drop), err, "drv_err/s", info, "bulk-avg");
+               }
+       }
+}
+
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+                       struct stats_record *p, struct sample_output *out)
+{
+       int nr_cpus = libbpf_num_possible_cpus();
+       const char *str;
+
+       print_always("%-23s", prefix ?: "Summary");
+       if (mask & SAMPLE_RX_CNT)
+               print_always(FMT_COLUMNl, RX(out->totals.rx));
+       if (mask & SAMPLE_REDIRECT_CNT)
+               print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+       printf(FMT_COLUMNl,
+              out->totals.err + out->totals.drop + out->totals.drop_xmit,
+              "err,drop/s");
+       if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+           mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+               printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+       printf("\n");
+
+       if (mask & SAMPLE_RX_CNT) {
+               str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+                                   "receive total" :
+                                   "receive";
+               print_err((out->rx_cnt.err || out->rx_cnt.drop),
+                         "  %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+                         str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+                         ERR(out->rx_cnt.err));
+
+               stats_get_rx_cnt(r, p, nr_cpus, NULL);
+       }
+
+       if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+               stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+       if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+               stats_get_cpumap_kthread(r, p, nr_cpus);
+               stats_get_cpumap_remote(r, p, nr_cpus);
+       }
+
+       if (mask & SAMPLE_REDIRECT_CNT) {
+               str = out->redir_cnt.suc ? "redirect total" : "redirect";
+               print_default("  %-20s " FMT_COLUMNl "\n", str,
+                             REDIR(out->redir_cnt.suc));
+
+               stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+       }
+
+       if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+               str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+                                   "redirect_err total" :
+                                   "redirect_err";
+               print_err(out->redir_cnt.err, "  %-20s " FMT_COLUMNl "\n", str,
+                         ERR(out->redir_cnt.err));
+
+               stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+       }
+
+       if (mask & SAMPLE_EXCEPTION_CNT) {
+               str = out->except_cnt.hits ? "xdp_exception total" :
+                                                  "xdp_exception";
+
+               print_err(out->except_cnt.hits, "  %-20s " FMT_COLUMNl "\n", str,
+                         HITS(out->except_cnt.hits));
+
+               stats_get_exception_cnt(r, p, nr_cpus, NULL);
+       }
+
+       if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+               str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+                                   "devmap_xmit total" :
+                                   "devmap_xmit";
+
+               print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+                         "  %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+                                 __COLUMN(".2f") "\n",
+                         str, XMIT(out->xmit_cnt.pps),
+                         DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
+                         "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+               stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+       }
+
+       if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+               stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
+                                           mask & SAMPLE_DEVMAP_XMIT_CNT);
+
+       if (sample_log_level & LL_DEFAULT ||
+           ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+               sample_err_exp = false;
+               printf("\n");
+       }
+}
+
+int sample_setup_maps(struct bpf_map **maps)
+{
+       sample_n_cpus = libbpf_num_possible_cpus();
+
+       for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+               sample_map[i] = maps[i];
+
+               switch (i) {
+               case MAP_RX:
+               case MAP_CPUMAP_KTHREAD:
+               case MAP_DEVMAP_XMIT:
+                       sample_map_count[i] = sample_n_cpus;
+                       break;
+               case MAP_REDIRECT_ERR:
+                       sample_map_count[i] =
+                               XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+                       break;
+               case MAP_EXCEPTION:
+                       sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+               case MAP_CPUMAP_ENQUEUE:
+                       sample_map_count[i] = sample_n_cpus * sample_n_cpus;
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0)
+                       return -errno;
+       }
+       sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+       return 0;
+}
+
+static int sample_setup_maps_mappings(void)
+{
+       for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+               size_t size = sample_map_count[i] * sizeof(struct datarec);
+
+               sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE,
+                                     MAP_SHARED, bpf_map__fd(sample_map[i]), 0);
+               if (sample_mmap[i] == MAP_FAILED)
+                       return -errno;
+       }
+       return 0;
+}
+
+int __sample_init(int mask)
+{
+       sigset_t st;
+
+       sigemptyset(&st);
+       sigaddset(&st, SIGQUIT);
+       sigaddset(&st, SIGINT);
+       sigaddset(&st, SIGTERM);
+
+       if (sigprocmask(SIG_BLOCK, &st, NULL) < 0)
+               return -errno;
+
+       sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | SFD_NONBLOCK);
+       if (sample_sig_fd < 0)
+               return -errno;
+
+       sample_mask = mask;
+
+       return sample_setup_maps_mappings();
+}
+
+static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
+{
+       __u32 cur_prog_id = 0;
+       int ret;
+
+       if (prog_id) {
+               ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+               if (ret < 0)
+                       return -errno;
+
+               if (prog_id != cur_prog_id) {
+                       print_always(
+                               "Program on ifindex %d does not match installed "
+                               "program, skipping unload\n",
+                               ifindex);
+                       return -ENOENT;
+               }
+       }
+
+       return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+}
+
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+                      bool force)
+{
+       int ret, xdp_flags = 0;
+       __u32 prog_id = 0;
+
+       if (sample_xdp_cnt == 32) {
+               fprintf(stderr,
+                       "Total limit for installed XDP programs in a sample reached\n");
+               return -ENOTSUP;
+       }
+
+       xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
+       xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+       ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
+                                 xdp_flags);
+       if (ret < 0) {
+               ret = -errno;
+               fprintf(stderr,
+                       "Failed to install program \"%s\" on ifindex %d, mode = %s, "
+                       "force = %s: %s\n",
+                       bpf_program__name(xdp_prog), ifindex,
+                       generic ? "skb" : "native", force ? "true" : "false",
+                       strerror(-ret));
+               return ret;
+       }
+
+       ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+       if (ret < 0) {
+               ret = -errno;
+               fprintf(stderr,
+                       "Failed to get XDP program id for ifindex %d, removing program: %s\n",
+                       ifindex, strerror(errno));
+               __sample_remove_xdp(ifindex, 0, xdp_flags);
+               return ret;
+       }
+       sample_xdp_progs[sample_xdp_cnt++] =
+               (struct xdp_desc){ ifindex, prog_id, xdp_flags };
+
+       return 0;
+}
+
+static void sample_summary_print(void)
+{
+       double period = sample_out.rx_cnt.pps;
+
+       if (sample_out.totals.rx) {
+               double pkts = sample_out.totals.rx;
+
+               print_always("  Packets received    : %'-10llu\n",
+                            sample_out.totals.rx);
+               print_always("  Average packets/s   : %'-10.0f\n",
+                            sample_round(pkts / period));
+       }
+       if (sample_out.totals.redir) {
+               double pkts = sample_out.totals.redir;
+
+               print_always("  Packets redirected  : %'-10llu\n",
+                            sample_out.totals.redir);
+               print_always("  Average redir/s     : %'-10.0f\n",
+                            sample_round(pkts / period));
+       }
+       if (sample_out.totals.drop)
+               print_always("  Rx dropped          : %'-10llu\n",
+                            sample_out.totals.drop);
+       if (sample_out.totals.drop_xmit)
+               print_always("  Tx dropped          : %'-10llu\n",
+                            sample_out.totals.drop_xmit);
+       if (sample_out.totals.err)
+               print_always("  Errors recorded     : %'-10llu\n",
+                            sample_out.totals.err);
+       if (sample_out.totals.xmit) {
+               double pkts = sample_out.totals.xmit;
+
+               print_always("  Packets transmitted : %'-10llu\n",
+                            sample_out.totals.xmit);
+               print_always("  Average transmit/s  : %'-10.0f\n",
+                            sample_round(pkts / period));
+       }
+}
+
+void sample_exit(int status)
+{
+       size_t size;
+
+       for (int i = 0; i < NUM_MAP; i++) {
+               size = sample_map_count[i] * sizeof(**sample_mmap);
+               munmap(sample_mmap[i], size);
+       }
+       while (sample_xdp_cnt--) {
+               int i = sample_xdp_cnt, ifindex, xdp_flags;
+               __u32 prog_id;
+
+               prog_id = sample_xdp_progs[i].prog_id;
+               ifindex = sample_xdp_progs[i].ifindex;
+               xdp_flags = sample_xdp_progs[i].flags;
+
+               __sample_remove_xdp(ifindex, prog_id, xdp_flags);
+       }
+       sample_summary_print();
+       close(sample_sig_fd);
+       exit(status);
+}
+
+static int sample_stats_collect(struct stats_record *rec)
+{
+       int i;
+
+       if (sample_mask & SAMPLE_RX_CNT)
+               map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+
+       if (sample_mask & SAMPLE_REDIRECT_CNT)
+               map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]);
+
+       if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+               for (i = 1; i < XDP_REDIRECT_ERR_MAX; i++)
+                       map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus],
+                                          &rec->redir_err[i]);
+       }
+
+       if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+               for (i = 0; i < sample_n_cpus; i++)
+                       map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus],
+                                          &rec->enq[i]);
+
+       if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT)
+               map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+                                  &rec->kthread);
+
+       if (sample_mask & SAMPLE_EXCEPTION_CNT)
+               for (i = 0; i < XDP_ACTION_MAX; i++)
+                       map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus],
+                                          &rec->exception[i]);
+
+       if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
+               map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit);
+
+       if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+               if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+static void sample_summary_update(struct sample_output *out, int interval)
+{
+       sample_out.totals.rx += out->totals.rx;
+       sample_out.totals.redir += out->totals.redir;
+       sample_out.totals.drop += out->totals.drop;
+       sample_out.totals.drop_xmit += out->totals.drop_xmit;
+       sample_out.totals.err += out->totals.err;
+       sample_out.totals.xmit += out->totals.xmit;
+       sample_out.rx_cnt.pps += interval;
+}
+
+static void sample_stats_print(int mask, struct stats_record *cur,
+                              struct stats_record *prev, char *prog_name,
+                              int interval)
+{
+       struct sample_output out = {};
+
+       if (mask & SAMPLE_RX_CNT)
+               stats_get_rx_cnt(cur, prev, 0, &out);
+       if (mask & SAMPLE_REDIRECT_CNT)
+               stats_get_redirect_cnt(cur, prev, 0, &out);
+       if (mask & SAMPLE_REDIRECT_ERR_CNT)
+               stats_get_redirect_err_cnt(cur, prev, 0, &out);
+       if (mask & SAMPLE_EXCEPTION_CNT)
+               stats_get_exception_cnt(cur, prev, 0, &out);
+       if (mask & SAMPLE_DEVMAP_XMIT_CNT)
+               stats_get_devmap_xmit(cur, prev, 0, &out);
+       else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+               stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+                                           mask & SAMPLE_DEVMAP_XMIT_CNT);
+       sample_summary_update(&out, interval);
+
+       stats_print(prog_name, mask, cur, prev, &out);
+}
+
+void sample_switch_mode(void)
+{
+       sample_log_level ^= LL_DEBUG - 1;
+}
+
+static int sample_signal_cb(void)
+{
+       struct signalfd_siginfo si;
+       int r;
+
+       r = read(sample_sig_fd, &si, sizeof(si));
+       if (r < 0)
+               return -errno;
+
+       switch (si.ssi_signo) {
+       case SIGQUIT:
+               sample_switch_mode();
+               printf("\n");
+               break;
+       default:
+               printf("\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Pointer swap trick */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+       struct stats_record *tmp;
+
+       tmp = *a;
+       *a = *b;
+       *b = tmp;
+}
+
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+                          struct stats_record **prev, int interval)
+{
+       char line[64] = "Summary";
+       int ret;
+       __u64 t;
+
+       ret = read(timerfd, &t, sizeof(t));
+       if (ret < 0)
+               return -errno;
+
+       swap(prev, rec);
+       ret = sample_stats_collect(*rec);
+       if (ret < 0)
+               return ret;
+
+       if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+               char fi[IFNAMSIZ];
+               char to[IFNAMSIZ];
+               const char *f, *t;
+
+               f = t = NULL;
+               if (if_indextoname(sample_xdp_progs[0].ifindex, fi))
+                       f = fi;
+               if (if_indextoname(sample_xdp_progs[1].ifindex, to))
+                       t = to;
+
+               snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
+       }
+
+       sample_stats_print(sample_mask, *rec, *prev, line, interval);
+       return 0;
+}
+
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+       struct timespec ts = { interval, 0 };
+       struct itimerspec its = { ts, ts };
+       struct stats_record *rec, *prev;
+       struct pollfd pfd[2] = {};
+       int timerfd, ret;
+
+       if (!interval) {
+               fprintf(stderr, "Incorrect interval 0\n");
+               return -EINVAL;
+       }
+       sample_interval = interval;
+       /* Pretty print numbers */
+       setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+       timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+       if (timerfd < 0)
+               return -errno;
+       timerfd_settime(timerfd, 0, &its, NULL);
+
+       pfd[0].fd = sample_sig_fd;
+       pfd[0].events = POLLIN;
+
+       pfd[1].fd = timerfd;
+       pfd[1].events = POLLIN;
+
+       ret = -ENOMEM;
+       rec = alloc_stats_record();
+       if (!rec)
+               goto end;
+       prev = alloc_stats_record();
+       if (!prev)
+               goto end_rec;
+
+       ret = sample_stats_collect(rec);
+       if (ret < 0)
+               goto end_rec_prev;
+
+       for (;;) {
+               ret = poll(pfd, 2, -1);
+               if (ret < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       else
+                               break;
+               }
+
+               if (pfd[0].revents & POLLIN)
+                       ret = sample_signal_cb();
+               else if (pfd[1].revents & POLLIN)
+                       ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+
+               if (ret)
+                       break;
+
+               if (post_cb)
+                       post_cb(ctx);
+       }
+
+end_rec_prev:
+       free_stats_record(prev);
+end_rec:
+       free_stats_record(rec);
+end:
+       close(timerfd);
+
+       return ret;
+}
+
+const char *get_driver_name(int ifindex)
+{
+       struct ethtool_drvinfo drv = {};
+       char ifname[IF_NAMESIZE];
+       static char drvname[32];
+       struct ifreq ifr = {};
+       int fd, r = 0;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return "[error]";
+
+       if (!if_indextoname(ifindex, ifname))
+               goto end;
+
+       drv.cmd = ETHTOOL_GDRVINFO;
+       safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+       ifr.ifr_data = (void *)&drv;
+
+       r = ioctl(fd, SIOCETHTOOL, &ifr);
+       if (r)
+               goto end;
+
+       safe_strncpy(drvname, drv.driver, sizeof(drvname));
+
+       close(fd);
+       return drvname;
+
+end:
+       r = errno;
+       close(fd);
+       return r == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr = {};
+       int fd, r;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return -errno;
+
+       if (!if_indextoname(ifindex, ifname)) {
+               r = -errno;
+               goto end;
+       }
+
+       safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+       r = ioctl(fd, SIOCGIFHWADDR, &ifr);
+       if (r) {
+               r = -errno;
+               goto end;
+       }
+
+       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+
+end:
+       close(fd);
+       return r;
+}
+
+__attribute__((constructor)) static void sample_ctor(void)
+{
+       if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+               fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+                       strerror(errno));
+               /* Just exit, nothing to cleanup right now */
+               exit(EXIT_FAIL_BPF);
+       }
+}
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
new file mode 100644 (file)
index 0000000..d97465f
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "xdp_sample_shared.h"
+
+enum stats_mask {
+       _SAMPLE_REDIRECT_MAP         = 1U << 0,
+       SAMPLE_RX_CNT                = 1U << 1,
+       SAMPLE_REDIRECT_ERR_CNT      = 1U << 2,
+       SAMPLE_CPUMAP_ENQUEUE_CNT    = 1U << 3,
+       SAMPLE_CPUMAP_KTHREAD_CNT    = 1U << 4,
+       SAMPLE_EXCEPTION_CNT         = 1U << 5,
+       SAMPLE_DEVMAP_XMIT_CNT       = 1U << 6,
+       SAMPLE_REDIRECT_CNT          = 1U << 7,
+       SAMPLE_REDIRECT_MAP_CNT      = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+       SAMPLE_REDIRECT_ERR_MAP_CNT  = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+       SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+       SAMPLE_SKIP_HEADING          = 1U << 9,
+};
+
+/* Exit return codes */
+#define EXIT_OK                        0
+#define EXIT_FAIL              1
+#define EXIT_FAIL_OPTION       2
+#define EXIT_FAIL_XDP          3
+#define EXIT_FAIL_BPF          4
+#define EXIT_FAIL_MEM          5
+
+int sample_setup_maps(struct bpf_map **maps);
+int __sample_init(int mask);
+void sample_exit(int status);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+
+void sample_switch_mode(void);
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+                      bool force);
+void sample_usage(char *argv[], const struct option *long_options,
+                 const char *doc, int mask, bool error);
+
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+       if (!size)
+               return dst;
+       strncpy(dst, src, size - 1);
+       dst[size - 1] = '\0';
+       return dst;
+}
+#pragma GCC diagnostic pop
+
+#define __attach_tp(name)                                                      \
+       ({                                                                     \
+               if (!bpf_program__is_tracing(skel->progs.name))                \
+                       return -EINVAL;                                        \
+               skel->links.name = bpf_program__attach(skel->progs.name);      \
+               if (!skel->links.name)                                         \
+                       return -errno;                                         \
+       })
+
+#define sample_init_pre_load(skel)                                             \
+       ({                                                                     \
+               skel->rodata->nr_cpus = libbpf_num_possible_cpus();            \
+               sample_setup_maps((struct bpf_map *[]){                        \
+                       skel->maps.rx_cnt, skel->maps.redir_err_cnt,           \
+                       skel->maps.cpumap_enqueue_cnt,                         \
+                       skel->maps.cpumap_kthread_cnt,                         \
+                       skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt,  \
+                       skel->maps.devmap_xmit_cnt_multi });                   \
+       })
+
+#define DEFINE_SAMPLE_INIT(name)                                               \
+       static int sample_init(struct name *skel, int mask)                    \
+       {                                                                      \
+               int ret;                                                       \
+               ret = __sample_init(mask);                                     \
+               if (ret < 0)                                                   \
+                       return ret;                                            \
+               if (mask & SAMPLE_REDIRECT_MAP_CNT)                            \
+                       __attach_tp(tp_xdp_redirect_map);                      \
+               if (mask & SAMPLE_REDIRECT_CNT)                                \
+                       __attach_tp(tp_xdp_redirect);                          \
+               if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT)                        \
+                       __attach_tp(tp_xdp_redirect_map_err);                  \
+               if (mask & SAMPLE_REDIRECT_ERR_CNT)                            \
+                       __attach_tp(tp_xdp_redirect_err);                      \
+               if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)                          \
+                       __attach_tp(tp_xdp_cpumap_enqueue);                    \
+               if (mask & SAMPLE_CPUMAP_KTHREAD_CNT)                          \
+                       __attach_tp(tp_xdp_cpumap_kthread);                    \
+               if (mask & SAMPLE_EXCEPTION_CNT)                               \
+                       __attach_tp(tp_xdp_exception);                         \
+               if (mask & SAMPLE_DEVMAP_XMIT_CNT)                             \
+                       __attach_tp(tp_xdp_devmap_xmit);                       \
+               if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)                       \
+                       __attach_tp(tp_xdp_devmap_xmit_multi);                 \
+               return 0;                                                      \
+       }
+
+#endif
index 33d0bde..49d7a6a 100644 (file)
@@ -1,12 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2018 Intel Corporation. */
 
-#include <asm/barrier.h>
 #include <errno.h>
 #include <getopt.h>
 #include <libgen.h>
 #include <linux/bpf.h>
-#include <linux/compiler.h>
 #include <linux/if_link.h>
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
@@ -653,17 +651,15 @@ out:
        return result;
 }
 
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
 /*
  *     This is a version of ip_compute_csum() optimized for IP headers,
  *     which always checksum on 4 octet boundaries.
  *     This function code has been taken from
  *     Linux kernel lib/checksum.c
  */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-       return (__force __sum16)~do_csum(iph, ihl * 4);
+       return (__sum16)~do_csum(iph, ihl * 4);
 }
 
 /*
@@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
  */
 static inline __sum16 csum_fold(__wsum csum)
 {
-       u32 sum = (__force u32)csum;
+       u32 sum = (u32)csum;
 
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __sum16)~sum;
+       return (__sum16)~sum;
 }
 
 /*
@@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
                          __u32 len, __u8 proto, __wsum sum)
 {
-       unsigned long long s = (__force u32)sum;
+       unsigned long long s = (u32)sum;
 
-       s += (__force u32)saddr;
-       s += (__force u32)daddr;
+       s += (u32)saddr;
+       s += (u32)daddr;
 #ifdef __BIG_ENDIAN__
        s += proto + len;
 #else
        s += (proto + len) << 8;
 #endif
-       return (__force __wsum)from64to32(s);
+       return (__wsum)from64to32(s);
 }
 
 /*
index a335393..9331942 100644 (file)
@@ -123,7 +123,7 @@ function root_check_run_with_sudo() {
     if [ "$EUID" -ne 0 ]; then
        if [ -x $0 ]; then # Directly executable use sudo
            info "Not root, running with sudo"
-            sudo "$0" "$@"
+            sudo -E "$0" "$@"
             exit $?
        fi
        err 4 "cannot perform sudo run of $0"
index 30a610b..99ec068 100755 (executable)
@@ -89,14 +89,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     pg_set $dev "burst $BURST"
 done
 
+# Run if user hits control-c
+function print_result() {
+    # Print results
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
 # start_run
 echo "Running... ctrl^C to stop" >&2
 pg_ctrl "start"
 echo "Done" >&2
 
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
index a6195bd..04b0dd0 100755 (executable)
@@ -69,14 +69,21 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     pg_set $dev "xmit_mode queue_xmit"
 done
 
+# Run if user hits control-c
+function print_result {
+    # Print results
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
 # start_run
 echo "Running... ctrl^C to stop" >&2
 pg_ctrl "start"
 echo "Done" >&2
 
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+print_result
index 246cfe0..09a92ea 100755 (executable)
@@ -79,15 +79,22 @@ pg_set $DEV "flag UDPSRC_RND"
 pg_set $DEV "udp_src_min $UDP_SRC_MIN"
 pg_set $DEV "udp_src_max $UDP_SRC_MAX"
 
+# Run if user hits control-c
+function print_result() {
+    # Print results
+    echo "Result device: $DEV"
+    cat /proc/net/pktgen/$DEV
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
 if [ -z "$APPEND" ]; then
     # start_run
     echo "Running... ctrl^C to stop" >&2
     pg_ctrl "start"
     echo "Done" >&2
 
-    # Print results
-    echo "Result device: $DEV"
-    cat /proc/net/pktgen/$DEV
+    print_result
 else
     echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
 fi
\ No newline at end of file
index c6af3d9..7fa41c8 100755 (executable)
@@ -83,18 +83,25 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
-if [ -z "$APPEND" ]; then
-    # start_run
-    echo "Running... ctrl^C to stop" >&2
-    pg_ctrl "start"
-    echo "Done" >&2
-
+# Run if user hits control-c
+function print_result() {
     # Print results
     for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
         dev=${DEV}@${thread}
         echo "Device: $dev"
         cat /proc/net/pktgen/$dev | grep -A2 "Result:"
     done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+if [ -z "$APPEND" ]; then
+    # start_run
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
+
+    print_result
 else
     echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
 fi
index ab87de4..8bf2fdf 100755 (executable)
@@ -85,7 +85,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 done
 
 # Run if user hits control-c
-function control_c() {
+function print_result() {
     # Print results
     for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
        dev=${DEV}@${thread}
@@ -94,11 +94,13 @@ function control_c() {
     done
 }
 # trap keyboard interrupt (Ctrl-C)
-trap control_c SIGINT
+trap true SIGINT
 
 if [ -z "$APPEND" ]; then
     echo "Running... ctrl^C to stop" >&2
     pg_ctrl "start"
+
+    print_result
 else
     echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
 fi
index 56c5f5a..cff51f8 100755 (executable)
@@ -13,13 +13,15 @@ root_check_run_with_sudo "$@"
 # Parameter parsing via include
 source ${basedir}/parameters.sh
 # Set some default params, if they didn't get set
-[ -z "$DEST_IP" ]   && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+    [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 [ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
 if [ -n "$DEST_IP" ]; then
-    validate_addr $DEST_IP
-    read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
 fi
 if [ -n "$DST_PORT" ]; then
     read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
@@ -62,8 +64,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Single destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst_min $DST_MIN"
-    pg_set $dev "dst_max $DST_MAX"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
index 6e0effa..3578d0a 100755 (executable)
@@ -17,14 +17,16 @@ root_check_run_with_sudo "$@"
 # Parameter parsing via include
 source ${basedir}/parameters.sh
 # Set some default params, if they didn't get set
-[ -z "$DEST_IP" ]   && DEST_IP="198.18.0.42"
+if [ -z "$DEST_IP" ]; then
+    [ -z "$IP6" ] && DEST_IP="198.18.0.42" || DEST_IP="FD00::1"
+fi
 [ -z "$DST_MAC" ]   && DST_MAC="90:e2:ba:ff:ff:ff"
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 [ -z "$BURST" ]     && BURST=32
 [ -z "$COUNT" ]     && COUNT="0" # Zero means indefinitely
 if [ -n "$DEST_IP" ]; then
-    validate_addr $DEST_IP
-    read -r DST_MIN DST_MAX <<< $(parse_addr $DEST_IP)
+    validate_addr${IP6} $DEST_IP
+    read -r DST_MIN DST_MAX <<< $(parse_addr${IP6} $DEST_IP)
 fi
 if [ -n "$DST_PORT" ]; then
     read -r UDP_DST_MIN UDP_DST_MAX <<< $(parse_ports $DST_PORT)
@@ -52,8 +54,8 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
 
     # Single destination
     pg_set $dev "dst_mac $DST_MAC"
-    pg_set $dev "dst_min $DST_MIN"
-    pg_set $dev "dst_max $DST_MAX"
+    pg_set $dev "dst${IP6}_min $DST_MIN"
+    pg_set $dev "dst${IP6}_max $DST_MAX"
 
     if [ -n "$DST_PORT" ]; then
        # Single destination port or random port range
index 7c27923..264cc5d 100755 (executable)
@@ -100,12 +100,8 @@ for ((i = 0; i < $THREADS; i++)); do
     pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
-# start_run
-if [ -z "$APPEND" ]; then
-    echo "Running... ctrl^C to stop" >&2
-    pg_ctrl "start"
-    echo "Done" >&2
-
+# Run if user hits control-c
+function print_result() {
     # Print results
     for ((i = 0; i < $THREADS; i++)); do
         thread=${cpu_array[$((i+F_THREAD))]}
@@ -113,6 +109,17 @@ if [ -z "$APPEND" ]; then
         echo "Device: $dev"
         cat /proc/net/pktgen/$dev | grep -A2 "Result:"
     done
+}
+# trap keyboard interrupt (Ctrl-C)
+trap true SIGINT
+
+# start_run
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
+
+    print_result
 else
     echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
 fi
index 2d94025..00ac7b7 100755 (executable)
@@ -547,6 +547,7 @@ class PrinterHelpers(Printer):
             'struct inode',
             'struct socket',
             'struct file',
+            'struct bpf_timer',
     ]
     known_types = {
             '...',
@@ -594,6 +595,7 @@ class PrinterHelpers(Printer):
             'struct inode',
             'struct socket',
             'struct file',
+            'struct bpf_timer',
     }
     mapped_types = {
             'u8': '__u8',
index b0032c4..2143f59 100644 (file)
@@ -1330,7 +1330,9 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
                        return SECCLASS_SMC_SOCKET;
                case PF_XDP:
                        return SECCLASS_XDP_SOCKET;
-#if PF_MAX > 45
+               case PF_MCTP:
+                       return SECCLASS_MCTP_SOCKET;
+#if PF_MAX > 46
 #error New address family defined, please update this function.
 #endif
                }
index 62d19bc..084757f 100644 (file)
@@ -246,6 +246,8 @@ struct security_class_mapping secclass_map[] = {
            NULL } },
        { "xdp_socket",
          { COMMON_SOCK_PERMS, NULL } },
+       { "mctp_socket",
+         { COMMON_SOCK_PERMS, NULL } },
        { "perf_event",
          { "open", "cpu", "kernel", "tracepoint", "read", "write", NULL } },
        { "lockdown",
@@ -255,6 +257,6 @@ struct security_class_mapping secclass_map[] = {
        { NULL }
   };
 
-#if PF_MAX > 45
+#if PF_MAX > 46
 #error New address family defined, please update secclass_map.
 #endif
index ff4d327..88b28aa 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **btf** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } |
+               { **-B** | **--base-btf** } }
 
        *COMMANDS* := { **dump** | **help** }
 
@@ -73,6 +74,20 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -B, --base-btf *FILE*
+                 Pass a base BTF object. Base BTF objects are typically used
+                 with BTF objects for kernel modules. To avoid duplicating
+                 all kernel symbols required by modules, BTF objects for
+                 modules are "split", they are built incrementally on top of
+                 the kernel (vmlinux) BTF object. So the base BTF reference
+                 should usually point to the kernel BTF.
+
+                 When the main BTF object to process (for example, the
+                 module BTF to dump) is passed as a *FILE*, bpftool attempts
+                 to autodetect the path for the base object, and passing
+                 this option is optional. When the main BTF object is passed
+                 through other handles, this option becomes necessary.
+
 EXAMPLES
 ========
 **# bpftool btf dump id 1226**
@@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported:
 **# bpftool btf dump prog tag b88e0a09b1d9759d**
 
 **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+  [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+          'alert' type_id=393 bits_offset=0
+          'ara' type_id=56050 bits_offset=256
+  [104849] STRUCT 'alert_data' size=12 vlen=3
+          'addr' type_id=16 bits_offset=0
+          'type' type_id=56053 bits_offset=32
+          'data' type_id=7 bits_offset=64
+  [104850] PTR '(anon)' type_id=104848
+  [104851] PTR '(anon)' type_id=104849
+  [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+  [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+  [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+          'ara' type_id=56050
+  [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+  [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+  [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+          'ara' type_id=56050
+          'id' type_id=56056
+  [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+  [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+  [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+  [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
index baee859..3e4395e 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } }
 
        *COMMANDS* :=
        { **show** | **list** | **tree** | **attach** | **detach** | **help** }
index dd3771b..ab9f57e 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **feature** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* := { **probe** | **help** }
 
index 7cd6681..2ef2f2d 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **gen** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-L** | **--use-loader** } }
 
        *COMMAND* := { **object** | **skeleton** | **help** }
 
@@ -152,6 +153,12 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -L, --use-loader
+                 For skeletons, generate a "light" skeleton (also known as "loader"
+                 skeleton). A light skeleton contains a loader eBPF program. It does
+                 not use the majority of the libbpf infrastructure, and does not need
+                 libelf.
+
 EXAMPLES
 ========
 **$ cat example1.bpf.c**
index 51f49be..471f363 100644 (file)
@@ -12,6 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **iter** *COMMAND*
 
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
        *COMMANDS* := { **pin** | **help** }
 
 ITER COMMANDS
index 5f7db2a..0de90f0 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **link** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* := { **show** | **list** | **pin** | **help** }
 
index 3d52256..d0c4abe 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **map** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* :=
        { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
index d8165d5..1ae0375 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **net** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **attach** | **detach** | **help** }
index e958ce9..ce52798 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **perf** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **help** }
index fe1b38e..91608cb 100644 (file)
@@ -12,7 +12,9 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **prog** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+               { **-L** | **--use-loader** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
@@ -48,10 +50,11 @@ PROG COMMANDS
 |              **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |      }
 |       *ATTACH_TYPE* := {
-|              **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+|              **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |      }
 |      *METRICs* := {
-|              **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+|              **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+|              **itlb_misses** | **dtlb_misses**
 |      }
 
 
@@ -223,6 +226,20 @@ OPTIONS
                  Do not automatically attempt to mount any virtual file system
                  (such as tracefs or BPF virtual file system) when necessary.
 
+       -L, --use-loader
+                 Load program as a "loader" program. This is useful to debug
+                 the generation of such programs. When this option is in
+                 use, bpftool attempts to load the programs from the object
+                 file into the kernel, but does not pin them (therefore, the
+                 *PATH* must not be provided).
+
+                 When combined with the **-d**\ \|\ **--debug** option,
+                 additional debug messages are generated, and the execution
+                 of the loader program will use the **bpf_trace_printk**\ ()
+                 helper to log each step of loading BTF, creating the maps,
+                 and loading the programs (see **bpftool prog tracelog** as
+                 a way to dump those messages).
+
 EXAMPLES
 ========
 **# bpftool prog show**
@@ -326,3 +343,16 @@ EXAMPLES
       40176203 cycles                                                 (83.05%)
       42518139 instructions    #   1.06 insns per cycle               (83.39%)
            123 llc_misses      #   2.89 LLC misses per million insns  (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+    bpftool-620059  [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+    bpftool-620059  [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+    bpftool-620059  [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+    bpftool-620059  [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
index 506e70e..02afc0f 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump** | **register** | **unregister** | **help** }
index e7d9493..bb23f55 100644 (file)
@@ -18,15 +18,15 @@ SYNOPSIS
 
        *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
 
-       *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
-       | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-V** | **--version** } |
+               { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *MAP-COMMANDS* :=
-       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
-       | **delete** | **pin** | **event_pipe** | **help** }
+       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+               **delete** | **pin** | **event_pipe** | **help** }
 
-       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-       | **load** | **attach** | **detach** | **help** }
+       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+               **load** | **attach** | **detach** | **help** }
 
        *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
index cc33c58..88e2bcf 100644 (file)
@@ -260,7 +260,8 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs --mapcompat --debug'
+        local c='--version --json --pretty --bpffs --mapcompat --debug \
+              --use-loader --base-btf'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -278,7 +279,7 @@ _bpftool()
             _sysfs_get_netdevs
             return 0
             ;;
-        file|pinned)
+        file|pinned|-B|--base-btf)
             _filedir
             return 0
             ;;
@@ -291,7 +292,8 @@ _bpftool()
     # Remove all options so completions don't have to deal with them.
     local i
     for (( i=1; i < ${#words[@]}; )); do
-        if [[ ${words[i]::1} == - ]]; then
+        if [[ ${words[i]::1} == - ]] &&
+            [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
             words=( "${words[@]:0:i}" "${words[@]:i+1}" )
             [[ $i -le $cword ]] && cword=$(( cword - 1 ))
         else
@@ -343,7 +345,8 @@ _bpftool()
 
             local PROG_TYPE='id pinned tag name'
             local MAP_TYPE='id pinned name'
-            local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+            local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+                itlb_misses dtlb_misses'
             case $command in
                 show|list)
                     [[ $prev != "$command" ]] && return 0
@@ -404,8 +407,10 @@ _bpftool()
                             return 0
                             ;;
                         5)
-                            COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
-                                stream_parser flow_dissector' -- "$cur" ) )
+                            local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+                                skb_verdict stream_verdict stream_parser \
+                                flow_dissector'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         6)
@@ -464,7 +469,7 @@ _bpftool()
 
                     case $prev in
                         type)
-                            COMPREPLY=( $( compgen -W "socket kprobe \
+                            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
                                 kretprobe classifier flow_dissector \
                                 action tracepoint raw_tracepoint \
                                 xdp perf_event cgroup/skb cgroup/sock \
@@ -479,8 +484,8 @@ _bpftool()
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt cgroup/sock_release struct_ops \
-                                fentry fexit freplace sk_lookup" -- \
-                                                   "$cur" ) )
+                                fentry fexit freplace sk_lookup'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         id)
@@ -698,15 +703,15 @@ _bpftool()
                             return 0
                             ;;
                         type)
-                            COMPREPLY=( $( compgen -W 'hash array prog_array \
-                                perf_event_array percpu_hash percpu_array \
-                                stack_trace cgroup_array lru_hash \
+                            local BPFTOOL_MAP_CREATE_TYPES='hash array \
+                                prog_array perf_event_array percpu_hash \
+                                percpu_array stack_trace cgroup_array lru_hash \
                                 lru_percpu_hash lpm_trie array_of_maps \
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage task_storage' -- \
-                                                   "$cur" ) )
+                                struct_ops inode_storage task_storage ringbuf'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         key|value|flags|entries)
@@ -1017,34 +1022,37 @@ _bpftool()
                     return 0
                     ;;
                 attach|detach)
-                    local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+                    local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+                        sock_create sock_ops device \
+                        bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
                         getpeername4 getpeername6 getsockname4 getsockname6 \
                         sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
                         setsockopt sock_release'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
-                    case $prev in
-                        $command)
-                            _filedir
-                            return 0
-                            ;;
-                        ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
-                        post_bind4|post_bind6|connect4|connect6|getpeername4|\
-                        getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
-                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+                    # Check for $prev = $command first
+                    if [ $prev = $command ]; then
+                        _filedir
+                        return 0
+                    # Then check for attach type. This is done outside of the
+                    # "case $prev in" to avoid writing the whole list of attach
+                    # types again as pattern to match (where we cannot reuse
+                    # our variable).
+                    elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
-                            ;;
+                    fi
+                    # case/esac for the other cases
+                    case $prev in
                         id)
                             _bpftool_get_prog_ids
                             return 0
                             ;;
                         *)
-                            if ! _bpftool_search_list "$ATTACH_TYPES"; then
-                                COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
-                                    "$cur" ) )
+                            if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+                                COMPREPLY=( $( compgen -W \
+                                    "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
                             elif [[ "$command" == "attach" ]]; then
                                 # We have an attach type on the command line,
                                 # but it is not the previous word, or
index 385d5c9..f7e5ff3 100644 (file)
@@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv)
        }
 
        if (!btf) {
-               err = btf__get_from_id(btf_id, &btf);
+               btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+               err = libbpf_get_error(btf);
                if (err) {
                        p_err("get btf by id (%u): %s", btf_id, strerror(err));
                        goto done;
                }
-               if (!btf) {
-                       err = -ENOENT;
-                       p_err("can't find btf with ID (%u)", btf_id);
-                       goto done;
-               }
        }
 
        if (dump_c) {
@@ -985,7 +981,8 @@ static int do_help(int argc, char **argv)
                "       FORMAT  := { raw | c }\n"
                "       " HELP_SPEC_MAP "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-B|--base-btf} }\n"
                "",
                bin_name, "btf");
 
index 7ca54d0..9c25286 100644 (file)
@@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
        }
        info = &prog_info->info;
 
-       if (!info->btf_id || !info->nr_func_info ||
-           btf__get_from_id(info->btf_id, &prog_btf))
+       if (!info->btf_id || !info->nr_func_info)
+               goto print;
+       prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(prog_btf))
                goto print;
        finfo = u64_to_ptr(info->func_info);
        func_type = btf__type_by_id(prog_btf, finfo->type_id);
index 6e53b1d..3571a28 100644 (file)
@@ -501,7 +501,8 @@ static int do_help(int argc, char **argv)
                HELP_SPEC_ATTACH_TYPES "\n"
                "       " HELP_SPEC_ATTACH_FLAGS "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} }\n"
                "",
                bin_name, argv[-2]);
 
index dc6daa1..d42d930 100644 (file)
@@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
        [BPF_MODIFY_RETURN]             = "mod_ret",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
+       [BPF_TRACE_ITER]                = "trace_iter",
+       [BPF_XDP_DEVMAP]                = "xdp_devmap",
+       [BPF_XDP_CPUMAP]                = "xdp_cpumap",
+       [BPF_XDP]                       = "xdp",
+       [BPF_SK_REUSEPORT_SELECT]       = "sk_skb_reuseport_select",
+       [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_skb_reuseport_select_or_migrate",
 };
 
 void p_err(const char *fmt, ...)
index 40a88df..7f36385 100644 (file)
@@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       COMPONENT := { kernel | dev NAME }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 1d71ff8..d40d92b 100644 (file)
@@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, "gen");
 
index 3b1aad7..84a9b01 100644 (file)
@@ -97,7 +97,9 @@ static int do_help(int argc, char **argv)
        fprintf(stderr,
                "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
                "       %1$s %2$s help\n"
+               "\n"
                "       " HELP_SPEC_MAP "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, "iter");
 
index e77e152..8cc3e36 100644 (file)
@@ -401,7 +401,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       " HELP_SPEC_LINK "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index 3ddfd48..02eaaf0 100644 (file)
@@ -64,7 +64,8 @@ static int do_help(int argc, char **argv)
                "       %s version\n"
                "\n"
                "       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-V|--version} }\n"
                "",
                bin_name, bin_name, bin_name);
 
index c1cf297..90caa42 100644 (file)
@@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
 #define HELP_SPEC_PROGRAM                                              \
        "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
 #define HELP_SPEC_OPTIONS                                              \
-       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n"   \
-       "\t            {-m|--mapcompat} | {-n|--nomount} }"
+       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
 #define HELP_SPEC_MAP                                                  \
        "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
 #define HELP_SPEC_LINK                                                 \
index 09ae038..407071d 100644 (file)
@@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
        } else if (info->btf_value_type_id) {
                int err;
 
-               err = btf__get_from_id(info->btf_id, &btf);
-               if (err || !btf) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               err = libbpf_get_error(btf);
+               if (err) {
                        p_err("failed to get btf");
-                       btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+                       btf = ERR_PTR(err);
                }
        }
 
@@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key,
                            void *value)
 {
        json_writer_t *btf_wtr;
-       struct btf *btf = NULL;
-       int err;
+       struct btf *btf;
 
-       err = btf__get_from_id(info->btf_id, &btf);
-       if (err) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                p_err("failed to get btf");
                return;
        }
@@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv)
                "                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
                "                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
                "                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-               "                 task_storage }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "                 task_storage }\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index f836d11..6490537 100644 (file)
@@ -729,6 +729,7 @@ static int do_help(int argc, char **argv)
                "\n"
                "       " HELP_SPEC_PROGRAM "\n"
                "       ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "\n"
                "Note: Only xdp and tc attachments are supported now.\n"
                "      For progs attached to cgroups, use \"bpftool cgroup\"\n"
index ad23934..50de087 100644 (file)
@@ -231,7 +231,10 @@ static int do_show(int argc, char **argv)
 static int do_help(int argc, char **argv)
 {
        fprintf(stderr,
-               "Usage: %1$s %2$s { show | list | help }\n"
+               "Usage: %1$s %2$s { show | list }\n"
+               "       %1$s %2$s help }\n"
+               "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index cc48726..9c3e343 100644 (file)
@@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        struct bpf_map_info map_info;
        struct btf_var_secinfo *vsi;
        bool printed_header = false;
-       struct btf *btf = NULL;
        unsigned int i, vlen;
        void *value = NULL;
        const char *name;
+       struct btf *btf;
        int err;
 
        if (!num_maps)
@@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        if (!value)
                return;
 
-       err = btf__get_from_id(map_info.btf_id, &btf);
-       if (err || !btf)
+       btf = btf__load_from_kernel_by_id(map_info.btf_id);
+       if (libbpf_get_error(btf))
                goto out_free;
 
        t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                member_len = info->xlated_prog_len;
        }
 
-       if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
-               p_err("failed to get btf");
-               return -1;
+       if (info->btf_id) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
+                       p_err("failed to get btf");
+                       return -1;
+               }
        }
 
        func_info = u64_to_ptr(info->func_info);
@@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                kernel_syms_destroy(&dd);
        }
 
+       btf__free(btf);
+
        return 0;
 }
 
@@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                p_err("prog FD %d doesn't have valid btf", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               p_err("failed to load btf for prog FD %d", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
@@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv)
                "                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
                "                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
                "                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
-               "       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
-               "                        flow_dissector }\n"
+               "       ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+               "                        stream_parser | flow_dissector }\n"
                "       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, argv[-2]);
 
index b58b91f..ab2d229 100644 (file)
@@ -572,8 +572,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s unregister STRUCT_OPS_MAP\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
                "       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 3ad9301..de6365b 100644 (file)
@@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
        sh->sh_addralign = expected;
 
        if (gelf_update_shdr(scn, sh) == 0) {
-               printf("FAILED cannot update section header: %s\n",
+               pr_err("FAILED cannot update section header: %s\n",
                        elf_errmsg(-1));
                return -1;
        }
@@ -317,6 +317,7 @@ static int elf_collect(struct object *obj)
 
        elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
        if (!elf) {
+               close(fd);
                pr_err("FAILED cannot create ELF descriptor: %s\n",
                        elf_errmsg(-1));
                return -1;
@@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj)
        err = libbpf_get_error(btf);
        if (err) {
                pr_err("FAILED: load BTF from %s: %s\n",
-                       obj->path, strerror(-err));
+                       obj->btf ?: obj->path, strerror(-err));
                return -1;
        }
 
@@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
        int i;
 
        if (!id->id) {
-               pr_err("FAILED unresolved symbol %s\n", id->name);
-               return -EINVAL;
+               pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
        }
 
        for (i = 0; i < id->addr_cnt; i++) {
@@ -734,8 +734,9 @@ int main(int argc, const char **argv)
 
        err = 0;
 out:
-       if (obj.efile.elf)
+       if (obj.efile.elf) {
                elf_end(obj.efile.elf);
-       close(obj.efile.fd);
+               close(obj.efile.fd);
+       }
        return err;
 }
index bf9252c..791f31d 100644 (file)
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
 
 struct bpf_cgroup_storage_key {
        __u64   cgroup_inode_id;        /* cgroup inode id */
-       __u32   attach_type;            /* program attach type */
+       __u32   attach_type;            /* program attach type (enum bpf_attach_type) */
 };
 
 union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
  *             **BPF_PROG_TYPE_SK_LOOKUP**
  *                     *data_in* and *data_out* must be NULL.
  *
- *             **BPF_PROG_TYPE_XDP**
- *                     *ctx_in* and *ctx_out* must be NULL.
- *
  *             **BPF_PROG_TYPE_RAW_TRACEPOINT**,
  *             **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
  *
@@ -996,6 +993,7 @@ enum bpf_attach_type {
        BPF_SK_SKB_VERDICT,
        BPF_SK_REUSEPORT_SELECT,
        BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+       BPF_PERF_EVENT,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
        BPF_LINK_TYPE_ITER = 4,
        BPF_LINK_TYPE_NETNS = 5,
        BPF_LINK_TYPE_XDP = 6,
+       BPF_LINK_TYPE_PERF_EVENT = 7,
 
        MAX_BPF_LINK_TYPE,
 };
@@ -1449,6 +1448,13 @@ union bpf_attr {
                                __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
                                __u32           iter_info_len;  /* iter_info length */
                        };
+                       struct {
+                               /* black box user-provided value passed through
+                                * to BPF program at the execution time and
+                                * accessible through bpf_get_attach_cookie() BPF helper
+                                */
+                               __u64           bpf_cookie;
+                       } perf_event;
                };
        } link_create;
 
@@ -3249,7 +3255,7 @@ union bpf_attr {
  * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
  *     Description
  *             Select a **SO_REUSEPORT** socket from a
- *             **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ *             **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
  *             It checks the selected socket is matching the incoming
  *             request in the socket buffer.
  *     Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
  *             Execute close syscall for given FD.
  *     Return
  *             A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ *     Description
+ *             Initialize the timer.
+ *             First 4 bits of *flags* specify clockid.
+ *             Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ *             All other bits of *flags* are reserved.
+ *             The verifier will reject the program if *timer* is not from
+ *             the same *map*.
+ *     Return
+ *             0 on success.
+ *             **-EBUSY** if *timer* is already initialized.
+ *             **-EINVAL** if invalid *flags* are passed.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ *     Description
+ *             Configure the timer to call *callback_fn* static function.
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EPERM** if *timer* is in a map that doesn't have any user references.
+ *             The user space should either hold a file descriptor to a map with timers
+ *             or pin such map in bpffs. When map is unpinned or file descriptor is
+ *             closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ *     Description
+ *             Set timer expiration N nanoseconds from the current time. The
+ *             configured callback will be invoked in soft irq context on some cpu
+ *             and will not repeat unless another bpf_timer_start() is made.
+ *             In such case the next invocation can migrate to a different cpu.
+ *             Since struct bpf_timer is a field inside map element the map
+ *             owns the timer. The bpf_timer_set_callback() will increment refcnt
+ *             of BPF program to make sure that callback_fn code stays valid.
+ *             When user space reference to a map reaches zero all timers
+ *             in a map are cancelled and corresponding program's refcnts are
+ *             decremented. This is done to make sure that Ctrl-C of a user
+ *             process doesn't leave any timers running. If map is pinned in
+ *             bpffs the callback_fn can re-arm itself indefinitely.
+ *             bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ *             cancel and free the timer in the given map element.
+ *             The map can contain timers that invoke callback_fn-s from different
+ *             programs. The same callback_fn can serve different timers from
+ *             different maps if key/value layout matches across maps.
+ *             Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ *     Return
+ *             0 on success.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ *             or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ *     Description
+ *             Cancel the timer and wait for callback_fn to finish if it was running.
+ *     Return
+ *             0 if the timer was not active.
+ *             1 if the timer was active.
+ *             **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ *             **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ *             own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ *     Description
+ *             Get address of the traced function (for tracing and kprobe programs).
+ *     Return
+ *             Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ *     Description
+ *             Get bpf_cookie value provided (optionally) during the program
+ *             attachment. It might be different for each individual
+ *             attachment, even if BPF program itself is the same.
+ *             Expects BPF program context *ctx* as a first argument.
+ *
+ *             Supported for the following program types:
+ *                     - kprobe/uprobe;
+ *                     - tracepoint;
+ *                     - perf_event.
+ *     Return
+ *             Value specified by user at BPF link creation/attachment time
+ *             or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ *     Description
+ *             Get the struct pt_regs associated with **task**.
+ *     Return
+ *             A pointer to struct pt_regs.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -4951,6 +5048,13 @@ union bpf_attr {
        FN(sys_bpf),                    \
        FN(btf_find_by_name_kind),      \
        FN(sys_close),                  \
+       FN(timer_init),                 \
+       FN(timer_set_callback),         \
+       FN(timer_start),                \
+       FN(timer_cancel),               \
+       FN(get_func_ip),                \
+       FN(get_attach_cookie),          \
+       FN(task_pt_regs),               \
        /* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
        __u32   val;
 };
 
+struct bpf_timer {
+       __u64 :64;
+       __u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
        __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
                                 * Allows 1,2,4-byte read, but no write.
index c86c3e9..47afae3 100644 (file)
@@ -48,4 +48,57 @@ struct ethtool_channels {
        __u32   combined_count;
 };
 
+#define ETHTOOL_FWVERS_LEN     32
+#define ETHTOOL_BUSINFO_LEN    32
+#define ETHTOOL_EROMVERS_LEN   32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name.  This should normally match the name
+ *     in its bus driver structure (e.g. pci_driver::name).  Must
+ *     not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address.  This should match the dev_name()
+ *     string for the underlying bus device, if there is one.  May be
+ *     an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ *     %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ *     %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ *     command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ *     command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ *     and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ *     command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation.  The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+       __u32   cmd;
+       char    driver[32];
+       char    version[32];
+       char    fw_version[ETHTOOL_FWVERS_LEN];
+       char    bus_info[ETHTOOL_BUSINFO_LEN];
+       char    erom_version[ETHTOOL_EROMVERS_LEN];
+       char    reserved2[12];
+       __u32   n_priv_flags;
+       __u32   n_stats;
+       __u32   testinfo_len;
+       __u32   eedump_len;
+       __u32   regdump_len;
+};
+
+#define ETHTOOL_GDRVINFO       0x00000003
+
 #endif /* _UAPI_LINUX_ETHTOOL_H */
index d208b2a..b3610fd 100644 (file)
@@ -230,6 +230,7 @@ enum {
        IFLA_INET6_ICMP6STATS,  /* statistics (icmpv6)          */
        IFLA_INET6_TOKEN,       /* device token                 */
        IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+       IFLA_INET6_RA_MTU,      /* mtu carried in the RA message */
        __IFLA_INET6_MAX
 };
 
@@ -653,6 +654,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
index 430f687..94f0a14 100644 (file)
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
            netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
index ec14aa7..74c3b73 100644 (file)
@@ -4,8 +4,9 @@
 RM ?= rm
 srctree = $(abs_srctree)
 
+VERSION_SCRIPT := libbpf.map
 LIBBPF_VERSION := $(shell \
-       grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+       grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
        sort -rV | head -n1 | cut -d'_' -f2)
 LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
 
@@ -110,7 +111,6 @@ SHARED_OBJDIR       := $(OUTPUT)sharedobjs/
 STATIC_OBJDIR  := $(OUTPUT)staticobjs/
 BPF_IN_SHARED  := $(SHARED_OBJDIR)libbpf-in.o
 BPF_IN_STATIC  := $(STATIC_OBJDIR)libbpf-in.o
-VERSION_SCRIPT := libbpf.map
 BPF_HELPER_DEFS        := $(OUTPUT)bpf_helper_defs.h
 
 LIB_TARGET     := $(addprefix $(OUTPUT),$(LIB_TARGET))
@@ -163,10 +163,10 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
        $(QUIET_LINK)$(CC) $(LDFLAGS) \
                --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-               -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
+               -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
        @ln -sf $(@F) $(OUTPUT)libbpf.so
        @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
 
@@ -181,7 +181,7 @@ $(OUTPUT)libbpf.pc:
 
 check: check_abi
 
-check_abi: $(OUTPUT)libbpf.so
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
        @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then  \
                echo "Warning: Num of global symbols in $(BPF_IN_SHARED)"        \
                     "($(GLOBAL_SYM_COUNT)) does NOT match with num of"  \
index 86dcac4..2401fad 100644 (file)
@@ -684,8 +684,13 @@ int bpf_link_create(int prog_fd, int target_fd,
        iter_info_len = OPTS_GET(opts, iter_info_len, 0);
        target_btf_id = OPTS_GET(opts, target_btf_id, 0);
 
-       if (iter_info_len && target_btf_id)
-               return libbpf_err(-EINVAL);
+       /* validate we don't have unexpected combinations of non-zero fields */
+       if (iter_info_len || target_btf_id) {
+               if (iter_info_len && target_btf_id)
+                       return libbpf_err(-EINVAL);
+               if (!OPTS_ZEROED(opts, target_btf_id))
+                       return libbpf_err(-EINVAL);
+       }
 
        memset(&attr, 0, sizeof(attr));
        attr.link_create.prog_fd = prog_fd;
@@ -693,14 +698,27 @@ int bpf_link_create(int prog_fd, int target_fd,
        attr.link_create.attach_type = attach_type;
        attr.link_create.flags = OPTS_GET(opts, flags, 0);
 
-       if (iter_info_len) {
-               attr.link_create.iter_info =
-                       ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
-               attr.link_create.iter_info_len = iter_info_len;
-       } else if (target_btf_id) {
+       if (target_btf_id) {
                attr.link_create.target_btf_id = target_btf_id;
+               goto proceed;
        }
 
+       switch (attach_type) {
+       case BPF_TRACE_ITER:
+               attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+               attr.link_create.iter_info_len = iter_info_len;
+               break;
+       case BPF_PERF_EVENT:
+               attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
+               if (!OPTS_ZEROED(opts, perf_event))
+                       return libbpf_err(-EINVAL);
+               break;
+       default:
+               if (!OPTS_ZEROED(opts, flags))
+                       return libbpf_err(-EINVAL);
+               break;
+       }
+proceed:
        fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
        return libbpf_err_errno(fd);
 }
index 4f758f8..6fffb3c 100644 (file)
@@ -177,8 +177,14 @@ struct bpf_link_create_opts {
        union bpf_iter_link_info *iter_info;
        __u32 iter_info_len;
        __u32 target_btf_id;
+       union {
+               struct {
+                       __u64 bpf_cookie;
+               } perf_event;
+       };
+       size_t :0;
 };
-#define bpf_link_create_opts__last_field target_btf_id
+#define bpf_link_create_opts__last_field perf_event
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
                               enum bpf_attach_type attach_type,
index 7ff3d5c..77dc24d 100644 (file)
@@ -1179,7 +1179,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
 
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
 {
        __u32 log_buf_size = 0, raw_size;
        char *log_buf = NULL;
@@ -1227,6 +1227,7 @@ done:
        free(log_buf);
        return libbpf_err(err);
 }
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
 
 int btf__fd(const struct btf *btf)
 {
@@ -1381,21 +1382,35 @@ exit_free:
        return btf;
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
 {
-       struct btf *res;
-       int err, btf_fd;
+       struct btf *btf;
+       int btf_fd;
 
-       *btf = NULL;
        btf_fd = bpf_btf_get_fd_by_id(id);
        if (btf_fd < 0)
-               return libbpf_err(-errno);
-
-       res = btf_get_from_fd(btf_fd, NULL);
-       err = libbpf_get_error(res);
+               return libbpf_err_ptr(-errno);
 
+       btf = btf_get_from_fd(btf_fd, base_btf);
        close(btf_fd);
 
+       return libbpf_ptr(btf);
+}
+
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+       return btf__load_from_kernel_by_id_split(id, NULL);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+       struct btf *res;
+       int err;
+
+       *btf = NULL;
+       res = btf__load_from_kernel_by_id(id);
+       err = libbpf_get_error(res);
+
        if (err)
                return libbpf_err(err);
 
@@ -4020,7 +4035,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
                 */
                if (d->hypot_adjust_canon)
                        continue;
-               
+
                if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
                        d->map[t_id] = c_id;
 
@@ -4393,7 +4408,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
  * Probe few well-known locations for vmlinux kernel image and try to load BTF
  * data out of it to use for target BTF.
  */
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
 {
        struct {
                const char *path_fmt;
@@ -4439,6 +4454,16 @@ struct btf *libbpf_find_kernel_btf(void)
        return libbpf_err_ptr(-ESRCH);
 }
 
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+       char path[80];
+
+       snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+       return btf__parse_split(path, vmlinux_btf);
+}
+
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
 {
        int i, n, err;
index b54f1c3..4a711f9 100644 (file)
@@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
 LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
 
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
                                   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
@@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
                                    __u32 expected_key_size,
                                    __u32 expected_value_size,
@@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,
 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
@@ -184,6 +190,25 @@ LIBBPF_API int
 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
                         const struct btf_dump_emit_type_decl_opts *opts);
 
+
+struct btf_dump_type_data_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       const char *indent_str;
+       int indent_level;
+       /* below match "show" flags for bpf_show_snprintf() */
+       bool compact;           /* no newlines/indentation */
+       bool skip_names;        /* skip member/type names */
+       bool emit_zeroes;       /* show 0-valued fields */
+       size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                        const void *data, size_t data_sz,
+                        const struct btf_dump_type_data_opts *opts);
+
 /*
  * A set of helpers for easier BTF types handling
  */
index 5dc6b51..e4b483f 100644 (file)
@@ -10,6 +10,8 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
+#include <endian.h>
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
@@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {
        __u8 referenced: 1;
 };
 
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN                        32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+       const void *data_end;           /* end of valid data to show */
+       bool compact;
+       bool skip_names;
+       bool emit_zeroes;
+       __u8 indent_lvl;        /* base indent level */
+       char indent_str[BTF_DATA_INDENT_STR_LEN];
+       /* below are used during iteration */
+       int depth;
+       bool is_array_member;
+       bool is_array_terminated;
+       bool is_array_char;
+};
+
 struct btf_dump {
        const struct btf *btf;
        const struct btf_ext *btf_ext;
@@ -60,6 +82,7 @@ struct btf_dump {
        struct btf_dump_opts opts;
        int ptr_sz;
        bool strip_mods;
+       bool skip_anon_defs;
        int last_id;
 
        /* per-type auxiliary state */
@@ -89,6 +112,10 @@ struct btf_dump {
         * name occurrences
         */
        struct hashmap *ident_names;
+       /*
+        * data for typed display; allocated if needed.
+        */
+       struct btf_dump_data *typed_dump;
 };
 
 static size_t str_hash_fn(const void *key, void *ctx)
@@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
                break;
        case BTF_KIND_FUNC_PROTO: {
                const struct btf_param *p = btf_params(t);
-               __u16 vlen = btf_vlen(t);
+               __u16 n = btf_vlen(t);
                int i;
 
                btf_dump_emit_type(d, t->type, cont_id);
-               for (i = 0; i < vlen; i++, p++)
+               for (i = 0; i < n; i++, p++)
                        btf_dump_emit_type(d, p->type, cont_id);
 
                break;
@@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
                                     const struct btf_type *t)
 {
-       btf_dump_printf(d, "%s %s",
+       btf_dump_printf(d, "%s%s%s",
                        btf_is_struct(t) ? "struct" : "union",
+                       t->name_off ? " " : "",
                        btf_dump_type_name(d, id));
 }
 
@@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_UNION:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous struct/union */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_struct_def(d, id, t, lvl);
                        else
                                btf_dump_emit_struct_fwd(d, id, t);
@@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_ENUM:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous enum */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_enum_def(d, id, t, lvl);
                        else
                                btf_dump_emit_enum_fwd(d, id, t);
@@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
        btf_dump_emit_name(d, fname, last_was_ptr);
 }
 
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+                                   bool top_level)
+{
+       const struct btf_type *t;
+
+       /* for array members, we don't bother emitting type name for each
+        * member to avoid the redundancy of
+        * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+        */
+       if (d->typed_dump->is_array_member)
+               return;
+
+       /* avoid type name specification for variable/section; it will be done
+        * for the associated variable value(s).
+        */
+       t = btf__type_by_id(d->btf, id);
+       if (btf_is_var(t) || btf_is_datasec(t))
+               return;
+
+       if (top_level)
+               btf_dump_printf(d, "(");
+
+       d->skip_anon_defs = true;
+       d->strip_mods = true;
+       btf_dump_emit_type_decl(d, id, "", 0);
+       d->strip_mods = false;
+       d->skip_anon_defs = false;
+
+       if (top_level)
+               btf_dump_printf(d, ")");
+}
+
 /* return number of duplicates (occurrences) of a given name */
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
                                 const char *orig_name)
@@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
 {
        return btf_dump_resolve_name(d, id, d->ident_names);
 }
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+       return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+       return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+       int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+       if (d->typed_dump->compact)
+               return;
+
+       for (i = 0; i < lvl; i++)
+               btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_type_value[s]() appends format specifiers
+ * to the format specifier passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format specifier + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...)                              \
+       btf_dump_printf(d, fmt "%s%s",                                  \
+                       ##__VA_ARGS__,                                  \
+                       btf_dump_data_delim(d),                         \
+                       btf_dump_data_newline(d))
+
+static int btf_dump_unsupported_data(struct btf_dump *d,
+                                    const struct btf_type *t,
+                                    __u32 id)
+{
+       btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+       return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+                                      const struct btf_type *t,
+                                      const void *data,
+                                      __u8 bits_offset,
+                                      __u8 bit_sz,
+                                      __u64 *value)
+{
+       __u16 left_shift_bits, right_shift_bits;
+       __u8 nr_copy_bits, nr_copy_bytes;
+       const __u8 *bytes = data;
+       int sz = t->size;
+       __u64 num = 0;
+       int i;
+
+       /* Maximum supported bitfield size is 64 bits */
+       if (sz > 8) {
+               pr_warn("unexpected bitfield size %d\n", sz);
+               return -EINVAL;
+       }
+
+       /* Bitfield value retrieval is done in two steps; first relevant bytes are
+        * stored in num, then we left/right shift num to eliminate irrelevant bits.
+        */
+       nr_copy_bits = bit_sz + bits_offset;
+       nr_copy_bytes = t->size;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       for (i = nr_copy_bytes - 1; i >= 0; i--)
+               num = num * 256 + bytes[i];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+       for (i = 0; i < nr_copy_bytes; i++)
+               num = num * 256 + bytes[i];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+       left_shift_bits = 64 - nr_copy_bits;
+       right_shift_bits = 64 - bit_sz;
+
+       *value = (num << left_shift_bits) >> right_shift_bits;
+
+       return 0;
+}
+
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+                                       const struct btf_type *t,
+                                       const void *data,
+                                       __u8 bits_offset,
+                                       __u8 bit_sz)
+{
+       __u64 check_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+       if (err)
+               return err;
+       if (check_num == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+                                 const struct btf_type *t,
+                                 const void *data,
+                                 __u8 bits_offset,
+                                 __u8 bit_sz)
+{
+       __u64 print_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+       if (err)
+               return err;
+
+       btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+       return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data)
+{
+       static __u8 bytecmp[16] = {};
+       int nr_bytes;
+
+       /* For pointer types, pointer size is not defined on a per-type basis.
+        * On dump creation however, we store the pointer size.
+        */
+       if (btf_kind(t) == BTF_KIND_PTR)
+               nr_bytes = d->ptr_sz;
+       else
+               nr_bytes = t->size;
+
+       if (nr_bytes < 1 || nr_bytes > 16) {
+               pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+               return -EINVAL;
+       }
+
+       if (memcmp(data, bytecmp, nr_bytes) == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+       return ((uintptr_t)data) % data_sz == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+                            const struct btf_type *t,
+                            __u32 type_id,
+                            const void *data,
+                            __u8 bits_offset)
+{
+       __u8 encoding = btf_int_encoding(t);
+       bool sign = encoding & BTF_INT_SIGNED;
+       int sz = t->size;
+
+       if (sz == 0) {
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+
+       /* handle packed int data - accesses of integers not aligned on
+        * int boundaries can cause problems on some platforms.
+        */
+       if (!ptr_is_aligned(data, sz))
+               return btf_dump_bitfield_data(d, t, data, 0, 0);
+
+       switch (sz) {
+       case 16: {
+               const __u64 *ints = data;
+               __u64 lsi, msi;
+
+               /* avoid use of __int128 as some 32-bit platforms do not
+                * support it.
+                */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               lsi = ints[0];
+               msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+               lsi = ints[1];
+               msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+               if (msi == 0)
+                       btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+               else
+                       btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+                                            (unsigned long long)lsi);
+               break;
+       }
+       case 8:
+               if (sign)
+                       btf_dump_type_values(d, "%lld", *(long long *)data);
+               else
+                       btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+               break;
+       case 4:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s32 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u32 *)data);
+               break;
+       case 2:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s16 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u16 *)data);
+               break;
+       case 1:
+               if (d->typed_dump->is_array_char) {
+                       /* check for null terminator */
+                       if (d->typed_dump->is_array_terminated)
+                               break;
+                       if (*(char *)data == '\0') {
+                               d->typed_dump->is_array_terminated = true;
+                               break;
+                       }
+                       if (isprint(*(char *)data)) {
+                               btf_dump_type_values(d, "'%c'", *(char *)data);
+                               break;
+                       }
+               }
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s8 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u8 *)data);
+               break;
+       default:
+               pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+union float_data {
+       long double ld;
+       double d;
+       float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 type_id,
+                              const void *data)
+{
+       const union float_data *flp = data;
+       union float_data fl;
+       int sz = t->size;
+
+       /* handle unaligned data; copy to local union */
+       if (!ptr_is_aligned(data, sz)) {
+               memcpy(&fl, data, sz);
+               flp = &fl;
+       }
+
+       switch (sz) {
+       case 16:
+               btf_dump_type_values(d, "%Lf", flp->ld);
+               break;
+       case 8:
+               btf_dump_type_values(d, "%lf", flp->d);
+               break;
+       case 4:
+               btf_dump_type_values(d, "%f", flp->f);
+               break;
+       default:
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+                            const struct btf_type *v,
+                            __u32 id,
+                            const void *data)
+{
+       enum btf_func_linkage linkage = btf_var(v)->linkage;
+       const struct btf_type *t;
+       const char *l;
+       __u32 type_id;
+
+       switch (linkage) {
+       case BTF_FUNC_STATIC:
+               l = "static ";
+               break;
+       case BTF_FUNC_EXTERN:
+               l = "extern ";
+               break;
+       case BTF_FUNC_GLOBAL:
+       default:
+               l = "";
+               break;
+       }
+
+       /* format of output here is [linkage] [type] [varname] = (type)value,
+        * for example "static int cpu_profile_flip = (int)1"
+        */
+       btf_dump_printf(d, "%s", l);
+       type_id = v->type;
+       t = btf__type_by_id(d->btf, type_id);
+       btf_dump_emit_type_cast(d, type_id, false);
+       btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+       return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 id,
+                              const void *data)
+{
+       const struct btf_array *array = btf_array(t);
+       const struct btf_type *elem_type;
+       __u32 i, elem_size = 0, elem_type_id;
+       bool is_array_member;
+
+       elem_type_id = array->type;
+       elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+       elem_size = btf__resolve_size(d->btf, elem_type_id);
+       if (elem_size <= 0) {
+               pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+               return -EINVAL;
+       }
+
+       if (btf_is_int(elem_type)) {
+               /*
+                * BTF_INT_CHAR encoding never seems to be set for
+                * char arrays, so if size is 1 and element is
+                * printable as a char, we'll do that.
+                */
+               if (elem_size == 1)
+                       d->typed_dump->is_array_char = true;
+       }
+
+       /* note that we increment depth before calling btf_dump_print() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * parenthesis.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+       /* may be a multidimensional array, so store current "is array member"
+        * status so we can restore it correctly later.
+        */
+       is_array_member = d->typed_dump->is_array_member;
+       d->typed_dump->is_array_member = true;
+       for (i = 0; i < array->nelems; i++, data += elem_size) {
+               if (d->typed_dump->is_array_terminated)
+                       break;
+               btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+       }
+       d->typed_dump->is_array_member = is_array_member;
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "]");
+
+       return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+                               const struct btf_type *t,
+                               __u32 id,
+                               const void *data)
+{
+       const struct btf_member *m = btf_members(t);
+       __u16 n = btf_vlen(t);
+       int i, err;
+
+       /* note that we increment depth before calling btf_dump_print() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * parenthesis.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+       for (i = 0; i < n; i++, m++) {
+               const struct btf_type *mtype;
+               const char *mname;
+               __u32 moffset;
+               __u8 bit_sz;
+
+               mtype = btf__type_by_id(d->btf, m->type);
+               mname = btf_name_of(d, m->name_off);
+               moffset = btf_member_bit_offset(t, i);
+
+               bit_sz = btf_member_bitfield_size(t, i);
+               err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+                                             moffset % 8, bit_sz);
+               if (err < 0)
+                       return err;
+       }
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "}");
+       return err;
+}
+
+union ptr_data {
+       unsigned int p;
+       unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+               btf_dump_type_values(d, "%p", *(void **)data);
+       } else {
+               union ptr_data pt;
+
+               memcpy(&pt, data, d->ptr_sz);
+               if (d->ptr_sz == 4)
+                       btf_dump_type_values(d, "0x%x", pt.p);
+               else
+                       btf_dump_type_values(d, "0x%llx", pt.lp);
+       }
+       return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+                                  const struct btf_type *t,
+                                  const void *data,
+                                  __u32 id,
+                                  __s64 *value)
+{
+       int sz = t->size;
+
+       /* handle unaligned enum value */
+       if (!ptr_is_aligned(data, sz)) {
+               __u64 val;
+               int err;
+
+               err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+               if (err)
+                       return err;
+               *value = (__s64)val;
+               return 0;
+       }
+
+       switch (t->size) {
+       case 8:
+               *value = *(__s64 *)data;
+               return 0;
+       case 4:
+               *value = *(__s32 *)data;
+               return 0;
+       case 2:
+               *value = *(__s16 *)data;
+               return 0;
+       case 1:
+               *value = *(__s8 *)data;
+               return 0;
+       default:
+               pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+               return -EINVAL;
+       }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       const struct btf_enum *e;
+       __s64 value;
+       int i, err;
+
+       err = btf_dump_get_enum_value(d, t, data, id, &value);
+       if (err)
+               return err;
+
+       for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+               if (value != e->val)
+                       continue;
+               btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+               return 0;
+       }
+
+       btf_dump_type_values(d, "%d", value);
+       return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+                                const struct btf_type *t,
+                                __u32 id,
+                                const void *data)
+{
+       const struct btf_var_secinfo *vsi;
+       const struct btf_type *var;
+       __u32 i;
+       int err;
+
+       btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+       for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+               var = btf__type_by_id(d->btf, vsi->type);
+               err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+               if (err < 0)
+                       return err;
+               btf_dump_printf(d, ";");
+       }
+       return 0;
+}
+
+/* return size of type, or if base type overflows, return -E2BIG. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+                                            const struct btf_type *t,
+                                            __u32 id,
+                                            const void *data,
+                                            __u8 bits_offset)
+{
+       __s64 size = btf__resolve_size(d->btf, id);
+
+       if (size < 0 || size >= INT_MAX) {
+               pr_warn("unexpected size [%zu] for id [%u]\n",
+                       (size_t)size, id);
+               return -EINVAL;
+       }
+
+       /* Only do overflow checking for base types; we do not want to
+        * avoid showing part of a struct, union or array, even if we
+        * do not have enough data to show the full object.  By
+        * restricting overflow checking to base types we can ensure
+        * that partial display succeeds, while avoiding overflowing
+        * and using bogus data for display.
+        */
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+       if (!t) {
+               pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+                       id);
+               return -EINVAL;
+       }
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+       case BTF_KIND_ENUM:
+               if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+                       return -E2BIG;
+               break;
+       default:
+               break;
+       }
+       return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data,
+                                        __u8 bits_offset,
+                                        __u8 bit_sz)
+{
+       __s64 value;
+       int i, err;
+
+       /* toplevel exceptions; we show zero values if
+        * - we ask for them (emit_zeros)
+        * - if we are at top-level so we see "struct empty { }"
+        * - or if we are an array member and the array is non-empty and
+        *   not a char array; we don't want to be in a situation where we
+        *   have an integer array 0, 1, 0, 1 and only show non-zero values.
+        *   If the array contains zeroes only, or is a char array starting
+        *   with a '\0', the array-level check_zero() will prevent showing it;
+        *   we are concerned with determining zero value at the array member
+        *   level here.
+        */
+       if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+           (d->typed_dump->is_array_member &&
+            !d->typed_dump->is_array_char))
+               return 0;
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_ARRAY: {
+               const struct btf_array *array = btf_array(t);
+               const struct btf_type *elem_type;
+               __u32 elem_type_id, elem_size;
+               bool ischar;
+
+               elem_type_id = array->type;
+               elem_size = btf__resolve_size(d->btf, elem_type_id);
+               elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+               ischar = btf_is_int(elem_type) && elem_size == 1;
+
+               /* check all elements; if _any_ element is nonzero, all
+                * of array is displayed.  We make an exception however
+                * for char arrays where the first element is 0; these
+                * are considered zeroed also, even if later elements are
+                * non-zero because the string is terminated.
+                */
+               for (i = 0; i < array->nelems; i++) {
+                       if (i == 0 && ischar && *(char *)data == 0)
+                               return -ENODATA;
+                       err = btf_dump_type_data_check_zero(d, elem_type,
+                                                           elem_type_id,
+                                                           data +
+                                                           (i * elem_size),
+                                                           bits_offset, 0);
+                       if (err != -ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION: {
+               const struct btf_member *m = btf_members(t);
+               __u16 n = btf_vlen(t);
+
+               /* if any struct/union member is non-zero, the struct/union
+                * is considered non-zero and dumped.
+                */
+               for (i = 0; i < n; i++, m++) {
+                       const struct btf_type *mtype;
+                       __u32 moffset;
+
+                       mtype = btf__type_by_id(d->btf, m->type);
+                       moffset = btf_member_bit_offset(t, i);
+
+                       /* btf_int_bits() does not store member bitfield size;
+                        * bitfield size needs to be stored here so int display
+                        * of member can retrieve it.
+                        */
+                       bit_sz = btf_member_bitfield_size(t, i);
+                       err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+                                                           moffset % 8, bit_sz);
+                       if (err != ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_ENUM:
+               err = btf_dump_get_enum_value(d, t, data, id, &value);
+               if (err)
+                       return err;
+               if (value == 0)
+                       return -ENODATA;
+               return 0;
+       default:
+               return 0;
+       }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz)
+{
+       int size, err;
+
+       size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+       if (size < 0)
+               return size;
+       err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+       if (err) {
+               /* zeroed data is expected and not an error, so simply skip
+                * dumping such data.  Record other errors however.
+                */
+               if (err == -ENODATA)
+                       return size;
+               return err;
+       }
+       btf_dump_data_pfx(d);
+
+       if (!d->typed_dump->skip_names) {
+               if (fname && strlen(fname) > 0)
+                       btf_dump_printf(d, ".%s = ", fname);
+               btf_dump_emit_type_cast(d, id, true);
+       }
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_UNKN:
+       case BTF_KIND_FWD:
+       case BTF_KIND_FUNC:
+       case BTF_KIND_FUNC_PROTO:
+               err = btf_dump_unsupported_data(d, t, id);
+               break;
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+               else
+                       err = btf_dump_int_data(d, t, id, data, bits_offset);
+               break;
+       case BTF_KIND_FLOAT:
+               err = btf_dump_float_data(d, t, id, data);
+               break;
+       case BTF_KIND_PTR:
+               err = btf_dump_ptr_data(d, t, id, data);
+               break;
+       case BTF_KIND_ARRAY:
+               err = btf_dump_array_data(d, t, id, data);
+               break;
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               err = btf_dump_struct_data(d, t, id, data);
+               break;
+       case BTF_KIND_ENUM:
+               /* handle bitfield and int enum values */
+               if (bit_sz) {
+                       __u64 print_num;
+                       __s64 enum_val;
+
+                       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+                                                         &print_num);
+                       if (err)
+                               break;
+                       enum_val = (__s64)print_num;
+                       err = btf_dump_enum_data(d, t, id, &enum_val);
+               } else
+                       err = btf_dump_enum_data(d, t, id, data);
+               break;
+       case BTF_KIND_VAR:
+               err = btf_dump_var_data(d, t, id, data);
+               break;
+       case BTF_KIND_DATASEC:
+               err = btf_dump_datasec_data(d, t, id, data);
+               break;
+       default:
+               pr_warn("unexpected kind [%u] for id [%u]\n",
+                       BTF_INFO_KIND(t->info), id);
+               return -EINVAL;
+       }
+       if (err < 0)
+               return err;
+       return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                            const void *data, size_t data_sz,
+                            const struct btf_dump_type_data_opts *opts)
+{
+       struct btf_dump_data typed_dump = {};
+       const struct btf_type *t;
+       int ret;
+
+       if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+               return libbpf_err(-EINVAL);
+
+       t = btf__type_by_id(d->btf, id);
+       if (!t)
+               return libbpf_err(-ENOENT);
+
+       d->typed_dump = &typed_dump;
+       d->typed_dump->data_end = data + data_sz;
+       d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+       /* default indent string is a tab */
+       if (!opts->indent_str)
+               d->typed_dump->indent_str[0] = '\t';
+       else
+               strncat(d->typed_dump->indent_str, opts->indent_str,
+                       sizeof(d->typed_dump->indent_str) - 1);
+
+       d->typed_dump->compact = OPTS_GET(opts, compact, false);
+       d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+       d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+       ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+       d->typed_dump = NULL;
+
+       return libbpf_err(ret);
+}
index 6f5e275..88d8825 100644 (file)
@@ -193,6 +193,8 @@ enum kern_feature_id {
        FEAT_MODULE_BTF,
        /* BTF_KIND_FLOAT support */
        FEAT_BTF_FLOAT,
+       /* BPF perf link support */
+       FEAT_PERF_LINK,
        __FEAT_CNT,
 };
 
@@ -498,6 +500,10 @@ struct bpf_object {
         * it at load time.
         */
        struct btf *btf_vmlinux;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+        * override for vmlinux BTF.
+        */
+       char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
@@ -591,11 +597,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
               insn->off == 0;
 }
 
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
-       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
 static bool is_call_insn(const struct bpf_insn *insn)
 {
        return insn->code == (BPF_JMP | BPF_CALL);
@@ -2645,8 +2646,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
        struct bpf_program *prog;
        int i;
 
-       /* CO-RE relocations need kernel BTF */
-       if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+       /* CO-RE relocations need kernel BTF, only when btf_custom_path
+        * is not specified
+        */
+       if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
                return true;
 
        /* Support for typed ksyms needs kernel BTF */
@@ -2679,7 +2682,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
        if (!force && !obj_needs_vmlinux_btf(obj))
                return 0;
 
-       obj->btf_vmlinux = libbpf_find_kernel_btf();
+       obj->btf_vmlinux = btf__load_vmlinux_btf();
        err = libbpf_get_error(obj->btf_vmlinux);
        if (err) {
                pr_warn("Error loading vmlinux BTF: %d\n", err);
@@ -2768,7 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
                 */
                btf__set_fd(kern_btf, 0);
        } else {
-               err = btf__load(kern_btf);
+               err = btf__load_into_kernel(kern_btf);
        }
        if (sanitize) {
                if (!err) {
@@ -3894,6 +3897,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
        return 0;
 }
 
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+{
+       char file[PATH_MAX], buff[4096];
+       FILE *fp;
+       __u32 val;
+       int err;
+
+       snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+       memset(info, 0, sizeof(*info));
+
+       fp = fopen(file, "r");
+       if (!fp) {
+               err = -errno;
+               pr_warn("failed to open %s: %d. No procfs support?\n", file,
+                       err);
+               return err;
+       }
+
+       while (fgets(buff, sizeof(buff), fp)) {
+               if (sscanf(buff, "map_type:\t%u", &val) == 1)
+                       info->type = val;
+               else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+                       info->key_size = val;
+               else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+                       info->value_size = val;
+               else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+                       info->max_entries = val;
+               else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+                       info->map_flags = val;
+       }
+
+       fclose(fp);
+
+       return 0;
+}
+
 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 {
        struct bpf_map_info info = {};
@@ -3902,6 +3941,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
        char *new_name;
 
        err = bpf_obj_get_info_by_fd(fd, &info, &len);
+       if (err && errno == EINVAL)
+               err = bpf_get_map_info_from_fdinfo(fd, &info);
        if (err)
                return libbpf_err(err);
 
@@ -4298,6 +4339,37 @@ static int probe_module_btf(void)
        return !err;
 }
 
+static int probe_perf_link(void)
+{
+       struct bpf_load_program_attr attr;
+       struct bpf_insn insns[] = {
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       };
+       int prog_fd, link_fd, err;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+       attr.insns = insns;
+       attr.insns_cnt = ARRAY_SIZE(insns);
+       attr.license = "GPL";
+       prog_fd = bpf_load_program_xattr(&attr, NULL, 0);
+       if (prog_fd < 0)
+               return -errno;
+
+       /* use invalid perf_event FD to get EBADF, if link is supported;
+        * otherwise EINVAL should be returned
+        */
+       link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+       err = -errno; /* close() can clobber errno */
+
+       if (link_fd >= 0)
+               close(link_fd);
+       close(prog_fd);
+
+       return link_fd < 0 && err == -EBADF;
+}
+
 enum kern_feature_result {
        FEAT_UNKNOWN = 0,
        FEAT_SUPPORTED = 1,
@@ -4348,6 +4420,9 @@ static struct kern_feature_desc {
        [FEAT_BTF_FLOAT] = {
                "BTF_KIND_FLOAT support", probe_kern_btf_float,
        },
+       [FEAT_PERF_LINK] = {
+               "BPF perf link support", probe_perf_link,
+       },
 };
 
 static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4381,12 +4456,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
        struct bpf_map_info map_info = {};
        char msg[STRERR_BUFSIZE];
        __u32 map_info_len;
+       int err;
 
        map_info_len = sizeof(map_info);
 
-       if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
-               pr_warn("failed to get map info for map FD %d: %s\n",
-                       map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+       err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+       if (err && errno == EINVAL)
+               err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
+       if (err) {
+               pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
+                       libbpf_strerror_r(errno, msg, sizeof(msg)));
                return false;
        }
 
@@ -4479,6 +4558,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 {
        struct bpf_create_map_attr create_attr;
        struct bpf_map_def *def = &map->def;
+       int err = 0;
 
        memset(&create_attr, 0, sizeof(create_attr));
 
@@ -4521,8 +4601,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 
        if (bpf_map_type__is_map_in_map(def->type)) {
                if (map->inner_map) {
-                       int err;
-
                        err = bpf_object__create_map(obj, map->inner_map, true);
                        if (err) {
                                pr_warn("map '%s': failed to create inner map: %d\n",
@@ -4547,8 +4625,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
        if (map->fd < 0 && (create_attr.btf_key_type_id ||
                            create_attr.btf_value_type_id)) {
                char *cp, errmsg[STRERR_BUFSIZE];
-               int err = -errno;
 
+               err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
                        map->name, cp, err);
@@ -4560,8 +4638,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                map->fd = bpf_create_map_xattr(&create_attr);
        }
 
-       if (map->fd < 0)
-               return -errno;
+       err = map->fd < 0 ? -errno : 0;
 
        if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
                if (obj->gen_loader)
@@ -4570,7 +4647,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                zfree(&map->inner_map);
        }
 
-       return 0;
+       return err;
 }
 
 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -4616,10 +4693,13 @@ bpf_object__create_maps(struct bpf_object *obj)
        char *cp, errmsg[STRERR_BUFSIZE];
        unsigned int i, j;
        int err;
+       bool retried;
 
        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];
 
+               retried = false;
+retry:
                if (map->pin_path) {
                        err = bpf_object__reuse_map(map);
                        if (err) {
@@ -4627,6 +4707,12 @@ bpf_object__create_maps(struct bpf_object *obj)
                                        map->name);
                                goto err_out;
                        }
+                       if (retried && map->fd < 0) {
+                               pr_warn("map '%s': cannot find pinned map\n",
+                                       map->name);
+                               err = -ENOENT;
+                               goto err_out;
+                       }
                }
 
                if (map->fd >= 0) {
@@ -4660,9 +4746,13 @@ bpf_object__create_maps(struct bpf_object *obj)
                if (map->pin_path && !map->pinned) {
                        err = bpf_map__pin(map, NULL);
                        if (err) {
+                               zclose(map->fd);
+                               if (!retried && err == -EEXIST) {
+                                       retried = true;
+                                       goto retry;
+                               }
                                pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
                                        map->name, map->pin_path, err);
-                               zclose(map->fd);
                                goto err_out;
                        }
                }
@@ -4679,279 +4769,6 @@ err_out:
        return err;
 }
 
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
-       __u32 type_id;          /* struct/union type or array element type */
-       __u32 idx;              /* field index or array index */
-       const char *name;       /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
-       const struct btf *btf;
-       /* high-level spec: named fields and array indices only */
-       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
-       /* original unresolved (no skip_mods_or_typedefs) root type ID */
-       __u32 root_type_id;
-       /* CO-RE relocation kind */
-       enum bpf_core_relo_kind relo_kind;
-       /* high-level spec length */
-       int len;
-       /* raw, low-level spec: 1-to-1 with accessor spec string */
-       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
-       /* raw spec length */
-       int raw_len;
-       /* field bit offset represented by spec */
-       __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
-       return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
-                       const struct bpf_core_accessor *acc,
-                       const struct btf_array *arr)
-{
-       const struct btf_type *t;
-
-       /* not a flexible array, if not inside a struct or has non-zero size */
-       if (!acc->name || arr->nelems > 0)
-               return false;
-
-       /* has to be the last member of enclosing struct */
-       t = btf__type_by_id(btf, acc->type_id);
-       return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
-       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
-       case BPF_FIELD_EXISTS: return "field_exists";
-       case BPF_FIELD_SIGNED: return "signed";
-       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
-       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
-       case BPF_TYPE_ID_LOCAL: return "local_type_id";
-       case BPF_TYPE_ID_TARGET: return "target_type_id";
-       case BPF_TYPE_EXISTS: return "type_exists";
-       case BPF_TYPE_SIZE: return "type_size";
-       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
-       case BPF_ENUMVAL_VALUE: return "enumval_value";
-       default: return "unknown";
-       }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-       case BPF_FIELD_BYTE_SIZE:
-       case BPF_FIELD_EXISTS:
-       case BPF_FIELD_SIGNED:
-       case BPF_FIELD_LSHIFT_U64:
-       case BPF_FIELD_RSHIFT_U64:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_TYPE_ID_LOCAL:
-       case BPF_TYPE_ID_TARGET:
-       case BPF_TYPE_EXISTS:
-       case BPF_TYPE_SIZE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_ENUMVAL_EXISTS:
-       case BPF_ENUMVAL_VALUE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- *   struct sample {
- *       int __unimportant;
- *       struct {
- *           int __1;
- *           int __2;
- *           int a[7];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *
- *   int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- *   - intial zero-index access by pointer (&s->... is the same as &s[0]...);
- *   - field 'a' access (corresponds to '2' in low-level spec);
- *   - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify enumerator's value index that need to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
-                              __u32 type_id,
-                              const char *spec_str,
-                              enum bpf_core_relo_kind relo_kind,
-                              struct bpf_core_spec *spec)
-{
-       int access_idx, parsed_len, i;
-       struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-       __s64 sz;
-
-       if (str_is_empty(spec_str) || *spec_str == ':')
-               return -EINVAL;
-
-       memset(spec, 0, sizeof(*spec));
-       spec->btf = btf;
-       spec->root_type_id = type_id;
-       spec->relo_kind = relo_kind;
-
-       /* type-based relocations don't have a field access string */
-       if (core_relo_is_type_based(relo_kind)) {
-               if (strcmp(spec_str, "0"))
-                       return -EINVAL;
-               return 0;
-       }
-
-       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
-       while (*spec_str) {
-               if (*spec_str == ':')
-                       ++spec_str;
-               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
-                       return -EINVAL;
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-               spec_str += parsed_len;
-               spec->raw_spec[spec->raw_len++] = access_idx;
-       }
-
-       if (spec->raw_len == 0)
-               return -EINVAL;
-
-       t = skip_mods_and_typedefs(btf, type_id, &id);
-       if (!t)
-               return -EINVAL;
-
-       access_idx = spec->raw_spec[0];
-       acc = &spec->spec[0];
-       acc->type_id = id;
-       acc->idx = access_idx;
-       spec->len++;
-
-       if (core_relo_is_enumval_based(relo_kind)) {
-               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
-                       return -EINVAL;
-
-               /* record enumerator name in a first accessor */
-               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(relo_kind))
-               return -EINVAL;
-
-       sz = btf__resolve_size(btf, id);
-       if (sz < 0)
-               return sz;
-       spec->bit_offset = access_idx * sz * 8;
-
-       for (i = 1; i < spec->raw_len; i++) {
-               t = skip_mods_and_typedefs(btf, id, &id);
-               if (!t)
-                       return -EINVAL;
-
-               access_idx = spec->raw_spec[i];
-               acc = &spec->spec[spec->len];
-
-               if (btf_is_composite(t)) {
-                       const struct btf_member *m;
-                       __u32 bit_offset;
-
-                       if (access_idx >= btf_vlen(t))
-                               return -EINVAL;
-
-                       bit_offset = btf_member_bit_offset(t, access_idx);
-                       spec->bit_offset += bit_offset;
-
-                       m = btf_members(t) + access_idx;
-                       if (m->name_off) {
-                               name = btf__name_by_offset(btf, m->name_off);
-                               if (str_is_empty(name))
-                                       return -EINVAL;
-
-                               acc->type_id = id;
-                               acc->idx = access_idx;
-                               acc->name = name;
-                               spec->len++;
-                       }
-
-                       id = m->type;
-               } else if (btf_is_array(t)) {
-                       const struct btf_array *a = btf_array(t);
-                       bool flex;
-
-                       t = skip_mods_and_typedefs(btf, a->type, &id);
-                       if (!t)
-                               return -EINVAL;
-
-                       flex = is_flex_arr(btf, acc - 1, a);
-                       if (!flex && access_idx >= a->nelems)
-                               return -EINVAL;
-
-                       spec->spec[spec->len].type_id = id;
-                       spec->spec[spec->len].idx = access_idx;
-                       spec->len++;
-
-                       sz = btf__resolve_size(btf, id);
-                       if (sz < 0)
-                               return sz;
-                       spec->bit_offset += access_idx * sz * 8;
-               } else {
-                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
-                               type_id, spec_str, i, id, btf_kind_str(t));
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 static bool bpf_core_is_flavor_sep(const char *s)
 {
        /* check X___Y name pattern, where X and Y are not underscores */
@@ -4964,7 +4781,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
  * before last triple underscore. Struct name part after last triple
  * underscore is ignored by BPF CO-RE relocation during relocation matching.
  */
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
 {
        size_t n = strlen(name);
        int i;
@@ -4976,34 +4793,20 @@ static size_t bpf_core_essential_name_len(const char *name)
        return n;
 }
 
-struct core_cand
-{
-       const struct btf *btf;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
-       struct core_cand *cands;
-       int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 {
        free(cands->cands);
        free(cands);
 }
 
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
                              size_t local_essent_len,
                              const struct btf *targ_btf,
                              const char *targ_btf_name,
                              int targ_start_id,
-                             struct core_cand_list *cands)
+                             struct bpf_core_cand_list *cands)
 {
-       struct core_cand *new_cands, *cand;
+       struct bpf_core_cand *new_cands, *cand;
        const struct btf_type *t;
        const char *targ_name;
        size_t targ_essent_len;
@@ -5139,11 +4942,11 @@ err_out:
        return 0;
 }
 
-static struct core_cand_list *
+static struct bpf_core_cand_list *
 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 {
-       struct core_cand local_cand = {};
-       struct core_cand_list *cands;
+       struct bpf_core_cand local_cand = {};
+       struct bpf_core_cand_list *cands;
        const struct btf *main_btf;
        size_t local_essent_len;
        int err, i;
@@ -5197,165 +5000,6 @@ err_out:
        return ERR_PTR(err);
 }
 
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- *   - any two STRUCTs/UNIONs are compatible and can be mixed;
- *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
- *   - any two PTRs are always compatible;
- *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
- *     least one of enums should be anonymous;
- *   - for ENUMs, check sizes, names are ignored;
- *   - for INT, size and signedness are ignored;
- *   - any two FLOATs are always compatible;
- *   - for ARRAY, dimensionality is ignored, element types are checked for
- *     compatibility recursively;
- *   - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
-                                     __u32 local_id,
-                                     const struct btf *targ_btf,
-                                     __u32 targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-
-recur:
-       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!local_type || !targ_type)
-               return -EINVAL;
-
-       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
-               return 1;
-       if (btf_kind(local_type) != btf_kind(targ_type))
-               return 0;
-
-       switch (btf_kind(local_type)) {
-       case BTF_KIND_PTR:
-       case BTF_KIND_FLOAT:
-               return 1;
-       case BTF_KIND_FWD:
-       case BTF_KIND_ENUM: {
-               const char *local_name, *targ_name;
-               size_t local_len, targ_len;
-
-               local_name = btf__name_by_offset(local_btf,
-                                                local_type->name_off);
-               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
-               local_len = bpf_core_essential_name_len(local_name);
-               targ_len = bpf_core_essential_name_len(targ_name);
-               /* one of them is anonymous or both w/ same flavor-less names */
-               return local_len == 0 || targ_len == 0 ||
-                      (local_len == targ_len &&
-                       strncmp(local_name, targ_name, local_len) == 0);
-       }
-       case BTF_KIND_INT:
-               /* just reject deprecated bitfield-like integers; all other
-                * integers are by default compatible between each other
-                */
-               return btf_int_offset(local_type) == 0 &&
-                      btf_int_offset(targ_type) == 0;
-       case BTF_KIND_ARRAY:
-               local_id = btf_array(local_type)->type;
-               targ_id = btf_array(targ_type)->type;
-               goto recur;
-       default:
-               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
-                       btf_kind(local_type), local_id, targ_id);
-               return 0;
-       }
-}
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If field with
- * desired name is found, check compatibility between local and target types,
- * before returning result.
- *
- * 1 is returned, if field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
-                                const struct bpf_core_accessor *local_acc,
-                                const struct btf *targ_btf,
-                                __u32 targ_id,
-                                struct bpf_core_spec *spec,
-                                __u32 *next_targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-       const struct btf_member *local_member, *m;
-       const char *local_name, *targ_name;
-       __u32 local_id;
-       int i, n, found;
-
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!targ_type)
-               return -EINVAL;
-       if (!btf_is_composite(targ_type))
-               return 0;
-
-       local_id = local_acc->type_id;
-       local_type = btf__type_by_id(local_btf, local_id);
-       local_member = btf_members(local_type) + local_acc->idx;
-       local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
-       n = btf_vlen(targ_type);
-       m = btf_members(targ_type);
-       for (i = 0; i < n; i++, m++) {
-               __u32 bit_offset;
-
-               bit_offset = btf_member_bit_offset(targ_type, i);
-
-               /* too deep struct/union/array nesting */
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-
-               /* speculate this member will be the good one */
-               spec->bit_offset += bit_offset;
-               spec->raw_spec[spec->raw_len++] = i;
-
-               targ_name = btf__name_by_offset(targ_btf, m->name_off);
-               if (str_is_empty(targ_name)) {
-                       /* embedded struct/union, we need to go deeper */
-                       found = bpf_core_match_member(local_btf, local_acc,
-                                                     targ_btf, m->type,
-                                                     spec, next_targ_id);
-                       if (found) /* either found or error */
-                               return found;
-               } else if (strcmp(local_name, targ_name) == 0) {
-                       /* matching named field */
-                       struct bpf_core_accessor *targ_acc;
-
-                       targ_acc = &spec->spec[spec->len++];
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = i;
-                       targ_acc->name = targ_name;
-
-                       *next_targ_id = m->type;
-                       found = bpf_core_fields_are_compat(local_btf,
-                                                          local_member->type,
-                                                          targ_btf, m->type);
-                       if (!found)
-                               spec->len--; /* pop accessor */
-                       return found;
-               }
-               /* member turned out not to be what we looked for */
-               spec->bit_offset -= bit_offset;
-               spec->raw_len--;
-       }
-
-       return 0;
-}
-
 /* Check local and target types for compatibility. This check is used for
  * type-based CO-RE relocations and follow slightly different rules than
  * field-based relocations. This function assumes that root types were already
@@ -5375,8 +5019,8 @@ static int bpf_core_match_member(const struct btf *local_btf,
  * These rules are not set in stone and probably will be adjusted as we get
  * more experience with using BPF CO-RE relocations.
  */
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
-                                    const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id)
 {
        const struct btf_type *local_type, *targ_type;
        int depth = 32; /* max recursion depth */
@@ -5450,491 +5094,35 @@ recur:
        }
 }
 
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
-                              const struct btf *targ_btf, __u32 targ_id,
-                              struct bpf_core_spec *targ_spec)
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
 {
-       const struct btf_type *targ_type;
-       const struct bpf_core_accessor *local_acc;
-       struct bpf_core_accessor *targ_acc;
-       int i, sz, matched;
-
-       memset(targ_spec, 0, sizeof(*targ_spec));
-       targ_spec->btf = targ_btf;
-       targ_spec->root_type_id = targ_id;
-       targ_spec->relo_kind = local_spec->relo_kind;
-
-       if (core_relo_is_type_based(local_spec->relo_kind)) {
-               return bpf_core_types_are_compat(local_spec->btf,
-                                                local_spec->root_type_id,
-                                                targ_btf, targ_id);
-       }
-
-       local_acc = &local_spec->spec[0];
-       targ_acc = &targ_spec->spec[0];
+       return (size_t)key;
+}
 
-       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
-               size_t local_essent_len, targ_essent_len;
-               const struct btf_enum *e;
-               const char *targ_name;
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+       return k1 == k2;
+}
 
-               /* has to resolve to an enum */
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
-               if (!btf_is_enum(targ_type))
-                       return 0;
+static void *u32_as_hash_key(__u32 x)
+{
+       return (void *)(uintptr_t)x;
+}
 
-               local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
-               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
-                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
-                       targ_essent_len = bpf_core_essential_name_len(targ_name);
-                       if (targ_essent_len != local_essent_len)
-                               continue;
-                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
-                               targ_acc->type_id = targ_id;
-                               targ_acc->idx = i;
-                               targ_acc->name = targ_name;
-                               targ_spec->len++;
-                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                               targ_spec->raw_len++;
-                               return 1;
-                       }
-               }
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(local_spec->relo_kind))
-               return -EINVAL;
-
-       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
-                                                  &targ_id);
-               if (!targ_type)
-                       return -EINVAL;
-
-               if (local_acc->name) {
-                       matched = bpf_core_match_member(local_spec->btf,
-                                                       local_acc,
-                                                       targ_btf, targ_id,
-                                                       targ_spec, &targ_id);
-                       if (matched <= 0)
-                               return matched;
-               } else {
-                       /* for i=0, targ_id is already treated as array element
-                        * type (because it's the original struct), for others
-                        * we should find array element type first
-                        */
-                       if (i > 0) {
-                               const struct btf_array *a;
-                               bool flex;
-
-                               if (!btf_is_array(targ_type))
-                                       return 0;
-
-                               a = btf_array(targ_type);
-                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
-                               if (!flex && local_acc->idx >= a->nelems)
-                                       return 0;
-                               if (!skip_mods_and_typedefs(targ_btf, a->type,
-                                                           &targ_id))
-                                       return -EINVAL;
-                       }
-
-                       /* too deep struct/union/array nesting */
-                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                               return -E2BIG;
-
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = local_acc->idx;
-                       targ_acc->name = NULL;
-                       targ_spec->len++;
-                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                       targ_spec->raw_len++;
-
-                       sz = btf__resolve_size(targ_btf, targ_id);
-                       if (sz < 0)
-                               return sz;
-                       targ_spec->bit_offset += local_acc->idx * sz * 8;
-               }
-       }
-
-       return 1;
-}
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
-                                   const struct bpf_core_relo *relo,
-                                   const struct bpf_core_spec *spec,
-                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
-                                   bool *validate)
-{
-       const struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
-       const struct btf_member *m;
-       const struct btf_type *mt;
-       bool bitfield;
-       __s64 sz;
-
-       *field_sz = 0;
-
-       if (relo->kind == BPF_FIELD_EXISTS) {
-               *val = spec ? 1 : 0;
-               return 0;
-       }
-
-       if (!spec)
-               return -EUCLEAN; /* request instruction poisoning */
-
-       acc = &spec->spec[spec->len - 1];
-       t = btf__type_by_id(spec->btf, acc->type_id);
-
-       /* a[n] accessor needs special handling */
-       if (!acc->name) {
-               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
-                       *val = spec->bit_offset / 8;
-                       /* remember field size for load/store mem size */
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *field_sz = sz;
-                       *type_id = acc->type_id;
-               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *val = sz;
-               } else {
-                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
-                               prog->name, relo->kind, relo->insn_off / 8);
-                       return -EINVAL;
-               }
-               if (validate)
-                       *validate = true;
-               return 0;
-       }
-
-       m = btf_members(t) + acc->idx;
-       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
-       bit_off = spec->bit_offset;
-       bit_sz = btf_member_bitfield_size(t, acc->idx);
-
-       bitfield = bit_sz > 0;
-       if (bitfield) {
-               byte_sz = mt->size;
-               byte_off = bit_off / 8 / byte_sz * byte_sz;
-               /* figure out smallest int size necessary for bitfield load */
-               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
-                       if (byte_sz >= 8) {
-                               /* bitfield can't be read with 64-bit read */
-                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
-                                       prog->name, relo->kind, relo->insn_off / 8);
-                               return -E2BIG;
-                       }
-                       byte_sz *= 2;
-                       byte_off = bit_off / 8 / byte_sz * byte_sz;
-               }
-       } else {
-               sz = btf__resolve_size(spec->btf, field_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               byte_sz = sz;
-               byte_off = spec->bit_offset / 8;
-               bit_sz = byte_sz * 8;
-       }
-
-       /* for bitfields, all the relocatable aspects are ambiguous and we
-        * might disagree with compiler, so turn off validation of expected
-        * value, except for signedness
-        */
-       if (validate)
-               *validate = !bitfield;
-
-       switch (relo->kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-               *val = byte_off;
-               if (!bitfield) {
-                       *field_sz = byte_sz;
-                       *type_id = field_type_id;
-               }
-               break;
-       case BPF_FIELD_BYTE_SIZE:
-               *val = byte_sz;
-               break;
-       case BPF_FIELD_SIGNED:
-               /* enums will be assumed unsigned */
-               *val = btf_is_enum(mt) ||
-                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
-               if (validate)
-                       *validate = true; /* signedness is never ambiguous */
-               break;
-       case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-               *val = 64 - (bit_off + bit_sz - byte_off  * 8);
-#else
-               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
-               break;
-       case BPF_FIELD_RSHIFT_U64:
-               *val = 64 - bit_sz;
-               if (validate)
-                       *validate = true; /* right shift is never ambiguous */
-               break;
-       case BPF_FIELD_EXISTS:
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
-                                  const struct bpf_core_spec *spec,
-                                  __u32 *val)
-{
-       __s64 sz;
-
-       /* type-based relos return zero when target type is not found */
-       if (!spec) {
-               *val = 0;
-               return 0;
-       }
-
-       switch (relo->kind) {
-       case BPF_TYPE_ID_TARGET:
-               *val = spec->root_type_id;
-               break;
-       case BPF_TYPE_EXISTS:
-               *val = 1;
-               break;
-       case BPF_TYPE_SIZE:
-               sz = btf__resolve_size(spec->btf, spec->root_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               *val = sz;
-               break;
-       case BPF_TYPE_ID_LOCAL:
-       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
-                                     const struct bpf_core_spec *spec,
-                                     __u32 *val)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-
-       switch (relo->kind) {
-       case BPF_ENUMVAL_EXISTS:
-               *val = spec ? 1 : 0;
-               break;
-       case BPF_ENUMVAL_VALUE:
-               if (!spec)
-                       return -EUCLEAN; /* request instruction poisoning */
-               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
-               e = btf_enum(t) + spec->spec[0].idx;
-               *val = e->val;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-struct bpf_core_relo_res
-{
-       /* expected value in the instruction, unless validate == false */
-       __u32 orig_val;
-       /* new value that needs to be patched up to */
-       __u32 new_val;
-       /* relocation unsuccessful, poison instruction, but don't fail load */
-       bool poison;
-       /* some relocations can't be validated against orig_val */
-       bool validate;
-       /* for field byte offset relocations or the forms:
-        *     *(T *)(rX + <off>) = rY
-        *     rX = *(T *)(rY + <off>),
-        * we remember original and resolved field size to adjust direct
-        * memory loads of pointers and integers; this is necessary for 32-bit
-        * host kernel architectures, but also allows to automatically
-        * relocate fields that were resized from, e.g., u32 to u64, etc.
-        */
-       bool fail_memsz_adjust;
-       __u32 orig_sz;
-       __u32 orig_type_id;
-       __u32 new_sz;
-       __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
-                             const struct bpf_core_relo *relo,
-                             int relo_idx,
-                             const struct bpf_core_spec *local_spec,
-                             const struct bpf_core_spec *targ_spec,
-                             struct bpf_core_relo_res *res)
-{
-       int err = -EOPNOTSUPP;
-
-       res->orig_val = 0;
-       res->new_val = 0;
-       res->poison = false;
-       res->validate = true;
-       res->fail_memsz_adjust = false;
-       res->orig_sz = res->new_sz = 0;
-       res->orig_type_id = res->new_type_id = 0;
-
-       if (core_relo_is_field_based(relo->kind)) {
-               err = bpf_core_calc_field_relo(prog, relo, local_spec,
-                                              &res->orig_val, &res->orig_sz,
-                                              &res->orig_type_id, &res->validate);
-               err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
-                                                     &res->new_val, &res->new_sz,
-                                                     &res->new_type_id, NULL);
-               if (err)
-                       goto done;
-               /* Validate if it's safe to adjust load/store memory size.
-                * Adjustments are performed only if original and new memory
-                * sizes differ.
-                */
-               res->fail_memsz_adjust = false;
-               if (res->orig_sz != res->new_sz) {
-                       const struct btf_type *orig_t, *new_t;
-
-                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
-                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
-                       /* There are two use cases in which it's safe to
-                        * adjust load/store's mem size:
-                        *   - reading a 32-bit kernel pointer, while on BPF
-                        *   size pointers are always 64-bit; in this case
-                        *   it's safe to "downsize" instruction size due to
-                        *   pointer being treated as unsigned integer with
-                        *   zero-extended upper 32-bits;
-                        *   - reading unsigned integers, again due to
-                        *   zero-extension is preserving the value correctly.
-                        *
-                        * In all other cases it's incorrect to attempt to
-                        * load/store field because read value will be
-                        * incorrect, so we poison relocated instruction.
-                        */
-                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
-                               goto done;
-                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
-                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
-                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
-                               goto done;
-
-                       /* mark as invalid mem size adjustment, but this will
-                        * only be checked for LDX/STX/ST insns
-                        */
-                       res->fail_memsz_adjust = true;
-               }
-       } else if (core_relo_is_type_based(relo->kind)) {
-               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
-       } else if (core_relo_is_enumval_based(relo->kind)) {
-               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
-       }
-
-done:
-       if (err == -EUCLEAN) {
-               /* EUCLEAN is used to signal instruction poisoning request */
-               res->poison = true;
-               err = 0;
-       } else if (err == -EOPNOTSUPP) {
-               /* EOPNOTSUPP means unknown/unsupported relocation */
-               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind),
-                       relo->kind, relo->insn_off / 8);
-       }
-
-       return err;
-}
-
-/*
- * Turn instruction for which CO_RE relocation failed into invalid one with
- * distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
-                                int insn_idx, struct bpf_insn *insn)
-{
-       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
-                prog->name, relo_idx, insn_idx);
-       insn->code = BPF_JMP | BPF_CALL;
-       insn->dst_reg = 0;
-       insn->src_reg = 0;
-       insn->off = 0;
-       /* if this instruction is reachable (not a dead code),
-        * verifier will complain with the following message:
-        * invalid func unknown#195896080
-        */
-       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
-       switch (BPF_SIZE(insn->code)) {
-       case BPF_DW: return 8;
-       case BPF_W: return 4;
-       case BPF_H: return 2;
-       case BPF_B: return 1;
-       default: return -1;
-       }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
-       switch (sz) {
-       case 8: return BPF_DW;
-       case 4: return BPF_W;
-       case 2: return BPF_H;
-       case 1: return BPF_B;
-       default: return -1;
-       }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
-                              const struct bpf_core_relo *relo,
-                              int relo_idx,
-                              const struct bpf_core_relo_res *res)
-{
-       __u32 orig_val, new_val;
-       struct bpf_insn *insn;
-       int insn_idx;
-       __u8 class;
+static int bpf_core_apply_relo(struct bpf_program *prog,
+                              const struct bpf_core_relo *relo,
+                              int relo_idx,
+                              const struct btf *local_btf,
+                              struct hashmap *cand_cache)
+{
+       const void *type_key = u32_as_hash_key(relo->type_id);
+       struct bpf_core_cand_list *cands = NULL;
+       const char *prog_name = prog->name;
+       const struct btf_type *local_type;
+       const char *local_name;
+       __u32 local_id = relo->type_id;
+       struct bpf_insn *insn;
+       int insn_idx, err;
 
        if (relo->insn_off % BPF_INSN_SZ)
                return -EINVAL;
@@ -5944,259 +5132,10 @@ static int bpf_core_patch_insn(struct bpf_program *prog,
         * relocated, so it's enough to just subtract in-section offset
         */
        insn_idx = insn_idx - prog->sec_insn_off;
-       insn = &prog->insns[insn_idx];
-       class = BPF_CLASS(insn->code);
-
-       if (res->poison) {
-poison:
-               /* poison second part of ldimm64 to avoid confusing error from
-                * verifier about "unknown opcode 00"
-                */
-               if (is_ldimm64_insn(insn))
-                       bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
-               bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
-               return 0;
-       }
-
-       orig_val = res->orig_val;
-       new_val = res->new_val;
-
-       switch (class) {
-       case BPF_ALU:
-       case BPF_ALU64:
-               if (BPF_SRC(insn->code) != BPF_K)
-                       return -EINVAL;
-               if (res->validate && insn->imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, insn->imm, orig_val, new_val);
-                       return -EINVAL;
-               }
-               orig_val = insn->imm;
-               insn->imm = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        orig_val, new_val);
-               break;
-       case BPF_LDX:
-       case BPF_ST:
-       case BPF_STX:
-               if (res->validate && insn->off != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
-                       return -EINVAL;
-               }
-               if (new_val > SHRT_MAX) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
-                               prog->name, relo_idx, insn_idx, new_val);
-                       return -ERANGE;
-               }
-               if (res->fail_memsz_adjust) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
-                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
-                               prog->name, relo_idx, insn_idx);
-                       goto poison;
-               }
-
-               orig_val = insn->off;
-               insn->off = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
-                        prog->name, relo_idx, insn_idx, orig_val, new_val);
-
-               if (res->new_sz != res->orig_sz) {
-                       int insn_bytes_sz, insn_bpf_sz;
-
-                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
-                       if (insn_bytes_sz != res->orig_sz) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
-                                       prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
-                               return -EINVAL;
-                       }
-
-                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
-                       if (insn_bpf_sz < 0) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
-                                       prog->name, relo_idx, insn_idx, res->new_sz);
-                               return -EINVAL;
-                       }
-
-                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
-                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
-                                prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
-               }
-               break;
-       case BPF_LD: {
-               __u64 imm;
-
-               if (!is_ldimm64_insn(insn) ||
-                   insn[0].src_reg != 0 || insn[0].off != 0 ||
-                   insn_idx + 1 >= prog->insns_cnt ||
-                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
-                   insn[1].src_reg != 0 || insn[1].off != 0) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
-                               prog->name, relo_idx, insn_idx);
-                       return -EINVAL;
-               }
-
-               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
-               if (res->validate && imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, (unsigned long long)imm,
-                               orig_val, new_val);
-                       return -EINVAL;
-               }
-
-               insn[0].imm = new_val;
-               insn[1].imm = 0; /* currently only 32-bit values are supported */
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        (unsigned long long)imm, new_val);
-               break;
-       }
-       default:
-               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
-                       prog->name, relo_idx, insn_idx, insn->code,
-                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+       if (insn_idx > prog->insns_cnt)
                return -EINVAL;
-       }
-
-       return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-       const char *s;
-       __u32 type_id;
-       int i;
-
-       type_id = spec->root_type_id;
-       t = btf__type_by_id(spec->btf, type_id);
-       s = btf__name_by_offset(spec->btf, t->name_off);
-
-       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
-       if (core_relo_is_type_based(spec->relo_kind))
-               return;
-
-       if (core_relo_is_enumval_based(spec->relo_kind)) {
-               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
-               e = btf_enum(t) + spec->raw_spec[0];
-               s = btf__name_by_offset(spec->btf, e->name_off);
-
-               libbpf_print(level, "::%s = %u", s, e->val);
-               return;
-       }
-
-       if (core_relo_is_field_based(spec->relo_kind)) {
-               for (i = 0; i < spec->len; i++) {
-                       if (spec->spec[i].name)
-                               libbpf_print(level, ".%s", spec->spec[i].name);
-                       else if (i > 0 || spec->spec[i].idx > 0)
-                               libbpf_print(level, "[%u]", spec->spec[i].idx);
-               }
-
-               libbpf_print(level, " (");
-               for (i = 0; i < spec->raw_len; i++)
-                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
-               if (spec->bit_offset % 8)
-                       libbpf_print(level, " @ offset %u.%u)",
-                                    spec->bit_offset / 8, spec->bit_offset % 8);
-               else
-                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
-               return;
-       }
-}
-
-static size_t bpf_core_hash_fn(const void *key, void *ctx)
-{
-       return (size_t)key;
-}
-
-static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
-{
-       return k1 == k2;
-}
-
-static void *u32_as_hash_key(__u32 x)
-{
-       return (void *)(uintptr_t)x;
-}
-
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- *    Candidate type is a type with the same "essential" name, ignoring
- *    everything after last triple underscore (___). E.g., `sample`,
- *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- *    for each other. Names with triple underscore are referred to as
- *    "flavors" and are useful, among other things, to allow to
- *    specify/support incompatible variations of the same kernel struct, which
- *    might differ between different kernel versions and/or build
- *    configurations.
- *
- *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- *    converter, when deduplicated BTF of a kernel still contains more than
- *    one different types with the same name. In that case, ___2, ___3, etc
- *    are appended starting from second name conflict. But start flavors are
- *    also useful to be defined "locally", in BPF program, to extract same
- *    data from incompatible changes between different kernel
- *    versions/configurations. For instance, to handle field renames between
- *    kernel versions, one can use two flavors of the struct name with the
- *    same common name and use conditional relocations to extract that field,
- *    depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- *    candidate target type. Matching involves finding corresponding
- *    high-level spec accessors, meaning that all named fields should match,
- *    as well as all array accesses should be within the actual bounds. Also,
- *    types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- *    matching the spec. As long as all the specs resolve to the same set of
- *    offsets across all candidates, there is no error. If there is any
- *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- *    imprefection of BTF deduplication, which can cause slight duplication of
- *    the same BTF type, if some directly or indirectly referenced (by
- *    pointer) type gets resolved to different actual types in different
- *    object files. If such situation occurs, deduplicated BTF will end up
- *    with two (or more) structurally identical types, which differ only in
- *    types they refer to through pointer. This should be OK in most cases and
- *    is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- *    types in target BTF. It is anticipated that this is overall more
- *    efficient memory-wise and not significantly worse (if not better)
- *    CPU-wise compared to prebuilding a map from all local type names to
- *    a list of candidate type names. It's also sped up by caching resolved
- *    list of matching candidates per each local "root" type ID, that has at
- *    least one bpf_core_relo associated with it. This list is shared
- *    between multiple relocations for the same type ID and is updated as some
- *    of the candidates are pruned due to structural incompatibility.
- */
-static int bpf_core_apply_relo(struct bpf_program *prog,
-                              const struct bpf_core_relo *relo,
-                              int relo_idx,
-                              const struct btf *local_btf,
-                              struct hashmap *cand_cache)
-{
-       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
-       const void *type_key = u32_as_hash_key(relo->type_id);
-       struct bpf_core_relo_res cand_res, targ_res;
-       const struct btf_type *local_type;
-       const char *local_name;
-       struct core_cand_list *cands = NULL;
-       __u32 local_id;
-       const char *spec_str;
-       int i, j, err;
+       insn = &prog->insns[insn_idx];
 
-       local_id = relo->type_id;
        local_type = btf__type_by_id(local_btf, local_id);
        if (!local_type)
                return -EINVAL;
@@ -6205,51 +5144,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
        if (!local_name)
                return -EINVAL;
 
-       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
-       if (str_is_empty(spec_str))
-               return -EINVAL;
-
        if (prog->obj->gen_loader) {
-               pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+               pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
                        prog - prog->obj->programs, relo->insn_off / 8,
-                       local_name, spec_str, relo->kind);
+                       local_name, relo->kind);
                return -ENOTSUP;
        }
-       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
-                       prog->name, relo_idx, local_id, btf_kind_str(local_type),
-                       str_is_empty(local_name) ? "<anon>" : local_name,
-                       spec_str, err);
-               return -EINVAL;
-       }
-
-       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
-                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
-       libbpf_print(LIBBPF_DEBUG, "\n");
-
-       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
-       if (relo->kind == BPF_TYPE_ID_LOCAL) {
-               targ_res.validate = true;
-               targ_res.poison = false;
-               targ_res.orig_val = local_spec.root_type_id;
-               targ_res.new_val = local_spec.root_type_id;
-               goto patch_insn;
-       }
-
-       /* libbpf doesn't support candidate search for anonymous types */
-       if (str_is_empty(spec_str)) {
-               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-               return -EOPNOTSUPP;
-       }
 
-       if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+       if (relo->kind != BPF_TYPE_ID_LOCAL &&
+           !hashmap__find(cand_cache, type_key, (void **)&cands)) {
                cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
                if (IS_ERR(cands)) {
                        pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
-                               prog->name, relo_idx, local_id, btf_kind_str(local_type),
+                               prog_name, relo_idx, local_id, btf_kind_str(local_type),
                                local_name, PTR_ERR(cands));
                        return PTR_ERR(cands);
                }
@@ -6260,97 +5167,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
                }
        }
 
-       for (i = 0, j = 0; i < cands->len; i++) {
-               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
-                                         cands->cands[i].id, &cand_spec);
-               if (err < 0) {
-                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
-                               prog->name, relo_idx, i);
-                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
-                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
-                       return err;
-               }
-
-               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
-                        relo_idx, err == 0 ? "non-matching" : "matching", i);
-               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
-               libbpf_print(LIBBPF_DEBUG, "\n");
-
-               if (err == 0)
-                       continue;
-
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
-               if (err)
-                       return err;
-
-               if (j == 0) {
-                       targ_res = cand_res;
-                       targ_spec = cand_spec;
-               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
-                       /* if there are many field relo candidates, they
-                        * should all resolve to the same bit offset
-                        */
-                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
-                               prog->name, relo_idx, cand_spec.bit_offset,
-                               targ_spec.bit_offset);
-                       return -EINVAL;
-               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
-                       /* all candidates should result in the same relocation
-                        * decision and value, otherwise it's dangerous to
-                        * proceed due to ambiguity
-                        */
-                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
-                               prog->name, relo_idx,
-                               cand_res.poison ? "failure" : "success", cand_res.new_val,
-                               targ_res.poison ? "failure" : "success", targ_res.new_val);
-                       return -EINVAL;
-               }
-
-               cands->cands[j++] = cands->cands[i];
-       }
-
-       /*
-        * For BPF_FIELD_EXISTS relo or when used BPF program has field
-        * existence checks or kernel version/config checks, it's expected
-        * that we might not find any candidates. In this case, if field
-        * wasn't found in any candidate, the list of candidates shouldn't
-        * change at all, we'll just handle relocating appropriately,
-        * depending on relo's kind.
-        */
-       if (j > 0)
-               cands->len = j;
-
-       /*
-        * If no candidates were found, it might be both a programmer error,
-        * as well as expected case, depending whether instruction w/
-        * relocation is guarded in some way that makes it unreachable (dead
-        * code) if relocation can't be resolved. This is handled in
-        * bpf_core_patch_insn() uniformly by replacing that instruction with
-        * BPF helper call insn (using invalid helper ID). If that instruction
-        * is indeed unreachable, then it will be ignored and eliminated by
-        * verifier. If it was an error, then verifier will complain and point
-        * to a specific instruction number in its log.
-        */
-       if (j == 0) {
-               pr_debug("prog '%s': relo #%d: no matching targets found\n",
-                        prog->name, relo_idx);
-
-               /* calculate single target relo result explicitly */
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
-               if (err)
-                       return err;
-       }
-
-patch_insn:
-       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
-       err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
-                       prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
-               return -EINVAL;
-       }
-
-       return 0;
+       return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
 }
 
 static int
@@ -6496,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
                                }
                                insn[1].imm = ext->kcfg.data_off;
                        } else /* EXT_KSYM */ {
-                               if (ext->ksym.type_id) { /* typed ksyms */
+                               if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
                                        insn[0].src_reg = BPF_PSEUDO_BTF_ID;
                                        insn[0].imm = ext->ksym.kernel_btf_id;
                                        insn[1].imm = ext->ksym.kernel_btf_obj_fd;
-                               } else { /* typeless ksyms */
+                               } else { /* typeless ksyms or unresolved typed ksyms */
                                        insn[0].imm = (__u32)ext->ksym.addr;
                                        insn[1].imm = ext->ksym.addr >> 32;
                                }
@@ -7190,7 +6007,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
 
        for (i = 0; i < obj->nr_programs; i++) {
                struct bpf_program *p = &obj->programs[i];
-               
+
                if (!p->nr_reloc)
                        continue;
 
@@ -7554,7 +6371,7 @@ static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
                   const struct bpf_object_open_opts *opts)
 {
-       const char *obj_name, *kconfig;
+       const char *obj_name, *kconfig, *btf_tmp_path;
        struct bpf_program *prog;
        struct bpf_object *obj;
        char tmp_name[64];
@@ -7585,11 +6402,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
        if (IS_ERR(obj))
                return obj;
 
+       btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+       if (btf_tmp_path) {
+               if (strlen(btf_tmp_path) >= PATH_MAX) {
+                       err = -ENAMETOOLONG;
+                       goto out;
+               }
+               obj->btf_custom_path = strdup(btf_tmp_path);
+               if (!obj->btf_custom_path) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+       }
+
        kconfig = OPTS_GET(opts, kconfig, NULL);
        if (kconfig) {
                obj->kconfig = strdup(kconfig);
-               if (!obj->kconfig)
-                       return ERR_PTR(-ENOMEM);
+               if (!obj->kconfig) {
+                       err = -ENOMEM;
+                       goto out;
+               }
        }
 
        err = bpf_object__elf_init(obj);
@@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
                                break;
                }
        }
-       if (id <= 0) {
-               pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
-                       __btf_kind_str(kind), ksym_name);
+       if (id <= 0)
                return -ESRCH;
-       }
 
        *res_btf = btf;
        *res_btf_fd = btf_fd;
@@ -7833,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
        struct btf *btf = NULL;
 
        id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
-       if (id < 0)
+       if (id == -ESRCH && ext->is_weak) {
+               return 0;
+       } else if (id < 0) {
+               pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+                       ext->name);
                return id;
+       }
 
        /* find local type_id */
        local_type_id = ext->ksym.type_id;
@@ -8055,7 +6889,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        err = err ? : bpf_object__sanitize_maps(obj);
        err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
        err = err ? : bpf_object__create_maps(obj);
-       err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+       err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
        err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
        if (obj->gen_loader) {
@@ -8450,6 +7284,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
        return map->pin_path;
 }
 
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+       return map->pin_path;
+}
+
 bool bpf_map__is_pinned(const struct bpf_map *map)
 {
        return map->pinned;
@@ -8702,6 +7541,7 @@ void bpf_object__close(struct bpf_object *obj)
        for (i = 0; i < obj->nr_maps; i++)
                bpf_map__destroy(&obj->maps[i]);
 
+       zfree(&obj->btf_custom_path);
        zfree(&obj->kconfig);
        zfree(&obj->externs);
        obj->nr_extern = 0;
@@ -9471,7 +8311,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
        ret = snprintf(btf_type_name, sizeof(btf_type_name),
                       "%s%s", prefix, name);
        /* snprintf returns the number of characters written excluding the
-        * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+        * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
         * indicates truncation.
         */
        if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9495,7 +8335,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
        struct btf *btf;
        int err;
 
-       btf = libbpf_find_kernel_btf();
+       btf = btf__load_vmlinux_btf();
        err = libbpf_get_error(btf);
        if (err) {
                pr_warn("vmlinux BTF is not found\n");
@@ -9514,8 +8354,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 {
        struct bpf_prog_info_linear *info_linear;
        struct bpf_prog_info *info;
-       struct btf *btf = NULL;
-       int err = -EINVAL;
+       struct btf *btf;
+       int err;
 
        info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
        err = libbpf_get_error(info_linear);
@@ -9524,12 +8364,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
                        attach_prog_fd);
                return err;
        }
+
+       err = -EINVAL;
        info = &info_linear->info;
        if (!info->btf_id) {
                pr_warn("The target program doesn't have BTF\n");
                goto out;
        }
-       if (btf__get_from_id(info->btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                pr_warn("Failed to get BTF of the program\n");
                goto out;
        }
@@ -10003,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 
 struct bpf_link {
        int (*detach)(struct bpf_link *link);
-       int (*destroy)(struct bpf_link *link);
+       void (*dealloc)(struct bpf_link *link);
        char *pin_path;         /* NULL, if not pinned */
        int fd;                 /* hook FD, -1 if not applicable */
        bool disconnected;
@@ -10013,7 +8856,7 @@ struct bpf_link {
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
        int ret;
-       
+
        ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
        return libbpf_err_errno(ret);
 }
@@ -10042,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)
 
        if (!link->disconnected && link->detach)
                err = link->detach(link);
-       if (link->destroy)
-               link->destroy(link);
        if (link->pin_path)
                free(link->pin_path);
-       free(link);
+       if (link->dealloc)
+               link->dealloc(link);
+       else
+               free(link);
 
        return libbpf_err(err);
 }
@@ -10143,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)
        return 0;
 }
 
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+       struct bpf_link link;
+       int perf_event_fd;
+};
+
+static int bpf_link_perf_detach(struct bpf_link *link)
 {
-       int err;
+       struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+       int err = 0;
 
-       err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
-       if (err)
+       if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
                err = -errno;
 
+       if (perf_link->perf_event_fd != link->fd)
+               close(perf_link->perf_event_fd);
        close(link->fd);
+
        return libbpf_err(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+       struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+       free(perf_link);
+}
+
+struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+                                                    const struct bpf_perf_event_opts *opts)
 {
        char errmsg[STRERR_BUFSIZE];
-       struct bpf_link *link;
-       int prog_fd, err;
+       struct bpf_link_perf *link;
+       int prog_fd, link_fd = -1, err;
+
+       if (!OPTS_VALID(opts, bpf_perf_event_opts))
+               return libbpf_err_ptr(-EINVAL);
 
        if (pfd < 0) {
                pr_warn("prog '%s': invalid perf event FD %d\n",
@@ -10176,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf
        link = calloc(1, sizeof(*link));
        if (!link)
                return libbpf_err_ptr(-ENOMEM);
-       link->detach = &bpf_link__detach_perf_event;
-       link->fd = pfd;
+       link->link.detach = &bpf_link_perf_detach;
+       link->link.dealloc = &bpf_link_perf_dealloc;
+       link->perf_event_fd = pfd;
 
-       if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
-               err = -errno;
-               free(link);
-               pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
-                       prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               if (err == -EPROTO)
-                       pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
-                               prog->name, pfd);
-               return libbpf_err_ptr(err);
+       if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+               DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+                       .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+               link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+               if (link_fd < 0) {
+                       err = -errno;
+                       pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+                               prog->name, pfd,
+                               err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+                       goto err_out;
+               }
+               link->link.fd = link_fd;
+       } else {
+               if (OPTS_GET(opts, bpf_cookie, 0)) {
+                       pr_warn("prog '%s': user context value is not supported\n", prog->name);
+                       err = -EOPNOTSUPP;
+                       goto err_out;
+               }
+
+               if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+                       err = -errno;
+                       pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+                               prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+                       if (err == -EPROTO)
+                               pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+                                       prog->name, pfd);
+                       goto err_out;
+               }
+               link->link.fd = pfd;
        }
        if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
-               free(link);
-               pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+               pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
                        prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return libbpf_err_ptr(err);
+               goto err_out;
        }
-       return link;
+
+       return &link->link;
+err_out:
+       if (link_fd >= 0)
+               close(link_fd);
+       free(link);
+       return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+{
+       return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
 }
 
 /*
@@ -10257,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)
        return parse_uint_from_file(file, "config:%d\n");
 }
 
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
-                                uint64_t offset, int pid)
+                                uint64_t offset, int pid, size_t ref_ctr_off)
 {
        struct perf_event_attr attr = {};
        char errmsg[STRERR_BUFSIZE];
        int type, pfd, err;
 
+       if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+               return -EINVAL;
+
        type = uprobe ? determine_uprobe_perf_type()
                      : determine_kprobe_perf_type();
        if (type < 0) {
@@ -10286,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
        }
        attr.size = sizeof(attr);
        attr.type = type;
+       attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
        attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
        attr.config2 = offset;           /* kprobe_addr or probe_offset */
 
@@ -10304,23 +9206,34 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
        return pfd;
 }
 
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
-                                           bool retprobe,
-                                           const char *func_name)
+struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+                               const char *func_name,
+                               const struct bpf_kprobe_opts *opts)
 {
+       DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
+       unsigned long offset;
+       bool retprobe;
        int pfd, err;
 
+       if (!OPTS_VALID(opts, bpf_kprobe_opts))
+               return libbpf_err_ptr(-EINVAL);
+
+       retprobe = OPTS_GET(opts, retprobe, false);
+       offset = OPTS_GET(opts, offset, 0);
+       pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
        pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
-                                   0 /* offset */, -1 /* pid */);
+                                   offset, -1 /* pid */, 0 /* ref_ctr_off */);
        if (pfd < 0) {
                pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(pfd);
        }
-       link = bpf_program__attach_perf_event(prog, pfd);
+       link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
        err = libbpf_get_error(link);
        if (err) {
                close(pfd);
@@ -10332,29 +9245,70 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
        return link;
 }
 
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+                                           bool retprobe,
+                                           const char *func_name)
+{
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+               .retprobe = retprobe,
+       );
+
+       return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
+}
+
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
                                      struct bpf_program *prog)
 {
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+       unsigned long offset = 0;
+       struct bpf_link *link;
        const char *func_name;
-       bool retprobe;
+       char *func;
+       int n, err;
 
        func_name = prog->sec_name + sec->len;
-       retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+       opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
 
-       return bpf_program__attach_kprobe(prog, retprobe, func_name);
+       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+       if (n < 1) {
+               err = -EINVAL;
+               pr_warn("kprobe name is invalid: %s\n", func_name);
+               return libbpf_err_ptr(err);
+       }
+       if (opts.retprobe && offset != 0) {
+               free(func);
+               err = -EINVAL;
+               pr_warn("kretprobes do not support offset specification\n");
+               return libbpf_err_ptr(err);
+       }
+
+       opts.offset = offset;
+       link = bpf_program__attach_kprobe_opts(prog, func, &opts);
+       free(func);
+       return link;
 }
 
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
-                                           bool retprobe, pid_t pid,
-                                           const char *binary_path,
-                                           size_t func_offset)
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+                               const char *binary_path, size_t func_offset,
+                               const struct bpf_uprobe_opts *opts)
 {
+       DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
+       size_t ref_ctr_off;
        int pfd, err;
+       bool retprobe;
+
+       if (!OPTS_VALID(opts, bpf_uprobe_opts))
+               return libbpf_err_ptr(-EINVAL);
 
-       pfd = perf_event_open_probe(true /* uprobe */, retprobe,
-                                   binary_path, func_offset, pid);
+       retprobe = OPTS_GET(opts, retprobe, false);
+       ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+       pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+       pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+                                   func_offset, pid, ref_ctr_off);
        if (pfd < 0) {
                pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
                        prog->name, retprobe ? "uretprobe" : "uprobe",
@@ -10362,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(pfd);
        }
-       link = bpf_program__attach_perf_event(prog, pfd);
+       link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
        err = libbpf_get_error(link);
        if (err) {
                close(pfd);
@@ -10375,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
        return link;
 }
 
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+                                           bool retprobe, pid_t pid,
+                                           const char *binary_path,
+                                           size_t func_offset)
+{
+       DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+       return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
 static int determine_tracepoint_id(const char *tp_category,
                                   const char *tp_name)
 {
@@ -10425,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,
        return pfd;
 }
 
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
-                                               const char *tp_category,
-                                               const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+                                                    const char *tp_category,
+                                                    const char *tp_name,
+                                                    const struct bpf_tracepoint_opts *opts)
 {
+       DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
        int pfd, err;
 
+       if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+               return libbpf_err_ptr(-EINVAL);
+
+       pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
        pfd = perf_event_open_tracepoint(tp_category, tp_name);
        if (pfd < 0) {
                pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
@@ -10440,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(pfd);
        }
-       link = bpf_program__attach_perf_event(prog, pfd);
+       link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
        err = libbpf_get_error(link);
        if (err) {
                close(pfd);
@@ -10452,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
        return link;
 }
 
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+                                               const char *tp_category,
+                                               const char *tp_name)
+{
+       return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
                                  struct bpf_program *prog)
 {
index 6e61342..f177d89 100644 (file)
@@ -94,8 +94,15 @@ struct bpf_object_open_opts {
         * system Kconfig for CONFIG_xxx externs.
         */
        const char *kconfig;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations.
+        * This custom BTF completely replaces the use of vmlinux BTF
+        * for the purpose of CO-RE relocations.
+        * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+        * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+        */
+       const char *btf_custom_path;
 };
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
 
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
@@ -237,20 +244,86 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach(struct bpf_program *prog);
+
+struct bpf_perf_event_opts {
+       /* size of this struct, for forward/backward compatiblity */
+       size_t sz;
+       /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+       __u64 bpf_cookie;
+};
+#define bpf_perf_event_opts__last_field bpf_cookie
+
 LIBBPF_API struct bpf_link *
 bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+                                   const struct bpf_perf_event_opts *opts);
+
+struct bpf_kprobe_opts {
+       /* size of this struct, for forward/backward compatiblity */
+       size_t sz;
+       /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+       __u64 bpf_cookie;
+       /* function's offset to install kprobe to */
+       unsigned long offset;
+       /* kprobe is return probe */
+       bool retprobe;
+       size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
+
 LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
                           const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+                                const char *func_name,
+                                const struct bpf_kprobe_opts *opts);
+
+struct bpf_uprobe_opts {
+       /* size of this struct, for forward/backward compatiblity */
+       size_t sz;
+       /* offset of kernel reference counted USDT semaphore, added in
+        * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+        */
+       size_t ref_ctr_offset;
+       /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+       __u64 bpf_cookie;
+       /* uprobe is return probe, invoked at function return time */
+       bool retprobe;
+       size_t :0;
+};
+#define bpf_uprobe_opts__last_field retprobe
+
 LIBBPF_API struct bpf_link *
 bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
                           pid_t pid, const char *binary_path,
                           size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+                               const char *binary_path, size_t func_offset,
+                               const struct bpf_uprobe_opts *opts);
+
+struct bpf_tracepoint_opts {
+       /* size of this struct, for forward/backward compatiblity */
+       size_t sz;
+       /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+       __u64 bpf_cookie;
+};
+#define bpf_tracepoint_opts__last_field bpf_cookie
+
 LIBBPF_API struct bpf_link *
 bpf_program__attach_tracepoint(struct bpf_program *prog,
                               const char *tp_category,
                               const char *tp_name);
 LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+                                   const char *tp_category,
+                                   const char *tp_name,
+                                   const struct bpf_tracepoint_opts *opts);
+
+LIBBPF_API struct bpf_link *
 bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
                                   const char *tp_name);
 LIBBPF_API struct bpf_link *
@@ -477,6 +550,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
index 944c99d..bbc53bb 100644 (file)
@@ -371,7 +371,18 @@ LIBBPF_0.4.0 {
 LIBBPF_0.5.0 {
        global:
                bpf_map__initial_value;
+               bpf_map__pin_path;
                bpf_map_lookup_and_delete_elem_flags;
+               bpf_program__attach_kprobe_opts;
+               bpf_program__attach_perf_event_opts;
+               bpf_program__attach_tracepoint_opts;
+               bpf_program__attach_uprobe_opts;
                bpf_object__gen_loader;
+               btf__load_from_kernel_by_id;
+               btf__load_from_kernel_by_id_split;
+               btf__load_into_kernel;
+               btf__load_module_btf;
+               btf__load_vmlinux_btf;
+               btf_dump__dump_type_data;
                libbpf_set_strict_mode;
 } LIBBPF_0.4.0;
index 016ca7c..533b021 100644 (file)
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <linux/err.h>
 #include "libbpf_legacy.h"
+#include "relo_core.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -195,6 +196,17 @@ void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
                     size_t cur_cnt, size_t max_cnt, size_t add_cnt);
 int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
 
+static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len)
+{
+       while (len > 0) {
+               if (*p)
+                       return false;
+               p++;
+               len--;
+       }
+       return true;
+}
+
 static inline bool libbpf_validate_opts(const char *opts,
                                        size_t opts_sz, size_t user_sz,
                                        const char *type_name)
@@ -203,16 +215,9 @@ static inline bool libbpf_validate_opts(const char *opts,
                pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
                return false;
        }
-       if (user_sz > opts_sz) {
-               size_t i;
-
-               for (i = opts_sz; i < user_sz; i++) {
-                       if (opts[i]) {
-                               pr_warn("%s has non-zero extra bytes\n",
-                                       type_name);
-                               return false;
-                       }
-               }
+       if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) {
+               pr_warn("%s has non-zero extra bytes\n", type_name);
+               return false;
        }
        return true;
 }
@@ -232,6 +237,14 @@ static inline bool libbpf_validate_opts(const char *opts,
                        (opts)->field = value;  \
        } while (0)
 
+#define OPTS_ZEROED(opts, last_nonzero_field)                                \
+({                                                                           \
+       ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field);     \
+       !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off,           \
+                                       (opts)->sz - __off);                  \
+})
+
+
 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
@@ -366,76 +379,6 @@ struct bpf_line_info_min {
        __u32   line_col;
 };
 
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
-       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
-       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
-       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
-       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
-       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
-       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
-       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
-       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
-       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
-       BPF_TYPE_SIZE = 9,              /* type size in bytes */
-       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
-       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- *   its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- *   type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- *   interpretation depends on specific relocation kind:
- *     - for field-based relocations, string encodes an accessed field using
- *     a sequence of field and array indices, separated by colon (:). It's
- *     conceptually very close to LLVM's getelementptr ([0]) instruction's
- *     arguments for identifying offset to a field.
- *     - for type-based relocations, strings is expected to be just "0";
- *     - for enum value-based relocations, string contains an index of enum
- *     value within its enum type;
- *
- * Example to provide a better feel.
- *
- *   struct sample {
- *       int a;
- *       struct {
- *           int b[10];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *   int x = &s->a;     // encoded as "0:0" (a is field #0)
- *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, 
- *                      // b is field #0 inside anon struct, accessing elem #5)
- *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example  will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- *               __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
-       __u32   insn_off;
-       __u32   type_id;
-       __u32   access_str_off;
-       enum bpf_core_relo_kind kind;
-};
 
 typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
 typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
@@ -494,4 +437,14 @@ static inline void *libbpf_ptr(void *ret)
        return ret;
 }
 
+static inline bool str_is_empty(const char *s)
+{
+       return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644 (file)
index 0000000..4016ed4
--- /dev/null
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+       __u32 type_id;          /* struct/union type or array element type */
+       __u32 idx;              /* field index or array index */
+       const char *name;       /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+       const struct btf *btf;
+       /* high-level spec: named fields and array indices only */
+       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+       /* original unresolved (no skip_mods_or_typedefs) root type ID */
+       __u32 root_type_id;
+       /* CO-RE relocation kind */
+       enum bpf_core_relo_kind relo_kind;
+       /* high-level spec length */
+       int len;
+       /* raw, low-level spec: 1-to-1 with accessor spec string */
+       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+       /* raw spec length */
+       int raw_len;
+       /* field bit offset represented by spec */
+       __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+                       const struct bpf_core_accessor *acc,
+                       const struct btf_array *arr)
+{
+       const struct btf_type *t;
+
+       /* not a flexible array, if not inside a struct or has non-zero size */
+       if (!acc->name || arr->nelems > 0)
+               return false;
+
+       /* has to be the last member of enclosing struct */
+       t = btf__type_by_id(btf, acc->type_id);
+       return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+       case BPF_FIELD_EXISTS: return "field_exists";
+       case BPF_FIELD_SIGNED: return "signed";
+       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+       case BPF_TYPE_ID_LOCAL: return "local_type_id";
+       case BPF_TYPE_ID_TARGET: return "target_type_id";
+       case BPF_TYPE_EXISTS: return "type_exists";
+       case BPF_TYPE_SIZE: return "type_size";
+       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+       case BPF_ENUMVAL_VALUE: return "enumval_value";
+       default: return "unknown";
+       }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+       case BPF_FIELD_BYTE_SIZE:
+       case BPF_FIELD_EXISTS:
+       case BPF_FIELD_SIGNED:
+       case BPF_FIELD_LSHIFT_U64:
+       case BPF_FIELD_RSHIFT_U64:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_TYPE_ID_LOCAL:
+       case BPF_TYPE_ID_TARGET:
+       case BPF_TYPE_EXISTS:
+       case BPF_TYPE_SIZE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_ENUMVAL_EXISTS:
+       case BPF_ENUMVAL_VALUE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ *   struct sample {
+ *       int __unimportant;
+ *       struct {
+ *           int __1;
+ *           int __2;
+ *           int a[7];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *
+ *   int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ *   - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - field 'a' access (corresponds to '2' in low-level spec);
+ *   - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+                              __u32 type_id,
+                              const char *spec_str,
+                              enum bpf_core_relo_kind relo_kind,
+                              struct bpf_core_spec *spec)
+{
+       int access_idx, parsed_len, i;
+       struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+       __s64 sz;
+
+       if (str_is_empty(spec_str) || *spec_str == ':')
+               return -EINVAL;
+
+       memset(spec, 0, sizeof(*spec));
+       spec->btf = btf;
+       spec->root_type_id = type_id;
+       spec->relo_kind = relo_kind;
+
+       /* type-based relocations don't have a field access string */
+       if (core_relo_is_type_based(relo_kind)) {
+               if (strcmp(spec_str, "0"))
+                       return -EINVAL;
+               return 0;
+       }
+
+       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+       while (*spec_str) {
+               if (*spec_str == ':')
+                       ++spec_str;
+               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+                       return -EINVAL;
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+               spec_str += parsed_len;
+               spec->raw_spec[spec->raw_len++] = access_idx;
+       }
+
+       if (spec->raw_len == 0)
+               return -EINVAL;
+
+       t = skip_mods_and_typedefs(btf, type_id, &id);
+       if (!t)
+               return -EINVAL;
+
+       access_idx = spec->raw_spec[0];
+       acc = &spec->spec[0];
+       acc->type_id = id;
+       acc->idx = access_idx;
+       spec->len++;
+
+       if (core_relo_is_enumval_based(relo_kind)) {
+               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+                       return -EINVAL;
+
+               /* record enumerator name in a first accessor */
+               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(relo_kind))
+               return -EINVAL;
+
+       sz = btf__resolve_size(btf, id);
+       if (sz < 0)
+               return sz;
+       spec->bit_offset = access_idx * sz * 8;
+
+       for (i = 1; i < spec->raw_len; i++) {
+               t = skip_mods_and_typedefs(btf, id, &id);
+               if (!t)
+                       return -EINVAL;
+
+               access_idx = spec->raw_spec[i];
+               acc = &spec->spec[spec->len];
+
+               if (btf_is_composite(t)) {
+                       const struct btf_member *m;
+                       __u32 bit_offset;
+
+                       if (access_idx >= btf_vlen(t))
+                               return -EINVAL;
+
+                       bit_offset = btf_member_bit_offset(t, access_idx);
+                       spec->bit_offset += bit_offset;
+
+                       m = btf_members(t) + access_idx;
+                       if (m->name_off) {
+                               name = btf__name_by_offset(btf, m->name_off);
+                               if (str_is_empty(name))
+                                       return -EINVAL;
+
+                               acc->type_id = id;
+                               acc->idx = access_idx;
+                               acc->name = name;
+                               spec->len++;
+                       }
+
+                       id = m->type;
+               } else if (btf_is_array(t)) {
+                       const struct btf_array *a = btf_array(t);
+                       bool flex;
+
+                       t = skip_mods_and_typedefs(btf, a->type, &id);
+                       if (!t)
+                               return -EINVAL;
+
+                       flex = is_flex_arr(btf, acc - 1, a);
+                       if (!flex && access_idx >= a->nelems)
+                               return -EINVAL;
+
+                       spec->spec[spec->len].type_id = id;
+                       spec->spec[spec->len].idx = access_idx;
+                       spec->len++;
+
+                       sz = btf__resolve_size(btf, id);
+                       if (sz < 0)
+                               return sz;
+                       spec->bit_offset += access_idx * sz * 8;
+               } else {
+                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+                               type_id, spec_str, i, id, btf_kind_str(t));
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ *   - any two STRUCTs/UNIONs are compatible and can be mixed;
+ *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ *   - any two PTRs are always compatible;
+ *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ *     least one of enums should be anonymous;
+ *   - for ENUMs, check sizes, names are ignored;
+ *   - for INT, size and signedness are ignored;
+ *   - any two FLOATs are always compatible;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+                                     __u32 local_id,
+                                     const struct btf *targ_btf,
+                                     __u32 targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+
+recur:
+       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!local_type || !targ_type)
+               return -EINVAL;
+
+       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+               return 1;
+       if (btf_kind(local_type) != btf_kind(targ_type))
+               return 0;
+
+       switch (btf_kind(local_type)) {
+       case BTF_KIND_PTR:
+       case BTF_KIND_FLOAT:
+               return 1;
+       case BTF_KIND_FWD:
+       case BTF_KIND_ENUM: {
+               const char *local_name, *targ_name;
+               size_t local_len, targ_len;
+
+               local_name = btf__name_by_offset(local_btf,
+                                                local_type->name_off);
+               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+               local_len = bpf_core_essential_name_len(local_name);
+               targ_len = bpf_core_essential_name_len(targ_name);
+               /* one of them is anonymous or both w/ same flavor-less names */
+               return local_len == 0 || targ_len == 0 ||
+                      (local_len == targ_len &&
+                       strncmp(local_name, targ_name, local_len) == 0);
+       }
+       case BTF_KIND_INT:
+               /* just reject deprecated bitfield-like integers; all other
+                * integers are by default compatible between each other
+                */
+               return btf_int_offset(local_type) == 0 &&
+                      btf_int_offset(targ_type) == 0;
+       case BTF_KIND_ARRAY:
+               local_id = btf_array(local_type)->type;
+               targ_id = btf_array(targ_type)->type;
+               goto recur;
+       default:
+               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+                       btf_kind(local_type), local_id, targ_id);
+               return 0;
+       }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+                                const struct bpf_core_accessor *local_acc,
+                                const struct btf *targ_btf,
+                                __u32 targ_id,
+                                struct bpf_core_spec *spec,
+                                __u32 *next_targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+       const struct btf_member *local_member, *m;
+       const char *local_name, *targ_name;
+       __u32 local_id;
+       int i, n, found;
+
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!targ_type)
+               return -EINVAL;
+       if (!btf_is_composite(targ_type))
+               return 0;
+
+       local_id = local_acc->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       local_member = btf_members(local_type) + local_acc->idx;
+       local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+       n = btf_vlen(targ_type);
+       m = btf_members(targ_type);
+       for (i = 0; i < n; i++, m++) {
+               __u32 bit_offset;
+
+               bit_offset = btf_member_bit_offset(targ_type, i);
+
+               /* too deep struct/union/array nesting */
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+
+               /* speculate this member will be the good one */
+               spec->bit_offset += bit_offset;
+               spec->raw_spec[spec->raw_len++] = i;
+
+               targ_name = btf__name_by_offset(targ_btf, m->name_off);
+               if (str_is_empty(targ_name)) {
+                       /* embedded struct/union, we need to go deeper */
+                       found = bpf_core_match_member(local_btf, local_acc,
+                                                     targ_btf, m->type,
+                                                     spec, next_targ_id);
+                       if (found) /* either found or error */
+                               return found;
+               } else if (strcmp(local_name, targ_name) == 0) {
+                       /* matching named field */
+                       struct bpf_core_accessor *targ_acc;
+
+                       targ_acc = &spec->spec[spec->len++];
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = i;
+                       targ_acc->name = targ_name;
+
+                       *next_targ_id = m->type;
+                       found = bpf_core_fields_are_compat(local_btf,
+                                                          local_member->type,
+                                                          targ_btf, m->type);
+                       if (!found)
+                               spec->len--; /* pop accessor */
+                       return found;
+               }
+               /* member turned out not to be what we looked for */
+               spec->bit_offset -= bit_offset;
+               spec->raw_len--;
+       }
+
+       return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+                              const struct btf *targ_btf, __u32 targ_id,
+                              struct bpf_core_spec *targ_spec)
+{
+       const struct btf_type *targ_type;
+       const struct bpf_core_accessor *local_acc;
+       struct bpf_core_accessor *targ_acc;
+       int i, sz, matched;
+
+       memset(targ_spec, 0, sizeof(*targ_spec));
+       targ_spec->btf = targ_btf;
+       targ_spec->root_type_id = targ_id;
+       targ_spec->relo_kind = local_spec->relo_kind;
+
+       if (core_relo_is_type_based(local_spec->relo_kind)) {
+               return bpf_core_types_are_compat(local_spec->btf,
+                                                local_spec->root_type_id,
+                                                targ_btf, targ_id);
+       }
+
+       local_acc = &local_spec->spec[0];
+       targ_acc = &targ_spec->spec[0];
+
+       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+               size_t local_essent_len, targ_essent_len;
+               const struct btf_enum *e;
+               const char *targ_name;
+
+               /* has to resolve to an enum */
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+               if (!btf_is_enum(targ_type))
+                       return 0;
+
+               local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+                       targ_essent_len = bpf_core_essential_name_len(targ_name);
+                       if (targ_essent_len != local_essent_len)
+                               continue;
+                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+                               targ_acc->type_id = targ_id;
+                               targ_acc->idx = i;
+                               targ_acc->name = targ_name;
+                               targ_spec->len++;
+                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                               targ_spec->raw_len++;
+                               return 1;
+                       }
+               }
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(local_spec->relo_kind))
+               return -EINVAL;
+
+       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+                                                  &targ_id);
+               if (!targ_type)
+                       return -EINVAL;
+
+               if (local_acc->name) {
+                       matched = bpf_core_match_member(local_spec->btf,
+                                                       local_acc,
+                                                       targ_btf, targ_id,
+                                                       targ_spec, &targ_id);
+                       if (matched <= 0)
+                               return matched;
+               } else {
+                       /* for i=0, targ_id is already treated as array element
+                        * type (because it's the original struct), for others
+                        * we should find array element type first
+                        */
+                       if (i > 0) {
+                               const struct btf_array *a;
+                               bool flex;
+
+                               if (!btf_is_array(targ_type))
+                                       return 0;
+
+                               a = btf_array(targ_type);
+                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+                               if (!flex && local_acc->idx >= a->nelems)
+                                       return 0;
+                               if (!skip_mods_and_typedefs(targ_btf, a->type,
+                                                           &targ_id))
+                                       return -EINVAL;
+                       }
+
+                       /* too deep struct/union/array nesting */
+                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                               return -E2BIG;
+
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = local_acc->idx;
+                       targ_acc->name = NULL;
+                       targ_spec->len++;
+                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                       targ_spec->raw_len++;
+
+                       sz = btf__resolve_size(targ_btf, targ_id);
+                       if (sz < 0)
+                               return sz;
+                       targ_spec->bit_offset += local_acc->idx * sz * 8;
+               }
+       }
+
+       return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+                                   const struct bpf_core_relo *relo,
+                                   const struct bpf_core_spec *spec,
+                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
+                                   bool *validate)
+{
+       const struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+       const struct btf_member *m;
+       const struct btf_type *mt;
+       bool bitfield;
+       __s64 sz;
+
+       *field_sz = 0;
+
+       if (relo->kind == BPF_FIELD_EXISTS) {
+               *val = spec ? 1 : 0;
+               return 0;
+       }
+
+       if (!spec)
+               return -EUCLEAN; /* request instruction poisoning */
+
+       acc = &spec->spec[spec->len - 1];
+       t = btf__type_by_id(spec->btf, acc->type_id);
+
+       /* a[n] accessor needs special handling */
+       if (!acc->name) {
+               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+                       *val = spec->bit_offset / 8;
+                       /* remember field size for load/store mem size */
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *field_sz = sz;
+                       *type_id = acc->type_id;
+               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *val = sz;
+               } else {
+                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+                               prog_name, relo->kind, relo->insn_off / 8);
+                       return -EINVAL;
+               }
+               if (validate)
+                       *validate = true;
+               return 0;
+       }
+
+       m = btf_members(t) + acc->idx;
+       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+       bit_off = spec->bit_offset;
+       bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+       bitfield = bit_sz > 0;
+       if (bitfield) {
+               byte_sz = mt->size;
+               byte_off = bit_off / 8 / byte_sz * byte_sz;
+               /* figure out smallest int size necessary for bitfield load */
+               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+                       if (byte_sz >= 8) {
+                               /* bitfield can't be read with 64-bit read */
+                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+                                       prog_name, relo->kind, relo->insn_off / 8);
+                               return -E2BIG;
+                       }
+                       byte_sz *= 2;
+                       byte_off = bit_off / 8 / byte_sz * byte_sz;
+               }
+       } else {
+               sz = btf__resolve_size(spec->btf, field_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               byte_sz = sz;
+               byte_off = spec->bit_offset / 8;
+               bit_sz = byte_sz * 8;
+       }
+
+       /* for bitfields, all the relocatable aspects are ambiguous and we
+        * might disagree with compiler, so turn off validation of expected
+        * value, except for signedness
+        */
+       if (validate)
+               *validate = !bitfield;
+
+       switch (relo->kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+               *val = byte_off;
+               if (!bitfield) {
+                       *field_sz = byte_sz;
+                       *type_id = field_type_id;
+               }
+               break;
+       case BPF_FIELD_BYTE_SIZE:
+               *val = byte_sz;
+               break;
+       case BPF_FIELD_SIGNED:
+               /* enums will be assumed unsigned */
+               *val = btf_is_enum(mt) ||
+                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
+               if (validate)
+                       *validate = true; /* signedness is never ambiguous */
+               break;
+       case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               *val = 64 - (bit_off + bit_sz - byte_off  * 8);
+#else
+               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+               break;
+       case BPF_FIELD_RSHIFT_U64:
+               *val = 64 - bit_sz;
+               if (validate)
+                       *validate = true; /* right shift is never ambiguous */
+               break;
+       case BPF_FIELD_EXISTS:
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+                                  const struct bpf_core_spec *spec,
+                                  __u32 *val)
+{
+       __s64 sz;
+
+       /* type-based relos return zero when target type is not found */
+       if (!spec) {
+               *val = 0;
+               return 0;
+       }
+
+       switch (relo->kind) {
+       case BPF_TYPE_ID_TARGET:
+               *val = spec->root_type_id;
+               break;
+       case BPF_TYPE_EXISTS:
+               *val = 1;
+               break;
+       case BPF_TYPE_SIZE:
+               sz = btf__resolve_size(spec->btf, spec->root_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               *val = sz;
+               break;
+       case BPF_TYPE_ID_LOCAL:
+       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+                                     const struct bpf_core_spec *spec,
+                                     __u32 *val)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+
+       switch (relo->kind) {
+       case BPF_ENUMVAL_EXISTS:
+               *val = spec ? 1 : 0;
+               break;
+       case BPF_ENUMVAL_VALUE:
+               if (!spec)
+                       return -EUCLEAN; /* request instruction poisoning */
+               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+               e = btf_enum(t) + spec->spec[0].idx;
+               *val = e->val;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+struct bpf_core_relo_res
+{
+       /* expected value in the instruction, unless validate == false */
+       __u32 orig_val;
+       /* new value that needs to be patched up to */
+       __u32 new_val;
+       /* relocation unsuccessful, poison instruction, but don't fail load */
+       bool poison;
+       /* some relocations can't be validated against orig_val */
+       bool validate;
+       /* for field byte offset relocations or the forms:
+        *     *(T *)(rX + <off>) = rY
+        *     rX = *(T *)(rY + <off>),
+        * we remember original and resolved field size to adjust direct
+        * memory loads of pointers and integers; this is necessary for 32-bit
+        * host kernel architectures, but also allows to automatically
+        * relocate fields that were resized from, e.g., u32 to u64, etc.
+        */
+       bool fail_memsz_adjust;
+       __u32 orig_sz;
+       __u32 orig_type_id;
+       __u32 new_sz;
+       __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+                             const struct bpf_core_relo *relo,
+                             int relo_idx,
+                             const struct bpf_core_spec *local_spec,
+                             const struct bpf_core_spec *targ_spec,
+                             struct bpf_core_relo_res *res)
+{
+       int err = -EOPNOTSUPP;
+
+       res->orig_val = 0;
+       res->new_val = 0;
+       res->poison = false;
+       res->validate = true;
+       res->fail_memsz_adjust = false;
+       res->orig_sz = res->new_sz = 0;
+       res->orig_type_id = res->new_type_id = 0;
+
+       if (core_relo_is_field_based(relo->kind)) {
+               err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+                                              &res->orig_val, &res->orig_sz,
+                                              &res->orig_type_id, &res->validate);
+               err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+                                                     &res->new_val, &res->new_sz,
+                                                     &res->new_type_id, NULL);
+               if (err)
+                       goto done;
+               /* Validate if it's safe to adjust load/store memory size.
+                * Adjustments are performed only if original and new memory
+                * sizes differ.
+                */
+               res->fail_memsz_adjust = false;
+               if (res->orig_sz != res->new_sz) {
+                       const struct btf_type *orig_t, *new_t;
+
+                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+                       /* There are two use cases in which it's safe to
+                        * adjust load/store's mem size:
+                        *   - reading a 32-bit kernel pointer, while on BPF
+                        *   size pointers are always 64-bit; in this case
+                        *   it's safe to "downsize" instruction size due to
+                        *   pointer being treated as unsigned integer with
+                        *   zero-extended upper 32-bits;
+                        *   - reading unsigned integers, again due to
+                        *   zero-extension is preserving the value correctly.
+                        *
+                        * In all other cases it's incorrect to attempt to
+                        * load/store field because read value will be
+                        * incorrect, so we poison relocated instruction.
+                        */
+                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+                               goto done;
+                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
+                               goto done;
+
+                       /* mark as invalid mem size adjustment, but this will
+                        * only be checked for LDX/STX/ST insns
+                        */
+                       res->fail_memsz_adjust = true;
+               }
+       } else if (core_relo_is_type_based(relo->kind)) {
+               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+       } else if (core_relo_is_enumval_based(relo->kind)) {
+               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+       }
+
+done:
+       if (err == -EUCLEAN) {
+               /* EUCLEAN is used to signal instruction poisoning request */
+               res->poison = true;
+               err = 0;
+       } else if (err == -EOPNOTSUPP) {
+               /* EOPNOTSUPP means unknown/unsupported relocation */
+               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind),
+                       relo->kind, relo->insn_off / 8);
+       }
+
+       return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+                                int insn_idx, struct bpf_insn *insn)
+{
+       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+                prog_name, relo_idx, insn_idx);
+       insn->code = BPF_JMP | BPF_CALL;
+       insn->dst_reg = 0;
+       insn->src_reg = 0;
+       insn->off = 0;
+       /* if this instruction is reachable (not a dead code),
+        * verifier will complain with the following message:
+        * invalid func unknown#195896080
+        */
+       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+       switch (BPF_SIZE(insn->code)) {
+       case BPF_DW: return 8;
+       case BPF_W: return 4;
+       case BPF_H: return 2;
+       case BPF_B: return 1;
+       default: return -1;
+       }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+       switch (sz) {
+       case 8: return BPF_DW;
+       case 4: return BPF_W;
+       case 2: return BPF_H;
+       case 1: return BPF_B;
+       default: return -1;
+       }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+                              int insn_idx, const struct bpf_core_relo *relo,
+                              int relo_idx, const struct bpf_core_relo_res *res)
+{
+       __u32 orig_val, new_val;
+       __u8 class;
+
+       class = BPF_CLASS(insn->code);
+
+       if (res->poison) {
+poison:
+               /* poison second part of ldimm64 to avoid confusing error from
+                * verifier about "unknown opcode 00"
+                */
+               if (is_ldimm64_insn(insn))
+                       bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+               bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+               return 0;
+       }
+
+       orig_val = res->orig_val;
+       new_val = res->new_val;
+
+       switch (class) {
+       case BPF_ALU:
+       case BPF_ALU64:
+               if (BPF_SRC(insn->code) != BPF_K)
+                       return -EINVAL;
+               if (res->validate && insn->imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, insn->imm, orig_val, new_val);
+                       return -EINVAL;
+               }
+               orig_val = insn->imm;
+               insn->imm = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        orig_val, new_val);
+               break;
+       case BPF_LDX:
+       case BPF_ST:
+       case BPF_STX:
+               if (res->validate && insn->off != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+                       return -EINVAL;
+               }
+               if (new_val > SHRT_MAX) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+                               prog_name, relo_idx, insn_idx, new_val);
+                       return -ERANGE;
+               }
+               if (res->fail_memsz_adjust) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+                               prog_name, relo_idx, insn_idx);
+                       goto poison;
+               }
+
+               orig_val = insn->off;
+               insn->off = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+                        prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+               if (res->new_sz != res->orig_sz) {
+                       int insn_bytes_sz, insn_bpf_sz;
+
+                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+                       if (insn_bytes_sz != res->orig_sz) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+                                       prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+                               return -EINVAL;
+                       }
+
+                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+                       if (insn_bpf_sz < 0) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+                                       prog_name, relo_idx, insn_idx, res->new_sz);
+                               return -EINVAL;
+                       }
+
+                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+                                prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+               }
+               break;
+       case BPF_LD: {
+               __u64 imm;
+
+               if (!is_ldimm64_insn(insn) ||
+                   insn[0].src_reg != 0 || insn[0].off != 0 ||
+                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
+                   insn[1].src_reg != 0 || insn[1].off != 0) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+                               prog_name, relo_idx, insn_idx);
+                       return -EINVAL;
+               }
+
+               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+               if (res->validate && imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, (unsigned long long)imm,
+                               orig_val, new_val);
+                       return -EINVAL;
+               }
+
+               insn[0].imm = new_val;
+               insn[1].imm = 0; /* currently only 32-bit values are supported */
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        (unsigned long long)imm, new_val);
+               break;
+       }
+       default:
+               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+                       prog_name, relo_idx, insn_idx, insn->code,
+                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+       const char *s;
+       __u32 type_id;
+       int i;
+
+       type_id = spec->root_type_id;
+       t = btf__type_by_id(spec->btf, type_id);
+       s = btf__name_by_offset(spec->btf, t->name_off);
+
+       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+       if (core_relo_is_type_based(spec->relo_kind))
+               return;
+
+       if (core_relo_is_enumval_based(spec->relo_kind)) {
+               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+               e = btf_enum(t) + spec->raw_spec[0];
+               s = btf__name_by_offset(spec->btf, e->name_off);
+
+               libbpf_print(level, "::%s = %u", s, e->val);
+               return;
+       }
+
+       if (core_relo_is_field_based(spec->relo_kind)) {
+               for (i = 0; i < spec->len; i++) {
+                       if (spec->spec[i].name)
+                               libbpf_print(level, ".%s", spec->spec[i].name);
+                       else if (i > 0 || spec->spec[i].idx > 0)
+                               libbpf_print(level, "[%u]", spec->spec[i].idx);
+               }
+
+               libbpf_print(level, " (");
+               for (i = 0; i < spec->raw_len; i++)
+                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+               if (spec->bit_offset % 8)
+                       libbpf_print(level, " @ offset %u.%u)",
+                                    spec->bit_offset / 8, spec->bit_offset % 8);
+               else
+                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+               return;
+       }
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ *    Candidate type is a type with the same "essential" name, ignoring
+ *    everything after last triple underscore (___). E.g., `sample`,
+ *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ *    for each other. Names with triple underscore are referred to as
+ *    "flavors" and are useful, among other things, to allow to
+ *    specify/support incompatible variations of the same kernel struct, which
+ *    might differ between different kernel versions and/or build
+ *    configurations.
+ *
+ *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ *    converter, when deduplicated BTF of a kernel still contains more than
+ *    one different types with the same name. In that case, ___2, ___3, etc
+ *    are appended starting from second name conflict. But start flavors are
+ *    also useful to be defined "locally", in BPF program, to extract same
+ *    data from incompatible changes between different kernel
+ *    versions/configurations. For instance, to handle field renames between
+ *    kernel versions, one can use two flavors of the struct name with the
+ *    same common name and use conditional relocations to extract that field,
+ *    depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ *    candidate target type. Matching involves finding corresponding
+ *    high-level spec accessors, meaning that all named fields should match,
+ *    as well as all array accesses should be within the actual bounds. Also,
+ *    types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ *    matching the spec. As long as all the specs resolve to the same set of
+ *    offsets across all candidates, there is no error. If there is any
+ *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ *    imprefection of BTF deduplication, which can cause slight duplication of
+ *    the same BTF type, if some directly or indirectly referenced (by
+ *    pointer) type gets resolved to different actual types in different
+ *    object files. If such situation occurs, deduplicated BTF will end up
+ *    with two (or more) structurally identical types, which differ only in
+ *    types they refer to through pointer. This should be OK in most cases and
+ *    is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ *    types in target BTF. It is anticipated that this is overall more
+ *    efficient memory-wise and not significantly worse (if not better)
+ *    CPU-wise compared to prebuilding a map from all local type names to
+ *    a list of candidate type names. It's also sped up by caching resolved
+ *    list of matching candidates per each local "root" type ID, that has at
+ *    least one bpf_core_relo associated with it. This list is shared
+ *    between multiple relocations for the same type ID and is updated as some
+ *    of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+                            int insn_idx,
+                            const struct bpf_core_relo *relo,
+                            int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands)
+{
+       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+       struct bpf_core_relo_res cand_res, targ_res;
+       const struct btf_type *local_type;
+       const char *local_name;
+       __u32 local_id;
+       const char *spec_str;
+       int i, j, err;
+
+       local_id = relo->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       if (!local_type)
+               return -EINVAL;
+
+       local_name = btf__name_by_offset(local_btf, local_type->name_off);
+       if (!local_name)
+               return -EINVAL;
+
+       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+       if (str_is_empty(spec_str))
+               return -EINVAL;
+
+       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+                       prog_name, relo_idx, local_id, btf_kind_str(local_type),
+                       str_is_empty(local_name) ? "<anon>" : local_name,
+                       spec_str, err);
+               return -EINVAL;
+       }
+
+       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+       libbpf_print(LIBBPF_DEBUG, "\n");
+
+       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+       if (relo->kind == BPF_TYPE_ID_LOCAL) {
+               targ_res.validate = true;
+               targ_res.poison = false;
+               targ_res.orig_val = local_spec.root_type_id;
+               targ_res.new_val = local_spec.root_type_id;
+               goto patch_insn;
+       }
+
+       /* libbpf doesn't support candidate search for anonymous types */
+       if (str_is_empty(spec_str)) {
+               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+               return -EOPNOTSUPP;
+       }
+
+
+       for (i = 0, j = 0; i < cands->len; i++) {
+               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+                                         cands->cands[i].id, &cand_spec);
+               if (err < 0) {
+                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+                               prog_name, relo_idx, i);
+                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
+                       return err;
+               }
+
+               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+                        relo_idx, err == 0 ? "non-matching" : "matching", i);
+               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+               libbpf_print(LIBBPF_DEBUG, "\n");
+
+               if (err == 0)
+                       continue;
+
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+               if (err)
+                       return err;
+
+               if (j == 0) {
+                       targ_res = cand_res;
+                       targ_spec = cand_spec;
+               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+                       /* if there are many field relo candidates, they
+                        * should all resolve to the same bit offset
+                        */
+                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+                               prog_name, relo_idx, cand_spec.bit_offset,
+                               targ_spec.bit_offset);
+                       return -EINVAL;
+               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+                       /* all candidates should result in the same relocation
+                        * decision and value, otherwise it's dangerous to
+                        * proceed due to ambiguity
+                        */
+                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+                               prog_name, relo_idx,
+                               cand_res.poison ? "failure" : "success", cand_res.new_val,
+                               targ_res.poison ? "failure" : "success", targ_res.new_val);
+                       return -EINVAL;
+               }
+
+               cands->cands[j++] = cands->cands[i];
+       }
+
+       /*
+        * For BPF_FIELD_EXISTS relo or when used BPF program has field
+        * existence checks or kernel version/config checks, it's expected
+        * that we might not find any candidates. In this case, if field
+        * wasn't found in any candidate, the list of candidates shouldn't
+        * change at all, we'll just handle relocating appropriately,
+        * depending on relo's kind.
+        */
+       if (j > 0)
+               cands->len = j;
+
+       /*
+        * If no candidates were found, it might be both a programmer error,
+        * as well as expected case, depending whether instruction w/
+        * relocation is guarded in some way that makes it unreachable (dead
+        * code) if relocation can't be resolved. This is handled in
+        * bpf_core_patch_insn() uniformly by replacing that instruction with
+        * BPF helper call insn (using invalid helper ID). If that instruction
+        * is indeed unreachable, then it will be ignored and eliminated by
+        * verifier. If it was an error, then verifier will complain and point
+        * to a specific instruction number in its log.
+        */
+       if (j == 0) {
+               pr_debug("prog '%s': relo #%d: no matching targets found\n",
+                        prog_name, relo_idx);
+
+               /* calculate single target relo result explicitly */
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+               if (err)
+                       return err;
+       }
+
+patch_insn:
+       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+       err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+                       prog_name, relo_idx, relo->insn_off / 8, err);
+               return -EINVAL;
+       }
+
+       return 0;
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644 (file)
index 0000000..3b9f8f1
--- /dev/null
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
+       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
+       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
+       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
+       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
+       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
+       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
+       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
+       BPF_TYPE_SIZE = 9,              /* type size in bytes */
+       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
+       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
+};
+
+/* The minimum bpf_core_relo checked by the loader
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *     a sequence of field and array indices, separated by colon (:). It's
+ *     conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *     arguments for identifying offset to a field.
+ *     - for type-based relocations, strings is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *     value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
+ *                      // b is field #0 inside anon struct, accessing elem #5)
+ *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example  will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *               __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+       __u32   insn_off;
+       __u32   type_id;
+       __u32   access_str_off;
+       enum bpf_core_relo_kind kind;
+};
+
+struct bpf_core_cand {
+       const struct btf *btf;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+       struct bpf_core_cand *cands;
+       int len;
+};
+
+int bpf_core_apply_relo_insn(const char *prog_name,
+                            struct bpf_insn *insn, int insn_idx,
+                            const struct bpf_core_relo *relo, int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+#endif
index cdecda1..996d025 100644 (file)
@@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
                        free(info_linear);
                        return -1;
                }
-               if (btf__get_from_id(info->btf_id, &btf)) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
                        pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
                        err = -1;
-                       btf = NULL;
                        goto out;
                }
                perf_env__fetch_btf(env, info->btf_id, btf);
@@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 
 out:
        free(info_linear);
-       free(btf);
+       btf__free(btf);
        return err ? -1 : 0;
 }
 
@@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        if (btf_id == 0)
                goto out;
 
-       if (btf__get_from_id(btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(btf_id);
+       if (libbpf_get_error(btf)) {
                pr_debug("%s: failed to get BTF of id %u, aborting\n",
                         __func__, btf_id);
                goto out;
@@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        perf_env__fetch_btf(env, btf_id, btf);
 
 out:
-       free(btf);
+       btf__free(btf);
        close(fd);
 }
 
index 8150e03..ba0f208 100644 (file)
@@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
index fb010a3..da9e8b6 100644 (file)
@@ -38,6 +38,7 @@ TARGETS += mount_setattr
 TARGETS += mqueue
 TARGETS += nci
 TARGETS += net
+TARGETS += net/af_unix
 TARGETS += net/forwarding
 TARGETS += net/mptcp
 TARGETS += netfilter
index addcfd8..433f8be 100644 (file)
@@ -23,7 +23,6 @@ test_skb_cgroup_id_user
 test_cgroup_storage
 test_flow_dissector
 flow_dissector_load
-test_netcnt
 test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
index f405b20..866531c 100644 (file)
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_verifier_log test_dev_cgroup \
        test_sock test_sockmap get_cgroup_id_user \
        test_cgroup_storage \
-       test_netcnt test_tcpnotify_user test_sysctl \
+       test_tcpnotify_user test_sysctl \
        test_progs-no_alu32
 
 # Also test bpf-gcc, if present
@@ -79,7 +79,7 @@ TEST_PROGS := test_kmod.sh \
 
 TEST_PROGS_EXTENDED := with_addr.sh \
        with_tunnels.sh \
-       test_xdp_vlan.sh
+       test_xdp_vlan.sh test_bpftool.py
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
@@ -187,6 +187,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
                    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&      \
                    cp $(SCRATCH_DIR)/runqslower $@
 
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
@@ -197,7 +199,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 
index 8deec1c..9b17f28 100644 (file)
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
 bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
 saves the resulting output (by default in ``~/.bpf_selftests``).
 
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
 For more information on about using the script, run:
 
 .. code-block:: console
index 029589c..b1ede6f 100644 (file)
 SEC("struct_ops/"#name) \
 BPF_PROG(name, args)
 
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
 #define tcp_jiffies32 ((__u32)bpf_jiffies64())
 
 struct sock_common {
@@ -27,6 +31,7 @@ enum sk_pacing {
 
 struct sock {
        struct sock_common      __sk_common;
+#define sk_state               __sk_common.skc_state
        unsigned long           sk_pacing_rate;
        __u32                   sk_pacing_status; /* see enum sk_pacing */
 } __attribute__((preserve_access_index));
@@ -203,6 +208,20 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
        return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
 }
 
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
+{
+       int i;
+
+       for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+               if (a[i] != b[i])
+                       return false;
+               if (!a[i])
+                       break;
+       }
+
+       return true;
+}
+
 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
index 81084c1..0ab1c88 100644 (file)
@@ -6,19 +6,39 @@
 
 #define MAX_PERCPU_PACKETS 32
 
-struct percpu_net_cnt {
-       __u64 packets;
-       __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM          256
 
-       __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE       (0xFFFF - \
+                                                SIZEOF_BPF_LOCAL_STORAGE_ELEM)
 
-       __u64 prev_packets;
-       __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE                     32768
+
+union percpu_net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+
+               __u64 prev_ts;
+
+               __u64 prev_packets;
+               __u64 prev_bytes;
+       };
+       __u8 data[PCPU_MIN_UNIT_SIZE];
 };
 
-struct net_cnt {
-       __u64 packets;
-       __u64 bytes;
+union net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+       };
+       __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
 };
 
 #endif
index 2060bc1..7e9f637 100644 (file)
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
 
 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
 
-int start_server(int family, int type, const char *addr_str, __u16 port,
-                int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+                         socklen_t addrlen, int timeout_ms, bool reuseport)
 {
-       struct sockaddr_storage addr = {};
-       socklen_t len;
+       int on = 1;
        int fd;
 
-       if (make_sockaddr(family, addr_str, port, &addr, &len))
-               return -1;
-
-       fd = socket(family, type, 0);
+       fd = socket(addr->sa_family, type, 0);
        if (fd < 0) {
                log_err("Failed to create server socket");
                return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
        if (settimeo(fd, timeout_ms))
                goto error_close;
 
-       if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+       if (reuseport &&
+           setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+               log_err("Failed to set SO_REUSEPORT");
+               return -1;
+       }
+
+       if (bind(fd, addr, addrlen) < 0) {
                log_err("Failed to bind socket");
                goto error_close;
        }
@@ -104,6 +106,69 @@ error_close:
        return -1;
 }
 
+int start_server(int family, int type, const char *addr_str, __u16 port,
+                int timeout_ms)
+{
+       struct sockaddr_storage addr;
+       socklen_t addrlen;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return -1;
+
+       return __start_server(type, (struct sockaddr *)&addr,
+                             addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+       struct sockaddr_storage addr;
+       unsigned int nr_fds = 0;
+       socklen_t addrlen;
+       int *fds;
+
+       if (!nr_listens)
+               return NULL;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return NULL;
+
+       fds = malloc(sizeof(*fds) * nr_listens);
+       if (!fds)
+               return NULL;
+
+       fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+                               timeout_ms, true);
+       if (fds[0] == -1)
+               goto close_fds;
+       nr_fds = 1;
+
+       if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+               goto close_fds;
+
+       for (; nr_fds < nr_listens; nr_fds++) {
+               fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+                                            addrlen, timeout_ms, true);
+               if (fds[nr_fds] == -1)
+                       goto close_fds;
+       }
+
+       return fds;
+
+close_fds:
+       free_fds(fds, nr_fds);
+       return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+       if (fds) {
+               while (nr_close_fds)
+                       close(fds[--nr_close_fds]);
+               free(fds);
+       }
+}
+
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms)
 {
@@ -153,13 +218,18 @@ static int connect_fd_to_addr(int fd,
        return 0;
 }
 
-int connect_to_fd(int server_fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 {
        struct sockaddr_storage addr;
        struct sockaddr_in *addr_in;
        socklen_t addrlen, optlen;
        int fd, type;
 
+       if (!opts)
+               opts = &default_opts;
+
        optlen = sizeof(type);
        if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
                log_err("getsockopt(SOL_TYPE)");
@@ -179,7 +249,12 @@ int connect_to_fd(int server_fd, int timeout_ms)
                return -1;
        }
 
-       if (settimeo(fd, timeout_ms))
+       if (settimeo(fd, opts->timeout_ms))
+               goto error_close;
+
+       if (opts->cc && opts->cc[0] &&
+           setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+                      strlen(opts->cc) + 1))
                goto error_close;
 
        if (connect_fd_to_addr(fd, &addr, addrlen))
@@ -192,6 +267,15 @@ error_close:
        return -1;
 }
 
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+       struct network_helper_opts opts = {
+               .timeout_ms = timeout_ms,
+       };
+
+       return connect_to_fd_opts(server_fd, &opts);
+}
+
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 {
        struct sockaddr_storage addr;
@@ -217,6 +301,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        if (family == AF_INET) {
                struct sockaddr_in *sin = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                sin->sin_port = htons(port);
                if (addr_str &&
@@ -230,6 +315,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        } else if (family == AF_INET6) {
                struct sockaddr_in6 *sin6 = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin6));
                sin6->sin6_family = AF_INET6;
                sin6->sin6_port = htons(port);
                if (addr_str &&
@@ -243,3 +329,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        }
        return -1;
 }
+
+char *ping_command(int family)
+{
+       if (family == AF_INET6) {
+               /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+               if (!system("which ping6 >/dev/null 2>&1"))
+                       return "ping6";
+               else
+                       return "ping -6";
+       }
+       return "ping";
+}
index 5e0d51c..da7e132 100644 (file)
@@ -17,6 +17,11 @@ typedef __u16 __sum16;
 #define VIP_NUM 5
 #define MAGIC_BYTES 123
 
+struct network_helper_opts {
+       const char *cc;
+       int timeout_ms;
+};
+
 /* ipv4 test vector */
 struct ipv4_packet {
        struct ethhdr eth;
@@ -36,11 +41,17 @@ extern struct ipv6_packet pkt_v6;
 int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
                 int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms,
+                           unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
 int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms);
 int make_sockaddr(int family, const char *addr_str, __u16 port,
                  struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
 
 #endif
index ec11e20..bf307bb 100644 (file)
@@ -2,79 +2,28 @@
 #include <test_progs.h>
 #include "test_attach_probe.skel.h"
 
-#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
-
-#define OP_RT_RA_MASK   0xffff0000UL
-#define LIS_R2          0x3c400000UL
-#define ADDIS_R2_R12    0x3c4c0000UL
-#define ADDI_R2_R2      0x38420000UL
-
-static ssize_t get_offset(ssize_t addr, ssize_t base)
-{
-       u32 *insn = (u32 *) addr;
-
-       /*
-        * A PPC64 ABIv2 function may have a local and a global entry
-        * point. We need to use the local entry point when patching
-        * functions, so identify and step over the global entry point
-        * sequence.
-        *
-        * The global entry point sequence is always of the form:
-        *
-        * addis r2,r12,XXXX
-        * addi  r2,r2,XXXX
-        *
-        * A linker optimisation may convert the addis to lis:
-        *
-        * lis   r2,XXXX
-        * addi  r2,r2,XXXX
-        */
-       if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
-            ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
-           ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
-               return (ssize_t)(insn + 2) - base;
-       else
-               return addr - base;
-}
-#else
-#define get_offset(addr, base) (addr - base)
-#endif
-
-ssize_t get_base_addr() {
-       size_t start, offset;
-       char buf[256];
-       FILE *f;
-
-       f = fopen("/proc/self/maps", "r");
-       if (!f)
-               return -errno;
-
-       while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
-                     &start, buf, &offset) == 3) {
-               if (strcmp(buf, "r-xp") == 0) {
-                       fclose(f);
-                       return start - offset;
-               }
-       }
-
-       fclose(f);
-       return -EINVAL;
-}
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
 
 void test_attach_probe(void)
 {
+       DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
        int duration = 0;
        struct bpf_link *kprobe_link, *kretprobe_link;
        struct bpf_link *uprobe_link, *uretprobe_link;
        struct test_attach_probe* skel;
        size_t uprobe_offset;
-       ssize_t base_addr;
+       ssize_t base_addr, ref_ctr_offset;
 
        base_addr = get_base_addr();
        if (CHECK(base_addr < 0, "get_base_addr",
                  "failed to find base addr: %zd", base_addr))
                return;
-       uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
+       uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+       ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+       if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+               return;
 
        skel = test_attach_probe__open_and_load();
        if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -96,20 +45,28 @@ void test_attach_probe(void)
                goto cleanup;
        skel->links.handle_kretprobe = kretprobe_link;
 
-       uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
-                                                false /* retprobe */,
-                                                0 /* self pid */,
-                                                "/proc/self/exe",
-                                                uprobe_offset);
+       ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+       uprobe_opts.retprobe = false;
+       uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+       uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+                                                     0 /* self pid */,
+                                                     "/proc/self/exe",
+                                                     uprobe_offset,
+                                                     &uprobe_opts);
        if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
                goto cleanup;
        skel->links.handle_uprobe = uprobe_link;
 
-       uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
-                                                   true /* retprobe */,
-                                                   -1 /* any pid */,
-                                                   "/proc/self/exe",
-                                                   uprobe_offset);
+       ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+       /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+       uprobe_opts.retprobe = true;
+       uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+       uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+                                                        -1 /* any pid */,
+                                                        "/proc/self/exe",
+                                                        uprobe_offset, &uprobe_opts);
        if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
                goto cleanup;
        skel->links.handle_uretprobe = uretprobe_link;
@@ -136,4 +93,5 @@ void test_attach_probe(void)
 
 cleanup:
        test_attach_probe__destroy(skel);
+       ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644 (file)
index 0000000..5eea3c3
--- /dev/null
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_bpf_cookie.skel.h"
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+       struct bpf_link *link1 = NULL, *link2 = NULL;
+       struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+       /* attach two kprobes */
+       opts.bpf_cookie = 0x1;
+       opts.retprobe = false;
+       link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+                                                SYS_NANOSLEEP_KPROBE_NAME, &opts);
+       if (!ASSERT_OK_PTR(link1, "link1"))
+               goto cleanup;
+
+       opts.bpf_cookie = 0x2;
+       opts.retprobe = false;
+       link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+                                                SYS_NANOSLEEP_KPROBE_NAME, &opts);
+       if (!ASSERT_OK_PTR(link2, "link2"))
+               goto cleanup;
+
+       /* attach two kretprobes */
+       opts.bpf_cookie = 0x10;
+       opts.retprobe = true;
+       retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+                                                   SYS_NANOSLEEP_KPROBE_NAME, &opts);
+       if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+               goto cleanup;
+
+       opts.bpf_cookie = 0x20;
+       opts.retprobe = true;
+       retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+                                                   SYS_NANOSLEEP_KPROBE_NAME, &opts);
+       if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+               goto cleanup;
+
+       /* trigger kprobe && kretprobe */
+       usleep(1);
+
+       ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+       ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+       bpf_link__destroy(link1);
+       bpf_link__destroy(link2);
+       bpf_link__destroy(retlink1);
+       bpf_link__destroy(retlink2);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+       DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+       struct bpf_link *link1 = NULL, *link2 = NULL;
+       struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+       size_t uprobe_offset;
+       ssize_t base_addr;
+
+       base_addr = get_base_addr();
+       uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+       /* attach two uprobes */
+       opts.bpf_cookie = 0x100;
+       opts.retprobe = false;
+       link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+                                               "/proc/self/exe", uprobe_offset, &opts);
+       if (!ASSERT_OK_PTR(link1, "link1"))
+               goto cleanup;
+
+       opts.bpf_cookie = 0x200;
+       opts.retprobe = false;
+       link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+                                               "/proc/self/exe", uprobe_offset, &opts);
+       if (!ASSERT_OK_PTR(link2, "link2"))
+               goto cleanup;
+
+       /* attach two uretprobes */
+       opts.bpf_cookie = 0x1000;
+       opts.retprobe = true;
+       retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+                                                  "/proc/self/exe", uprobe_offset, &opts);
+       if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+               goto cleanup;
+
+       opts.bpf_cookie = 0x2000;
+       opts.retprobe = true;
+       retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+                                                  "/proc/self/exe", uprobe_offset, &opts);
+       if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+               goto cleanup;
+
+       /* trigger uprobe && uretprobe */
+       get_base_addr();
+
+       ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+       ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+       bpf_link__destroy(link1);
+       bpf_link__destroy(link2);
+       bpf_link__destroy(retlink1);
+       bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+       DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+       struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+       /* attach first tp prog */
+       opts.bpf_cookie = 0x10000;
+       link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+                                                   "syscalls", "sys_enter_nanosleep", &opts);
+       if (!ASSERT_OK_PTR(link1, "link1"))
+               goto cleanup;
+
+       /* attach second tp prog */
+       opts.bpf_cookie = 0x20000;
+       link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+                                                   "syscalls", "sys_enter_nanosleep", &opts);
+       if (!ASSERT_OK_PTR(link2, "link2"))
+               goto cleanup;
+
+       /* trigger tracepoints */
+       usleep(1);
+
+       ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+       /* now we detach first prog and will attach third one, which causes
+        * two internal calls to bpf_prog_array_copy(), shuffling
+        * bpf_prog_array_items around. We test here that we don't lose track
+        * of associated bpf_cookies.
+        */
+       bpf_link__destroy(link1);
+       link1 = NULL;
+       kern_sync_rcu();
+       skel->bss->tp_res = 0;
+
+       /* attach third tp prog */
+       opts.bpf_cookie = 0x40000;
+       link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+                                                   "syscalls", "sys_enter_nanosleep", &opts);
+       if (!ASSERT_OK_PTR(link3, "link3"))
+               goto cleanup;
+
+       /* trigger tracepoints */
+       usleep(1);
+
+       ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+       bpf_link__destroy(link1);
+       bpf_link__destroy(link2);
+       bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+       volatile int j = 0;
+       cpu_set_t cpu_set;
+       int i, err;
+
+       /* generate some branches on cpu 0 */
+       CPU_ZERO(&cpu_set);
+       CPU_SET(0, &cpu_set);
+       err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+       ASSERT_OK(err, "set_thread_affinity");
+
+       /* spin the loop for a while (random high number) */
+       for (i = 0; i < 1000000; ++i)
+               ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+       DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+       struct bpf_link *link = NULL;
+       struct perf_event_attr attr;
+       int pfd = -1;
+
+       /* create perf event */
+       memset(&attr, 0, sizeof(attr));
+       attr.size = sizeof(attr);
+       attr.type = PERF_TYPE_SOFTWARE;
+       attr.config = PERF_COUNT_SW_CPU_CLOCK;
+       attr.freq = 1;
+       attr.sample_freq = 4000;
+       pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+       if (!ASSERT_GE(pfd, 0, "perf_fd"))
+               goto cleanup;
+
+       opts.bpf_cookie = 0x100000;
+       link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+       if (!ASSERT_OK_PTR(link, "link1"))
+               goto cleanup;
+
+       burn_cpu(); /* trigger BPF prog */
+
+       ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+       /* prevent bpf_link__destroy() closing pfd itself */
+       bpf_link__disconnect(link);
+       /* close BPF link's FD explicitly */
+       close(bpf_link__fd(link));
+       /* free up memory used by struct bpf_link */
+       bpf_link__destroy(link);
+       link = NULL;
+       kern_sync_rcu();
+       skel->bss->pe_res = 0;
+
+       opts.bpf_cookie = 0x200000;
+       link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+       if (!ASSERT_OK_PTR(link, "link2"))
+               goto cleanup;
+
+       burn_cpu(); /* trigger BPF prog */
+
+       ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+       close(pfd);
+       bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+       struct test_bpf_cookie *skel;
+
+       skel = test_bpf_cookie__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return;
+
+       skel->bss->my_tid = syscall(SYS_gettid);
+
+       if (test__start_subtest("kprobe"))
+               kprobe_subtest(skel);
+       if (test__start_subtest("uprobe"))
+               uprobe_subtest(skel);
+       if (test__start_subtest("tracepoint"))
+               tp_subtest(skel);
+       if (test__start_subtest("perf_event"))
+               pe_subtest(skel);
+
+       test_bpf_cookie__destroy(skel);
+}
index 1f1aade..77ac24b 100644 (file)
@@ -13,6 +13,7 @@
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
 #include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,19 @@ static void test_udp6(void)
        bpf_iter_udp6__destroy(skel);
 }
 
+static void test_unix(void)
+{
+       struct bpf_iter_unix *skel;
+
+       skel = bpf_iter_unix__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+               return;
+
+       do_dummy_read(skel->progs.dump_unix);
+
+       bpf_iter_unix__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
                           bool read_one_char)
@@ -1255,6 +1269,8 @@ void test_bpf_iter(void)
                test_udp4();
        if (test__start_subtest("udp6"))
                test_udp6();
+       if (test__start_subtest("unix"))
+               test_unix();
        if (test__start_subtest("anon"))
                test_anon_iter(false);
        if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..85babb0
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+               return -1;
+
+       return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+                              sizeof("bpf_cubic")))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+       char tcp_cc[16];
+       socklen_t optlen = sizeof(tcp_cc);
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+                              tcp_cc, &optlen) ||
+                   strcmp(tcp_cc, "bpf_dctcp"))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+                            int **paccepted_fds)
+{
+       int *est_fds, *accepted_fds;
+       unsigned int i;
+
+       est_fds = malloc(sizeof(*est_fds) * nr_est);
+       if (!est_fds)
+               return NULL;
+
+       accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+       if (!accepted_fds) {
+               free(est_fds);
+               return NULL;
+       }
+
+       for (i = 0; i < nr_est; i++) {
+               est_fds[i] = connect_to_fd(listen_fd, 0);
+               if (est_fds[i] == -1)
+                       break;
+               if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+                       close(est_fds[i]);
+                       break;
+               }
+
+               accepted_fds[i] = accept(listen_fd, NULL, 0);
+               if (accepted_fds[i] == -1) {
+                       close(est_fds[i]);
+                       break;
+               }
+       }
+
+       if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+               free_fds(accepted_fds, i);
+               free_fds(est_fds, i);
+               return NULL;
+       }
+
+       *paccepted_fds = accepted_fds;
+       return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+       struct sockaddr_in6 addr;
+       socklen_t addrlen = sizeof(addr);
+
+       if (!getsockname(fd, &addr, &addrlen))
+               return ntohs(addr.sin6_port);
+
+       return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+                                  bool random_retry)
+{
+       int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+       unsigned int nr_reuse_listens = 256, nr_est = 256;
+       int err, iter_fd = -1, listen_fd = -1;
+       char buf;
+
+       /* Prepare non-reuseport listen_fd */
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (!ASSERT_GE(listen_fd, 0, "start_server"))
+               return;
+       if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+                      "set listen_fd to cubic"))
+               goto done;
+       iter_skel->bss->listen_hport = get_local_port(listen_fd);
+       if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+                       "get_local_port(listen_fd)"))
+               goto done;
+
+       /* Connect to non-reuseport listen_fd */
+       est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+       if (!ASSERT_OK_PTR(est_fds, "create established"))
+               goto done;
+
+       /* Prepare reuseport listen fds */
+       reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+                                                 "::1", 0, 0,
+                                                 nr_reuse_listens);
+       if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+               goto done;
+       if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+                      nr_reuse_listens, "set reuse_listen_fds to cubic"))
+               goto done;
+       iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+       if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+                       "get_local_port(reuse_listen_fds[0])"))
+               goto done;
+
+       /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+       iter_skel->bss->random_retry = random_retry;
+       iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+       if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+               goto done;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto done;
+
+       /* Check reuseport listen fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+                 nr_reuse_listens,
+                 "check reuse_listen_fds dctcp");
+
+       /* Check non reuseport listen fd for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+                 "check listen_fd dctcp");
+
+       /* Check established fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+                 "check est_fds dctcp");
+
+       /* Check accepted fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+                 "check accepted_fds dctcp");
+
+done:
+       if (iter_fd != -1)
+               close(iter_fd);
+       if (listen_fd != -1)
+               close(listen_fd);
+       free_fds(reuse_listen_fds, nr_reuse_listens);
+       free_fds(accepted_fds, nr_est);
+       free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+       struct bpf_iter_setsockopt *iter_skel = NULL;
+       struct bpf_cubic *cubic_skel = NULL;
+       struct bpf_dctcp *dctcp_skel = NULL;
+       struct bpf_link *cubic_link = NULL;
+       struct bpf_link *dctcp_link = NULL;
+
+       if (create_netns())
+               return;
+
+       /* Load iter_skel */
+       iter_skel = bpf_iter_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+               return;
+       iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+       if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+               goto done;
+
+       /* Load bpf_cubic */
+       cubic_skel = bpf_cubic__open_and_load();
+       if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+               goto done;
+       cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+       if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+               goto done;
+
+       /* Load bpf_dctcp */
+       dctcp_skel = bpf_dctcp__open_and_load();
+       if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+               goto done;
+       dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+       if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+               goto done;
+
+       do_bpf_iter_setsockopt(iter_skel, true);
+       do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+       bpf_link__destroy(cubic_link);
+       bpf_link__destroy(dctcp_link);
+       bpf_cubic__destroy(cubic_skel);
+       bpf_dctcp__destroy(dctcp_skel);
+       bpf_iter_setsockopt__destroy(iter_skel);
+}
index efe1e97..94e03df 100644 (file)
@@ -4,37 +4,22 @@
 #include <linux/err.h>
 #include <netinet/tcp.h>
 #include <test_progs.h>
+#include "network_helpers.h"
 #include "bpf_dctcp.skel.h"
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
+#include "bpf_dctcp_release.skel.h"
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
 static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
 static int expected_stg = 0xeB9F;
 static int stop, duration;
 
-static int settimeo(int fd)
-{
-       int err;
-
-       err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
-                        timeo_optlen);
-       if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
-                 errno))
-               return -1;
-
-       err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
-                        timeo_optlen);
-       if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
-                 errno))
-               return -1;
-
-       return 0;
-}
-
 static int settcpca(int fd, const char *tcp_ca)
 {
        int err;
@@ -61,7 +46,7 @@ static void *server(void *arg)
                goto done;
        }
 
-       if (settimeo(fd)) {
+       if (settimeo(fd, 0)) {
                err = -errno;
                goto done;
        }
@@ -114,7 +99,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
        }
 
        if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
-           settimeo(lfd) || settimeo(fd))
+           settimeo(lfd, 0) || settimeo(fd, 0))
                goto done;
 
        /* bind, listen and start server thread to accept */
@@ -267,6 +252,77 @@ static void test_invalid_license(void)
        libbpf_set_print(old_print_fn);
 }
 
+static void test_dctcp_fallback(void)
+{
+       int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+       struct network_helper_opts opts = {
+               .cc = "cubic",
+       };
+       struct bpf_dctcp *dctcp_skel;
+       struct bpf_link *link = NULL;
+       char srv_cc[16];
+       socklen_t cc_len = sizeof(srv_cc);
+
+       dctcp_skel = bpf_dctcp__open();
+       if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+               return;
+       strcpy(dctcp_skel->rodata->fallback, "cubic");
+       if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+               goto done;
+
+       link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+       if (!ASSERT_OK_PTR(link, "dctcp link"))
+               goto done;
+
+       lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (!ASSERT_GE(lfd, 0, "lfd") ||
+           !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+               goto done;
+
+       cli_fd = connect_to_fd_opts(lfd, &opts);
+       if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+               goto done;
+
+       srv_fd = accept(lfd, NULL, 0);
+       if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+               goto done;
+       ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+       ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+
+       err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+       if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+               goto done;
+       ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
+       bpf_link__destroy(link);
+       bpf_dctcp__destroy(dctcp_skel);
+       if (lfd != -1)
+               close(lfd);
+       if (srv_fd != -1)
+               close(srv_fd);
+       if (cli_fd != -1)
+               close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+       struct bpf_dctcp_release *rel_skel;
+       libbpf_print_fn_t old_print_fn;
+
+       err_str = "unknown func bpf_setsockopt";
+       found = false;
+
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+       rel_skel = bpf_dctcp_release__open_and_load();
+       libbpf_set_print(old_print_fn);
+
+       ASSERT_ERR_PTR(rel_skel, "rel_skel");
+       ASSERT_TRUE(found, "expected_err_msg");
+
+       bpf_dctcp_release__destroy(rel_skel);
+}
+
 void test_bpf_tcp_ca(void)
 {
        if (test__start_subtest("dctcp"))
@@ -275,4 +331,8 @@ void test_bpf_tcp_ca(void)
                test_cubic();
        if (test__start_subtest("invalid_license"))
                test_invalid_license();
+       if (test__start_subtest("dctcp_fallback"))
+               test_dctcp_fallback();
+       if (test__start_subtest("rel_setsockopt"))
+               test_rel_setsockopt();
 }
index 857e3f2..649f873 100644 (file)
@@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
                goto done;
        }
 
-       err = btf__get_from_id(info.btf_id, &btf);
+       btf = btf__load_from_kernel_by_id(info.btf_id);
+       err = libbpf_get_error(btf);
        if (CHECK(err, "cannot get btf from kernel, err: %d", err))
                goto done;
 
@@ -4386,6 +4387,7 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
 }
index 1b90e68..52ccf0c 100644 (file)
@@ -232,7 +232,593 @@ err_out:
        btf__free(btf);
 }
 
+#define STRSIZE                                4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+       char *s = ctx, new[STRSIZE];
+
+       vsnprintf(new, STRSIZE, fmt, args);
+       if (strlen(s) < STRSIZE)
+               strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+                        char *name, char *prefix, __u64 flags, void *ptr,
+                        size_t ptr_sz, char *str, const char *expected_val)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       size_t type_sz;
+       __s32 type_id;
+       int ret = 0;
+
+       if (flags & BTF_F_COMPACT)
+               opts.compact = true;
+       if (flags & BTF_F_NONAME)
+               opts.skip_names = true;
+       if (flags & BTF_F_ZERO)
+               opts.emit_zeroes = true;
+       if (prefix) {
+               ASSERT_STRNEQ(name, prefix, strlen(prefix),
+                             "verify prefix match");
+               name += strlen(prefix) + 1;
+       }
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GE(type_id, 0, "find type id"))
+               return -ENOENT;
+       type_sz = btf__resolve_size(btf, type_id);
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+       if (type_sz <= ptr_sz) {
+               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+                       return -EINVAL;
+       } else {
+               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+                       return -EINVAL;
+       }
+       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+               return -EFAULT;
+       return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,       \
+                          _expected, ...)                              \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix,  _str, _type, _flags,    \
+                            ...)                                       \
+       TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,        \
+                          "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz,        \
+                               _expected, ...)                         \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0,      \
+                                    _ptr, _type_sz, _str, _expected);  \
+       } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags,  \
+                         _expected, ...)                               \
+       do {                                                            \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _var, _prefix, _flags,     \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+#ifdef __SIZEOF_INT128__
+       __int128 i = 0xffffffffffffffff;
+
+       /* this dance is required because we cannot directly initialize
+        * a 128-bit value to anything larger than a 64-bit value.
+        */
+       i = (i << 64) | (i - 1);
+#endif
+       /* simple int */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1234", 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "-4567", -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+                          "(__int128)0xffffffffffffffff",
+                          0xffffffffffffffff);
+       ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+                               "(__int128)0xfffffffffffffffffffffffffffffffe"),
+                 "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+                                    char *str)
+{
+       float t1 = 1.234567;
+       float t2 = -1.234567;
+       float t3 = 0.0;
+       double t4 = 5.678912;
+       double t5 = -5.678912;
+       double t6 = 0.0;
+       long double t7 = 9.876543;
+       long double t8 = -9.876543;
+       long double t9 = 0.0;
+
+       /* since the kernel does not likely have any float types in its BTF, we
+        * will need to add some of various sizes.
+        */
+
+       ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+                               "(test_float)1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+                               "(test_float)-1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+                               "(test_float)0.000000"), "dump float");
+
+       ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+                 "(test_double)5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+                 "(test_double)-5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+                               "(test_double)0.000000"), "dump double");
+
+       ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+                               str, "(test_long_double)9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+                               str, "(test_long_double)-9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+                               str, "(test_long_double)0.000000"),
+                               "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* simple char */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "100", 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+                                      char *str)
+{
+       /* simple typedef */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1", 1);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+       /* typedef struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+                            {.counter = (int)1,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,}", { .counter = 1 });
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"      .counter = (int)1,\n"
+"}",
+                          {.counter = 1,});
+       /* typedef with 0 value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(atomic_t){.counter = (int)0,}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+"      .counter = (int)0,\n"
+"}",
+                          { .counter = 0,});
+
+       /* overflow should show type but not value since it overflows */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+                               "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* enum where enum value does (and does not) exist */
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                            BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                          "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "2000", 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)2000", 2000);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+                               sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+                                     char *str)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       char zero_data[512] = { };
+       char type_data[512];
+       void *fops = type_data;
+       void *skb = type_data;
+       size_t type_sz;
+       __s32 type_id;
+       char *cmpstr;
+       int ret;
+
+       memset(type_data, 255, sizeof(type_data));
+
+       /* simple struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                            {.name_off = (__u32)3,.val = (__s32)-1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{3,-1,}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)3,\n"
+"      .val = (__s32)-1,\n"
+"}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{-1,}",
+                          { .name_off = 0, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,-1,}",
+                          { .name_off = 0, .val = -1,});
+       /* empty struct should be printed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                          "(struct btf_enum){}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_ZERO,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)0,\n"
+"      .val = (__s32)0,\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+
+       /* struct with pointers */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){.next = (struct list_head *)0x1,}",
+                          { .next = (struct list_head *)1 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"      .next = (struct list_head *)0x1,\n"
+"}",
+                          { .next = (struct list_head *)1 });
+       /* NULL pointer should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){}",
+                          { .next = (struct list_head *)0 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+                          { .next = (struct list_head *)0 });
+
+       /* struct with function pointers */
+       type_id = btf__find_by_name(btf, "file_operations");
+       if (ASSERT_GT(type_id, 0, "find type id")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping file_operations");
+               cmpstr =
+"(struct file_operations){\n"
+"      .owner = (struct module *)0xffffffffffffffff,\n"
+"      .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+               ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+       }
+
+       /* struct with char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+                          { .name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{['f','o','o',],}",
+                          {.name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+"      .name = (char[16])[\n"
+"              'f',\n"
+"              'o',\n"
+"              'o',\n"
+"      ],\n"
+"}",
+                          {.name = "foo",});
+       /* leading null char means do not display string */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){}",
+                          {.name = {'\0', 'f', 'o', 'o'}});
+       /* handle non-printable characters */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+                          { .name = {1, 2, 3, 0}});
+
+       /* struct with non-char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+                          { .cb = {1, 2, 3, 4, 5,},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{[1,2,3,4,5,],}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              1,\n"
+"              2,\n"
+"              3,\n"
+"              4,\n"
+"              5,\n"
+"      ],\n"
+"}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       /* For non-char, arrays, show non-zero values only */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              0,\n"
+"              0,\n"
+"              1,\n"
+"              0,\n"
+"              0,\n"
+"      ],\n"
+"}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+
+       /* struct with bitfields */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+               {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,0x2,0x3,4,5,}",
+                          { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+                            .imm = 5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+"      .code = (__u8)1,\n"
+"      .dst_reg = (__u8)0x2,\n"
+"      .src_reg = (__u8)0x3,\n"
+"      .off = (__s16)4,\n"
+"      .imm = (__s32)5,\n"
+"}",
+                          {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+       /* zeroed bitfields should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+                          "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+                          { .code = 0, .dst_reg = 1});
+
+       /* struct with enum bitfield */
+       type_id = btf__find_by_name(btf, "fs_context");
+       if (ASSERT_GT(type_id,  0, "find fs_context")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               opts.emit_zeroes = true;
+               ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping fs_context");
+
+               ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+                                 "bitfield value not present");
+       }
+
+       /* struct with nested anon union */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+                          "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+                          { .op = 1, .args = { 1, 2, 3, 4}});
+
+       /* union with nested struct */
+       TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+                          "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+                          { .map = { .map_fd = 1 }});
+
+       /* struct skb with nested structs/unions; because type output is so
+        * complex, we don't do a string comparison, just verify we return
+        * the type size as the amount of data displayed.
+        */
+       type_id = btf__find_by_name(btf, "sk_buff");
+       if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping sk_buff");
+       }
+
+       /* overflow bpf_sock_ops struct with final element nonzero/zero.
+        * Regardless of the value of the final field, we don't have all the
+        * data we need to display it, so we should trigger an overflow.
+        * In other words oveflow checking should trump "is field zero?"
+        * checks because if we've overflowed, it shouldn't matter what the
+        * field is - we can't trust its value so shouldn't display it.
+        */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 2});
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+                         "int cpu_number = (int)100", 100);
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+                         "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+                            const char *name, const char *expected_val,
+                            void *data, size_t data_sz)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       int ret = 0, cmp;
+       size_t secsize;
+       __s32 type_id;
+
+       opts.compact = true;
+
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GT(type_id, 0, "find type id"))
+               return;
+
+       secsize = btf__resolve_size(btf, type_id);
+       ASSERT_EQ(secsize,  0, "verify section size");
+
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+       ASSERT_EQ(ret, 0, "unexpected return value");
+
+       cmp = strcmp(str, expected_val);
+       ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+       struct btf *btf = btf__parse("xdping_kern.o", NULL);
+       struct btf_dump_opts opts = { .ctx = str };
+       char license[4] = "GPL";
+       struct btf_dump *d;
+
+       if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       test_btf_datasec(btf, d, str, "license",
+                        "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+                        license, sizeof(license));
+}
+
 void test_btf_dump() {
+       char str[STRSIZE];
+       struct btf_dump_opts opts = { .ctx = str };
+       struct btf_dump *d;
+       struct btf *btf;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
        }
        if (test__start_subtest("btf_dump: incremental"))
                test_btf_dump_incremental();
+
+       btf = libbpf_find_kernel_btf();
+       if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       /* Verify type display for various types. */
+       if (test__start_subtest("btf_dump: int_data"))
+               test_btf_dump_int_data(btf, d, str);
+       if (test__start_subtest("btf_dump: float_data"))
+               test_btf_dump_float_data(btf, d, str);
+       if (test__start_subtest("btf_dump: char_data"))
+               test_btf_dump_char_data(btf, d, str);
+       if (test__start_subtest("btf_dump: typedef_data"))
+               test_btf_dump_typedef_data(btf, d, str);
+       if (test__start_subtest("btf_dump: enum_data"))
+               test_btf_dump_enum_data(btf, d, str);
+       if (test__start_subtest("btf_dump: struct_data"))
+               test_btf_dump_struct_data(btf, d, str);
+       if (test__start_subtest("btf_dump: var_data"))
+               test_btf_dump_var_data(btf, d, str);
+       btf_dump__free(d);
+       btf__free(btf);
+
+       if (test__start_subtest("btf_dump: datasec_data"))
+               test_btf_dump_datasec_data(str);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644 (file)
index 0000000..2239d1f
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+       struct btf *vmlinux_btf, *module_btf;
+       __s32 type_id;
+
+       if (!env.has_testmod) {
+               test__skip();
+               return;
+       }
+
+       vmlinux_btf = btf__load_vmlinux_btf();
+       if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+               return;
+
+       module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+       if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+               goto cleanup;
+
+       type_id = btf__find_by_name(module_btf, symbol_name);
+       ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+       btf__free(module_btf);
+       btf__free(vmlinux_btf);
+}
index 981c251..3d4b2a3 100644 (file)
@@ -53,8 +53,8 @@ void test_core_autosize(void)
        char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
        int err, fd = -1, zero = 0;
        int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct test_core_autosize* skel = NULL;
-       struct bpf_object_load_attr load_attr = {};
        struct bpf_program *prog;
        struct bpf_map *bss_map;
        struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
        fd = -1;
 
        /* open and load BPF program with custom BTF as the kernel BTF */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
        /* disable handle_signed() for now */
        prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
                goto cleanup;
        bpf_program__set_autoload(prog, false);
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
+       err = bpf_object__load(skel->obj);
        if (!ASSERT_OK(err, "prog_load"))
                goto cleanup;
 
@@ -204,14 +203,13 @@ void test_core_autosize(void)
        skel = NULL;
 
        /* now re-load with handle_signed() enabled, it should fail loading */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
-       if (!ASSERT_ERR(err, "bad_prog_load"))
+       err = test_core_autosize__load(skel);
+       if (!ASSERT_ERR(err, "skel_load"))
                goto cleanup;
 
 cleanup:
index d02e064..4739b15 100644 (file)
@@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz)
 void test_core_reloc(void)
 {
        const size_t mmap_sz = roundup_page(sizeof(struct data));
-       struct bpf_object_load_attr load_attr = {};
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct core_reloc_test_case *test_case;
        const char *tp_name, *probe_name;
        int err, i, equal;
@@ -846,9 +846,16 @@ void test_core_reloc(void)
                                continue;
                }
 
-               obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+               if (test_case->btf_src_file) {
+                       err = access(test_case->btf_src_file, R_OK);
+                       if (!ASSERT_OK(err, "btf_src_file"))
+                               goto cleanup;
+               }
+
+               open_opts.btf_custom_path = test_case->btf_src_file;
+               obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
                if (!ASSERT_OK_PTR(obj, "obj_open"))
-                       continue;
+                       goto cleanup;
 
                probe_name = "raw_tracepoint/sys_enter";
                tp_name = "sys_enter";
@@ -862,17 +869,7 @@ void test_core_reloc(void)
                          "prog '%s' not found\n", probe_name))
                        goto cleanup;
 
-
-               if (test_case->btf_src_file) {
-                       err = access(test_case->btf_src_file, R_OK);
-                       if (!ASSERT_OK(err, "btf_src_file"))
-                               goto cleanup;
-               }
-
-               load_attr.obj = obj;
-               load_attr.log_level = 0;
-               load_attr.target_btf_path = test_case->btf_src_file;
-               err = bpf_object__load_xattr(&load_attr);
+               err = bpf_object__load(obj);
                if (err) {
                        if (!test_case->fails)
                                ASSERT_OK(err, "obj_load");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644 (file)
index 0000000..02a465f
--- /dev/null
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+
+void test_get_func_ip_test(void)
+{
+       struct get_func_ip_test *skel = NULL;
+       __u32 duration = 0, retval;
+       int err, prog_fd;
+
+       skel = get_func_ip_test__open();
+       if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+               return;
+
+       /* test6 is x86_64 specifc because of the instruction
+        * offset, disabling it for all other archs
+        */
+#ifndef __x86_64__
+       bpf_program__set_autoload(skel->progs.test6, false);
+       bpf_program__set_autoload(skel->progs.test7, false);
+#endif
+
+       err = get_func_ip_test__load(skel);
+       if (!ASSERT_OK(err, "get_func_ip_test__load"))
+               goto cleanup;
+
+       err = get_func_ip_test__attach(skel);
+       if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+               goto cleanup;
+
+       prog_fd = bpf_program__fd(skel->progs.test1);
+       err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+                               NULL, NULL, &retval, &duration);
+       ASSERT_OK(err, "test_run");
+       ASSERT_EQ(retval, 0, "test_run");
+
+       prog_fd = bpf_program__fd(skel->progs.test5);
+       err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+                               NULL, NULL, &retval, &duration);
+
+       ASSERT_OK(err, "test_run");
+
+       ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+       ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+       ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+       ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+       ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+#ifdef __x86_64__
+       ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+       ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+#endif
+
+cleanup:
+       get_func_ip_test__destroy(skel);
+}
index 30a7b9b..9611f2b 100644 (file)
@@ -44,7 +44,7 @@ static void test_subprog(void)
        ASSERT_OK(err, "bpf_prog_test_run(test1)");
        ASSERT_EQ(retval, 10, "test1-retval");
        ASSERT_NEQ(skel->data->active_res, -1, "active_res");
-       ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+       ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
 
        kfunc_call_test_subprog__destroy(skel);
 }
index 67bebd3..cf3acfa 100644 (file)
@@ -6,6 +6,7 @@
 #include <bpf/btf.h>
 #include "test_ksyms_btf.skel.h"
 #include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
 
 static int duration;
 
@@ -81,6 +82,33 @@ static void test_null_check(void)
        test_ksyms_btf_null_check__destroy(skel);
 }
 
+static void test_weak_syms(void)
+{
+       struct test_ksyms_weak *skel;
+       struct test_ksyms_weak__data *data;
+       int err;
+
+       skel = test_ksyms_weak__open_and_load();
+       if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+               return;
+
+       err = test_ksyms_weak__attach(skel);
+       if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+               goto cleanup;
+
+       /* trigger tracepoint */
+       usleep(1);
+
+       data = skel->data;
+       ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+       ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+       ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+       ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+       test_ksyms_weak__destroy(skel);
+}
+
 void test_ksyms_btf(void)
 {
        int percpu_datasec;
@@ -105,4 +133,7 @@ void test_ksyms_btf(void)
 
        if (test__start_subtest("null_check"))
                test_null_check();
+
+       if (test__start_subtest("weak_ksyms"))
+               test_weak_syms();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644 (file)
index 0000000..6ede48b
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+       union percpu_net_cnt *percpu_netcnt = NULL;
+       struct bpf_cgroup_storage_key key;
+       int map_fd, percpu_map_fd;
+       struct netcnt_prog *skel;
+       unsigned long packets;
+       union net_cnt netcnt;
+       unsigned long bytes;
+       int cpu, nproc;
+       int cg_fd = -1;
+       char cmd[128];
+
+       skel = netcnt_prog__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+               return;
+
+       nproc = get_nprocs_conf();
+       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+       if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+               goto err;
+
+       cg_fd = test__join_cgroup(CG_NAME);
+       if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+               goto err;
+
+       skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+       if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+                          "attach_cgroup(bpf_nextcnt)"))
+               goto err;
+
+       snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+       ASSERT_OK(system(cmd), cmd);
+
+       map_fd = bpf_map__fd(skel->maps.netcnt);
+       if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+               goto err;
+
+       if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+               goto err;
+
+       percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+       if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+                      "bpf_map_lookup_elem(percpu_netcnt)"))
+               goto err;
+
+       /* Some packets can be still in per-cpu cache, but not more than
+        * MAX_PERCPU_PACKETS.
+        */
+       packets = netcnt.packets;
+       bytes = netcnt.bytes;
+       for (cpu = 0; cpu < nproc; cpu++) {
+               ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+               packets += percpu_netcnt[cpu].packets;
+               bytes += percpu_netcnt[cpu].bytes;
+       }
+
+       /* No packets should be lost */
+       ASSERT_EQ(packets, 10000, "packets");
+
+       /* Let's check that bytes counter matches the number of packets
+        * multiplied by the size of ipv6 ICMP packet.
+        */
+       ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+       if (cg_fd != -1)
+               close(cg_fd);
+       free(percpu_netcnt);
+       netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644 (file)
index 0000000..71d8f3b
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+       int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+       int err, val, ret, map, verdict;
+       struct netns_cookie_prog *skel;
+       uint64_t cookie_expected_value;
+       socklen_t vallen = sizeof(cookie_expected_value);
+       static const char send_msg[] = "message";
+
+       skel = netns_cookie_prog__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return;
+
+       cgroup_fd = test__join_cgroup("/netns_cookie");
+       if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+               goto done;
+
+       skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+               skel->progs.get_netns_cookie_sockops, cgroup_fd);
+       if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+               goto done;
+
+       verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+       map = bpf_map__fd(skel->maps.sock_map);
+       err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+       if (!ASSERT_OK(err, "prog_attach"))
+               goto done;
+
+       server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+               goto done;
+
+       client_fd = connect_to_fd(server_fd, 0);
+       if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+               goto done;
+
+       ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+       if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+               goto done;
+
+       err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+                                 &client_fd, &val);
+       if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+               goto done;
+
+       err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+                        &cookie_expected_value, &vallen);
+       if (!ASSERT_OK(err, "getsockopt"))
+               goto done;
+
+       ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+       err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+                                 &client_fd, &val);
+       if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+               goto done;
+
+       ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+       if (server_fd != -1)
+               close(server_fd);
+       if (client_fd != -1)
+               close(client_fd);
+       if (cgroup_fd != -1)
+               close(cgroup_fd);
+       netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644 (file)
index 0000000..b1abd0c
--- /dev/null
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+       volatile int j = 0;
+       cpu_set_t cpu_set;
+       int i, err;
+
+       /* generate some branches on cpu 0 */
+       CPU_ZERO(&cpu_set);
+       CPU_SET(0, &cpu_set);
+       err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+       ASSERT_OK(err, "set_thread_affinity");
+
+       /* spin the loop for a while (random high number) */
+       for (i = 0; i < 1000000; ++i)
+               ++j;
+}
+
+void test_perf_link(void)
+{
+       struct test_perf_link *skel = NULL;
+       struct perf_event_attr attr;
+       int pfd = -1, link_fd = -1, err;
+       int run_cnt_before, run_cnt_after;
+       struct bpf_link_info info;
+       __u32 info_len = sizeof(info);
+
+       /* create perf event */
+       memset(&attr, 0, sizeof(attr));
+       attr.size = sizeof(attr);
+       attr.type = PERF_TYPE_SOFTWARE;
+       attr.config = PERF_COUNT_SW_CPU_CLOCK;
+       attr.freq = 1;
+       attr.sample_freq = 4000;
+       pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+       if (!ASSERT_GE(pfd, 0, "perf_fd"))
+               goto cleanup;
+
+       skel = test_perf_link__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_load"))
+               goto cleanup;
+
+       link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+                                 BPF_PERF_EVENT, NULL);
+       if (!ASSERT_GE(link_fd, 0, "link_fd"))
+               goto cleanup;
+
+       memset(&info, 0, sizeof(info));
+       err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+       if (!ASSERT_OK(err, "link_get_info"))
+               goto cleanup;
+
+       ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+       ASSERT_GT(info.id, 0, "link_id");
+       ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+       /* ensure we get at least one perf_event prog execution */
+       burn_cpu();
+       ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+       /* perf_event is still active, but we close link and BPF program
+        * shouldn't be executed anymore
+        */
+       close(link_fd);
+       link_fd = -1;
+
+       /* make sure there are no stragglers */
+       kern_sync_rcu();
+
+       run_cnt_before = skel->bss->run_cnt;
+       burn_cpu();
+       run_cnt_after = skel->bss->run_cnt;
+
+       ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+       if (link_fd >= 0)
+               close(link_fd);
+       if (pfd >= 0)
+               close(pfd);
+       test_perf_link__destroy(skel);
+}
index fcf54b3..d4b953a 100644 (file)
@@ -125,6 +125,10 @@ void test_pinning(void)
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+               goto out;
+
        /* set pinning path of other map and re-pin all */
        map = bpf_object__find_map_by_name(obj, "nopinmap");
        if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
        if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path after set */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+                         "get pin path after set"))
+               goto out;
+
        /* should only pin the one unpinned map */
        err = bpf_object__pin_maps(obj, NULL);
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
index de26881..4e91f4d 100644 (file)
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
                if (!test__start_subtest(title))
                        continue;
 
-               /* Expect verifier failure if test name has 'fail' */
-               if (strstr(title, "fail") != NULL) {
+               /* Expect verifier failure if test name has 'err' */
+               if (strstr(title, "err_") != NULL) {
                        libbpf_print_fn_t old_print_fn;
 
                        old_print_fn = libbpf_set_print(NULL);
index 023cc53..776916b 100644 (file)
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 #include "test_send_signal_kern.skel.h"
 
 int sigusr1_received = 0;
@@ -10,29 +12,25 @@ static void sigusr1_handler(int signum)
 }
 
 static void test_send_signal_common(struct perf_event_attr *attr,
-                                   bool signal_thread,
-                                   const char *test_name)
+                                   bool signal_thread)
 {
        struct test_send_signal_kern *skel;
        int pipe_c2p[2], pipe_p2c[2];
        int err = -1, pmu_fd = -1;
-       __u32 duration = 0;
        char buf[256];
        pid_t pid;
 
-       if (CHECK(pipe(pipe_c2p), test_name,
-                 "pipe pipe_c2p error: %s\n", strerror(errno)))
+       if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
                return;
 
-       if (CHECK(pipe(pipe_p2c), test_name,
-                 "pipe pipe_p2c error: %s\n", strerror(errno))) {
+       if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
                close(pipe_c2p[0]);
                close(pipe_c2p[1]);
                return;
        }
 
        pid = fork();
-       if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+       if (!ASSERT_GE(pid, 0, "fork")) {
                close(pipe_c2p[0]);
                close(pipe_c2p[1]);
                close(pipe_p2c[0]);
@@ -41,26 +39,40 @@ static void test_send_signal_common(struct perf_event_attr *attr,
        }
 
        if (pid == 0) {
+               int old_prio;
+
                /* install signal handler and notify parent */
                signal(SIGUSR1, sigusr1_handler);
 
                close(pipe_c2p[0]); /* close read */
                close(pipe_p2c[1]); /* close write */
 
+               /* boost with a high priority so we got a higher chance
+                * that if an interrupt happens, the underlying task
+                * is this process.
+                */
+               errno = 0;
+               old_prio = getpriority(PRIO_PROCESS, 0);
+               ASSERT_OK(errno, "getpriority");
+               ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
                /* notify parent signal handler is installed */
-               CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+               ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
 
                /* make sure parent enabled bpf program to send_signal */
-               CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+               ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
 
                /* wait a little for signal handler */
                sleep(1);
 
                buf[0] = sigusr1_received ? '2' : '0';
-               CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+               ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
 
                /* wait for parent notification and exit */
-               CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+               ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+               /* restore the old priority */
+               ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
 
                close(pipe_c2p[1]);
                close(pipe_p2c[0]);
@@ -71,20 +83,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
        close(pipe_p2c[0]); /* close read */
 
        skel = test_send_signal_kern__open_and_load();
-       if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+       if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
                goto skel_open_load_failure;
 
        if (!attr) {
                err = test_send_signal_kern__attach(skel);
-               if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+               if (!ASSERT_OK(err, "skel_attach")) {
                        err = -1;
                        goto destroy_skel;
                }
        } else {
                pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
                                 -1 /* group id */, 0 /* flags */);
-               if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
-                       strerror(errno))) {
+               if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
                        err = -1;
                        goto destroy_skel;
                }
@@ -96,7 +107,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
        }
 
        /* wait until child signal handler installed */
-       CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+       ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
 
        /* trigger the bpf send_signal */
        skel->bss->pid = pid;
@@ -104,21 +115,21 @@ static void test_send_signal_common(struct perf_event_attr *attr,
        skel->bss->signal_thread = signal_thread;
 
        /* notify child that bpf program can send_signal now */
-       CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+       ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
 
        /* wait for result */
        err = read(pipe_c2p[0], buf, 1);
-       if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+       if (!ASSERT_GE(err, 0, "reading pipe"))
                goto disable_pmu;
-       if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+       if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
                err = -1;
                goto disable_pmu;
        }
 
-       CHECK(buf[0] != '2', test_name, "incorrect result\n");
+       ASSERT_EQ(buf[0], '2', "incorrect result");
 
        /* notify child safe to exit */
-       CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+       ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
 
 disable_pmu:
        close(pmu_fd);
@@ -132,7 +143,7 @@ skel_open_load_failure:
 
 static void test_send_signal_tracepoint(bool signal_thread)
 {
-       test_send_signal_common(NULL, signal_thread, "tracepoint");
+       test_send_signal_common(NULL, signal_thread);
 }
 
 static void test_send_signal_perf(bool signal_thread)
@@ -143,7 +154,7 @@ static void test_send_signal_perf(bool signal_thread)
                .config = PERF_COUNT_SW_CPU_CLOCK,
        };
 
-       test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+       test_send_signal_common(&attr, signal_thread);
 }
 
 static void test_send_signal_nmi(bool signal_thread)
@@ -172,7 +183,7 @@ static void test_send_signal_nmi(bool signal_thread)
                close(pmu_fd);
        }
 
-       test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+       test_send_signal_common(&attr, signal_thread);
 }
 
 void test_send_signal(void)
index dffbcaa..8fd1b4b 100644 (file)
@@ -19,7 +19,7 @@
 #define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
 #define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
 
-#define EXP_STR_OUT  "str1 longstr"
+#define EXP_STR_OUT  "str1         a  b c      d e longstr"
 #define EXP_STR_RET  sizeof(EXP_STR_OUT)
 
 #define EXP_OVER_OUT "%over"
@@ -114,6 +114,8 @@ void test_snprintf_negative(void)
        ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
        ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
        ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+       ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+       ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
        ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
        ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
 }
index 515229f..5c59790 100644 (file)
@@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd)
        errno = 0;
        value = s;
        err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
-       if (!err || errno != EOPNOTSUPP)
-               FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+       if (sotype == SOCK_STREAM) {
+               if (!err || errno != EOPNOTSUPP)
+                       FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+       } else if (err)
+               FAIL_ERRNO("map_update: expected success");
        xclose(s);
 }
 
@@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode)
        }
 }
 
+static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+       u64 value;
+       u32 key;
+       int err;
+
+       key = 0;
+       value = fd1;
+       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+       if (err)
+               return err;
+
+       key = 1;
+       value = fd2;
+       return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
 static void redir_to_connected(int family, int sotype, int sock_mapfd,
                               int verd_mapfd, enum redir_mode mode)
 {
@@ -928,9 +947,9 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
        unsigned int pass;
        socklen_t len;
        int err, n;
-       u64 value;
        u32 key;
        char b;
+       int retries = 100;
 
        zero_verdict_count(verd_mapfd);
 
@@ -965,15 +984,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
        if (p1 < 0)
                goto close_cli1;
 
-       key = 0;
-       value = p0;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-       if (err)
-               goto close_peer1;
-
-       key = 1;
-       value = p1;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+       err = add_to_sockmap(sock_mapfd, p0, p1);
        if (err)
                goto close_peer1;
 
@@ -991,10 +1002,15 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
                goto close_peer1;
        if (pass != 1)
                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
+again:
        n = read(c0, &b, 1);
-       if (n < 0)
+       if (n < 0) {
+               if (errno == EAGAIN && retries--) {
+                       usleep(1000);
+                       goto again;
+               }
                FAIL_ERRNO("%s: read", log_prefix);
+       }
        if (n == 0)
                FAIL("%s: incomplete read", log_prefix);
 
@@ -1061,7 +1077,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
        int s, c, p, err, n;
        unsigned int drop;
        socklen_t len;
-       u64 value;
        u32 key;
 
        zero_verdict_count(verd_mapfd);
@@ -1086,15 +1101,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
        if (p < 0)
                goto close_cli;
 
-       key = 0;
-       value = s;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-       if (err)
-               goto close_peer;
-
-       key = 1;
-       value = p;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+       err = add_to_sockmap(sock_mapfd, s, p);
        if (err)
                goto close_peer;
 
@@ -1346,7 +1353,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
        int s1, s2, c, err;
        unsigned int drop;
        socklen_t len;
-       u64 value;
        u32 key;
 
        zero_verdict_count(verd_map);
@@ -1360,16 +1366,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
        if (s2 < 0)
                goto close_srv1;
 
-       key = 0;
-       value = s1;
-       err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+       err = add_to_sockmap(sock_map, s1, s2);
        if (err)
                goto close_srv2;
 
-       key = 1;
-       value = s2;
-       err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
        /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
        len = sizeof(addr);
        err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1441,6 +1441,8 @@ static const char *family_str(sa_family_t family)
                return "IPv4";
        case AF_INET6:
                return "IPv6";
+       case AF_UNIX:
+               return "Unix";
        default:
                return "unknown";
        }
@@ -1563,6 +1565,101 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
        }
 }
 
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+                              int verd_mapfd, enum redir_mode mode)
+{
+       const char *log_prefix = redir_mode_str(mode);
+       int c0, c1, p0, p1;
+       unsigned int pass;
+       int retries = 100;
+       int err, n;
+       int sfd[2];
+       u32 key;
+       char b;
+
+       zero_verdict_count(verd_mapfd);
+
+       if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+               return;
+       c0 = sfd[0], p0 = sfd[1];
+
+       if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+               goto close0;
+       c1 = sfd[0], p1 = sfd[1];
+
+       err = add_to_sockmap(sock_mapfd, p0, p1);
+       if (err)
+               goto close;
+
+       n = write(c1, "a", 1);
+       if (n < 0)
+               FAIL_ERRNO("%s: write", log_prefix);
+       if (n == 0)
+               FAIL("%s: incomplete write", log_prefix);
+       if (n < 1)
+               goto close;
+
+       key = SK_PASS;
+       err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+       if (err)
+               goto close;
+       if (pass != 1)
+               FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+       if (n < 0) {
+               if (errno == EAGAIN && retries--) {
+                       usleep(1000);
+                       goto again;
+               }
+               FAIL_ERRNO("%s: read", log_prefix);
+       }
+       if (n == 0)
+               FAIL("%s: incomplete read", log_prefix);
+
+close:
+       xclose(c1);
+       xclose(p1);
+close0:
+       xclose(c0);
+       xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+                                       struct bpf_map *inner_map, int sotype)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+       int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+       if (err)
+               return;
+
+       skel->bss->test_ingress = false;
+       unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+       skel->bss->test_ingress = true;
+       unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+                           int sotype)
+{
+       const char *family_name, *map_name;
+       char s[MAX_TEST_NAME];
+
+       family_name = family_str(AF_UNIX);
+       map_name = map_type_str(map);
+       snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+       if (!test__start_subtest(s))
+               return;
+       unix_skb_redir_to_connected(skel, map, sotype);
+}
+
 static void test_reuseport(struct test_sockmap_listen *skel,
                           struct bpf_map *map, int family, int sotype)
 {
@@ -1603,33 +1700,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
        }
 }
 
-static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
-                                  int verd_mapfd, enum redir_mode mode)
+static int inet_socketpair(int family, int type, int *s, int *c)
 {
-       const char *log_prefix = redir_mode_str(mode);
        struct sockaddr_storage addr;
-       int c0, c1, p0, p1;
-       unsigned int pass;
-       int retries = 100;
        socklen_t len;
-       int err, n;
-       u64 value;
-       u32 key;
-       char b;
-
-       zero_verdict_count(verd_mapfd);
+       int p0, c0;
+       int err;
 
-       p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+       p0 = socket_loopback(family, type | SOCK_NONBLOCK);
        if (p0 < 0)
-               return;
+               return p0;
+
        len = sizeof(addr);
        err = xgetsockname(p0, sockaddr(&addr), &len);
        if (err)
                goto close_peer0;
 
-       c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
-       if (c0 < 0)
+       c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
+       if (c0 < 0) {
+               err = c0;
                goto close_peer0;
+       }
        err = xconnect(c0, sockaddr(&addr), len);
        if (err)
                goto close_cli0;
@@ -1640,35 +1731,133 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
        if (err)
                goto close_cli0;
 
-       p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
-       if (p1 < 0)
-               goto close_cli0;
-       err = xgetsockname(p1, sockaddr(&addr), &len);
+       *s = p0;
+       *c = c0;
+       return 0;
+
+close_cli0:
+       xclose(c0);
+close_peer0:
+       xclose(p0);
+       return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+                                  enum redir_mode mode)
+{
+       const char *log_prefix = redir_mode_str(mode);
+       int c0, c1, p0, p1;
+       unsigned int pass;
+       int retries = 100;
+       int err, n;
+       u32 key;
+       char b;
+
+       zero_verdict_count(verd_mapfd);
+
+       err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+       if (err)
+               return;
+       err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
        if (err)
                goto close_cli0;
 
-       c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
-       if (c1 < 0)
-               goto close_peer1;
-       err = xconnect(c1, sockaddr(&addr), len);
+       err = add_to_sockmap(sock_mapfd, p0, p1);
        if (err)
                goto close_cli1;
-       err = xgetsockname(c1, sockaddr(&addr), &len);
-       if (err)
+
+       n = write(c1, "a", 1);
+       if (n < 0)
+               FAIL_ERRNO("%s: write", log_prefix);
+       if (n == 0)
+               FAIL("%s: incomplete write", log_prefix);
+       if (n < 1)
                goto close_cli1;
-       err = xconnect(p1, sockaddr(&addr), len);
+
+       key = SK_PASS;
+       err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
        if (err)
                goto close_cli1;
+       if (pass != 1)
+               FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 
-       key = 0;
-       value = p0;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+again:
+       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+       if (n < 0) {
+               if (errno == EAGAIN && retries--) {
+                       usleep(1000);
+                       goto again;
+               }
+               FAIL_ERRNO("%s: read", log_prefix);
+       }
+       if (n == 0)
+               FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+       xclose(c1);
+       xclose(p1);
+close_cli0:
+       xclose(c0);
+       xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+                                      struct bpf_map *inner_map, int family)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+       int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
        if (err)
-               goto close_cli1;
+               return;
 
-       key = 1;
-       value = p1;
-       err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+       skel->bss->test_ingress = false;
+       udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+       skel->bss->test_ingress = true;
+       udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+                          int family)
+{
+       const char *family_name, *map_name;
+       char s[MAX_TEST_NAME];
+
+       family_name = family_str(family);
+       map_name = map_type_str(map);
+       snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+       if (!test__start_subtest(s))
+               return;
+       udp_skb_redir_to_connected(skel, map, family);
+}
+
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
+                                       int verd_mapfd, enum redir_mode mode)
+{
+       const char *log_prefix = redir_mode_str(mode);
+       int c0, c1, p0, p1;
+       unsigned int pass;
+       int retries = 100;
+       int err, n;
+       int sfd[2];
+       u32 key;
+       char b;
+
+       zero_verdict_count(verd_mapfd);
+
+       if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+               return;
+       c0 = sfd[0], p0 = sfd[1];
+
+       err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+       if (err)
+               goto close;
+
+       err = add_to_sockmap(sock_mapfd, p0, p1);
        if (err)
                goto close_cli1;
 
@@ -1690,8 +1879,10 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
 again:
        n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
        if (n < 0) {
-               if (errno == EAGAIN && retries--)
+               if (errno == EAGAIN && retries--) {
+                       usleep(1000);
                        goto again;
+               }
                FAIL_ERRNO("%s: read", log_prefix);
        }
        if (n == 0)
@@ -1699,16 +1890,102 @@ again:
 
 close_cli1:
        xclose(c1);
-close_peer1:
+       xclose(p1);
+close:
+       xclose(c0);
+       xclose(p0);
+}
+
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+                                           struct bpf_map *inner_map, int family)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+       int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+       if (err)
+               return;
+
+       skel->bss->test_ingress = false;
+       inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+                                   REDIR_EGRESS);
+       inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+                                   REDIR_EGRESS);
+       skel->bss->test_ingress = true;
+       inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+                                   REDIR_INGRESS);
+       inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+                                   REDIR_INGRESS);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
+                                       int verd_mapfd, enum redir_mode mode)
+{
+       const char *log_prefix = redir_mode_str(mode);
+       int c0, c1, p0, p1;
+       unsigned int pass;
+       int err, n;
+       int sfd[2];
+       u32 key;
+       char b;
+       int retries = 100;
+
+       zero_verdict_count(verd_mapfd);
+
+       err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+       if (err)
+               return;
+
+       if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+               goto close_cli0;
+       c1 = sfd[0], p1 = sfd[1];
+
+       err = add_to_sockmap(sock_mapfd, p0, p1);
+       if (err)
+               goto close;
+
+       n = write(c1, "a", 1);
+       if (n < 0)
+               FAIL_ERRNO("%s: write", log_prefix);
+       if (n == 0)
+               FAIL("%s: incomplete write", log_prefix);
+       if (n < 1)
+               goto close;
+
+       key = SK_PASS;
+       err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+       if (err)
+               goto close;
+       if (pass != 1)
+               FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+       n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+       if (n < 0) {
+               if (errno == EAGAIN && retries--) {
+                       usleep(1000);
+                       goto again;
+               }
+               FAIL_ERRNO("%s: read", log_prefix);
+       }
+       if (n == 0)
+               FAIL("%s: incomplete read", log_prefix);
+
+close:
+       xclose(c1);
        xclose(p1);
 close_cli0:
        xclose(c0);
-close_peer0:
        xclose(p0);
+
 }
 
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
-                                      struct bpf_map *inner_map, int family)
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
+                                           struct bpf_map *inner_map, int family)
 {
        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
@@ -1720,17 +1997,21 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
                return;
 
        skel->bss->test_ingress = false;
-       udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
-                              REDIR_EGRESS);
+       unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+                                    REDIR_EGRESS);
+       unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+                                    REDIR_EGRESS);
        skel->bss->test_ingress = true;
-       udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
-                              REDIR_INGRESS);
+       unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+                                    REDIR_INGRESS);
+       unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+                                    REDIR_INGRESS);
 
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
 }
 
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
-                          int family)
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+                               int family)
 {
        const char *family_name, *map_name;
        char s[MAX_TEST_NAME];
@@ -1740,7 +2021,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
        snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
        if (!test__start_subtest(s))
                return;
-       udp_skb_redir_to_connected(skel, map, family);
+       inet_unix_skb_redir_to_connected(skel, map, family);
+       unix_inet_skb_redir_to_connected(skel, map, family);
 }
 
 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
@@ -1752,6 +2034,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
        test_reuseport(skel, map, family, SOCK_STREAM);
        test_reuseport(skel, map, family, SOCK_DGRAM);
        test_udp_redir(skel, map, family);
+       test_udp_unix_redir(skel, map, family);
 }
 
 void test_sockmap_listen(void)
@@ -1767,10 +2050,14 @@ void test_sockmap_listen(void)
        skel->bss->test_sockmap = true;
        run_tests(skel, skel->maps.sock_map, AF_INET);
        run_tests(skel, skel->maps.sock_map, AF_INET6);
+       test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+       test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
 
        skel->bss->test_sockmap = false;
        run_tests(skel, skel->maps.sock_hash, AF_INET);
        run_tests(skel, skel->maps.sock_hash, AF_INET6);
+       test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+       test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
 
        test_sockmap_listen__destroy(skel);
 }
index ec281b0..86f9768 100644 (file)
@@ -195,8 +195,10 @@ static void run_test(int cgroup_fd)
 
        pthread_mutex_lock(&server_started_mtx);
        if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
-                                     (void *)&server_fd)))
+                                     (void *)&server_fd))) {
+               pthread_mutex_unlock(&server_started_mtx);
                goto close_server_fd;
+       }
        pthread_cond_wait(&server_started, &server_started_mtx);
        pthread_mutex_unlock(&server_started_mtx);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644 (file)
index 0000000..6b53b3c
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+       socklen_t optlen;
+       char cc[16]; /* TCP_CA_NAME_MAX */
+       int buf;
+       int err = -1;
+
+       buf = 0x2D;
+       err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+       if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+               return;
+
+       /* Verify the setsockopt cc change */
+       optlen = sizeof(cc);
+       err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+       if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+               return;
+
+       if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+               return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+       struct sockopt_qos_to_cc *skel;
+       char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+       int cg_fd = -1;
+       int sock_fd = -1;
+       int err;
+
+       cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+       if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+               return;
+
+       skel = sockopt_qos_to_cc__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel"))
+               goto done;
+
+       sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+               goto done;
+
+       err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+                        sizeof(cc_cubic));
+       if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+               goto done;
+
+       skel->links.sockopt_qos_to_cc =
+               bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+                                          cg_fd);
+       if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+                          "prog_attach(sockopt_qos_to_cc)"))
+               goto done;
+
+       run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+       if (sock_fd != -1)
+               close(sock_fd);
+       if (cg_fd != -1)
+               close(cg_fd);
+       /* destroy can take null and error pointer */
+       sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644 (file)
index 0000000..53f0e0f
--- /dev/null
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <linux/ptrace.h>
+#include "test_task_pt_regs.skel.h"
+
+void test_task_pt_regs(void)
+{
+       struct test_task_pt_regs *skel;
+       struct bpf_link *uprobe_link;
+       size_t uprobe_offset;
+       ssize_t base_addr;
+       bool match;
+
+       base_addr = get_base_addr();
+       if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+               return;
+       uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+       skel = test_task_pt_regs__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel_open"))
+               return;
+       if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+               goto cleanup;
+
+       uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+                                                false /* retprobe */,
+                                                0 /* self pid */,
+                                                "/proc/self/exe",
+                                                uprobe_offset);
+       if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+               goto cleanup;
+       skel->links.handle_uprobe = uprobe_link;
+
+       /* trigger & validate uprobe */
+       get_base_addr();
+
+       if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+               goto cleanup;
+
+       match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+                       sizeof(skel->bss->current_regs));
+       ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+       test_task_pt_regs__destroy(skel);
+}
index 5703c91..e7201ba 100644 (file)
 #define _GNU_SOURCE
 
 #include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
-#include <sys/stat.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include "test_progs.h"
 #include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
 
 static int test_ping(int family, const char *addr)
 {
-       const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
-       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
        return 0;
 fail:
        return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644 (file)
index 0000000..25f40e1
--- /dev/null
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+
+static int timer(struct timer *timer_skel)
+{
+       int err, prog_fd;
+       __u32 duration = 0, retval;
+
+       err = timer__attach(timer_skel);
+       if (!ASSERT_OK(err, "timer_attach"))
+               return err;
+
+       ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+       ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+
+       prog_fd = bpf_program__fd(timer_skel->progs.test1);
+       err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+                               NULL, NULL, &retval, &duration);
+       ASSERT_OK(err, "test_run");
+       ASSERT_EQ(retval, 0, "test_run");
+       timer__detach(timer_skel);
+
+       usleep(50); /* 10 usecs should be enough, but give it extra */
+       /* check that timer_cb1() was executed 10+10 times */
+       ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+       ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+       /* check that timer_cb2() was executed twice */
+       ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+       /* check that there were no errors in timer execution */
+       ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+       /* check that code paths completed */
+       ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+       return 0;
+}
+
+void test_timer(void)
+{
+       struct timer *timer_skel = NULL;
+       int err;
+
+       timer_skel = timer__open_and_load();
+       if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+               goto cleanup;
+
+       err = timer(timer_skel);
+       ASSERT_OK(err, "timer");
+cleanup:
+       timer__destroy(timer_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644 (file)
index 0000000..ced8f6c
--- /dev/null
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+static int timer_mim(struct timer_mim *timer_skel)
+{
+       __u32 duration = 0, retval;
+       __u64 cnt1, cnt2;
+       int err, prog_fd, key1 = 1;
+
+       err = timer_mim__attach(timer_skel);
+       if (!ASSERT_OK(err, "timer_attach"))
+               return err;
+
+       prog_fd = bpf_program__fd(timer_skel->progs.test1);
+       err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+                               NULL, NULL, &retval, &duration);
+       ASSERT_OK(err, "test_run");
+       ASSERT_EQ(retval, 0, "test_run");
+       timer_mim__detach(timer_skel);
+
+       /* check that timer_cb[12] are incrementing 'cnt' */
+       cnt1 = READ_ONCE(timer_skel->bss->cnt);
+       for (int i = 0; i < 100; i++) {
+               cnt2 = READ_ONCE(timer_skel->bss->cnt);
+               if (cnt2 != cnt1)
+                       break;
+               usleep(200); /* 100 times more than interval */
+       }
+       ASSERT_GT(cnt2, cnt1, "cnt");
+
+       ASSERT_EQ(timer_skel->bss->err, 0, "err");
+       /* check that code paths completed */
+       ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+       close(bpf_map__fd(timer_skel->maps.inner_htab));
+       err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+       ASSERT_EQ(err, 0, "delete inner map");
+
+       /* check that timer_cb[12] are no longer running */
+       cnt1 = READ_ONCE(timer_skel->bss->cnt);
+       for (int i = 0; i < 100; i++) {
+               usleep(200); /* 100 times more than interval */
+               cnt2 = READ_ONCE(timer_skel->bss->cnt);
+               if (cnt2 == cnt1)
+                       break;
+       }
+       ASSERT_EQ(cnt2, cnt1, "cnt");
+
+       return 0;
+}
+
+void test_timer_mim(void)
+{
+       struct timer_mim_reject *timer_reject_skel = NULL;
+       libbpf_print_fn_t old_print_fn = NULL;
+       struct timer_mim *timer_skel = NULL;
+       int err;
+
+       old_print_fn = libbpf_set_print(NULL);
+       timer_reject_skel = timer_mim_reject__open_and_load();
+       libbpf_set_print(old_print_fn);
+       if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
+               goto cleanup;
+
+       timer_skel = timer_mim__open_and_load();
+       if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+               goto cleanup;
+
+       err = timer_mim(timer_skel);
+       ASSERT_OK(err, "timer_mim");
+cleanup:
+       timer_mim__destroy(timer_skel);
+       timer_mim_reject__destroy(timer_reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644 (file)
index 0000000..370d220
--- /dev/null
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+       ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+       int nsfd, err;
+       char nspath[PATH_MAX];
+
+       snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+       nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+       if (nsfd < 0)
+               return -1;
+
+       err = setns(nsfd, CLONE_NEWNET);
+       close(nsfd);
+       return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+       FILE *f;
+       char line[512];
+       int iface_len = strlen(iface);
+
+       f = fopen("/proc/net/dev", "r");
+       if (!f)
+               return -1;
+
+       while (fgets(line, sizeof(line), f)) {
+               char *p = line;
+
+               while (*p == ' ')
+                       p++; /* skip whitespace */
+               if (!strncmp(p, iface, iface_len)) {
+                       p += iface_len;
+                       if (*p++ != ':')
+                               continue;
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       while (*p && *p != ' ')
+                               p++; /* skip rx bytes */
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       fclose(f);
+                       return atoi(p);
+               }
+       }
+       fclose(f);
+       return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+       struct xdp_dummy *xdp_dummy;
+       struct xdp_tx *xdp_tx;
+       struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+       int nlinks;
+       struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+       struct bpf_link *link;
+       int ifindex;
+
+       ifindex = if_nametoindex(iface);
+       if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+               return -1;
+
+       if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+               return -1;
+
+       link = bpf_program__attach_xdp(prog, ifindex);
+       if (!ASSERT_OK_PTR(link, "attach xdp program"))
+               return -1;
+
+       skeletons->links[skeletons->nlinks++] = link;
+       return 0;
+}
+
+enum {
+       BOND_ONE_NO_ATTACH = 0,
+       BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+       [BOND_MODE_ROUNDROBIN]   = "balance-rr",
+       [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+       [BOND_MODE_XOR]          = "balance-xor",
+       [BOND_MODE_BROADCAST]    = "broadcast",
+       [BOND_MODE_8023AD]       = "802.3ad",
+       [BOND_MODE_TLB]          = "balance-tlb",
+       [BOND_MODE_ALB]          = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+       [BOND_XMIT_POLICY_LAYER2]       = "layer2",
+       [BOND_XMIT_POLICY_LAYER34]      = "layer3+4",
+       [BOND_XMIT_POLICY_LAYER23]      = "layer2+3",
+       [BOND_XMIT_POLICY_ENCAP23]      = "encap2+3",
+       [BOND_XMIT_POLICY_ENCAP34]      = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+                        int bond_both_attach)
+{
+#define SYS(fmt, ...)                                          \
+       ({                                                      \
+               char cmd[1024];                                 \
+               snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+               if (!ASSERT_OK(system(cmd), cmd))               \
+                       return -1;                              \
+       })
+
+       SYS("ip netns add ns_dst");
+       SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+       SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+       SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+       SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+       SYS("ip link set veth1_1 master bond1");
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               SYS("ip link set veth1_2 master bond1");
+       } else {
+               SYS("ip link set veth1_2 up addrgenmode none");
+
+               if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+                       return -1;
+       }
+
+       SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+               SYS("ip -netns ns_dst link set veth2_2 master bond2");
+       else
+               SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+       /* Load a dummy program on sending side as with veth peer needs to have a
+        * XDP program loaded as well.
+        */
+       if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+               return -1;
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+                       return -1;
+
+               if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+                       return -1;
+
+               restore_root_netns();
+       }
+
+       return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+       restore_root_netns();
+       while (skeletons->nlinks) {
+               skeletons->nlinks--;
+               bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+       }
+       ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+       ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+       ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+       ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+       struct ethhdr eh = {
+               .h_source = BOND1_MAC,
+               .h_dest = BOND2_MAC,
+               .h_proto = htons(ETH_P_IP),
+       };
+       uint8_t buf[128] = {};
+       struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+       struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+       int i, s = -1;
+       int ifindex;
+
+       s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (!ASSERT_GE(s, 0, "socket"))
+               goto err;
+
+       ifindex = if_nametoindex("bond1");
+       if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+               goto err;
+
+       memcpy(buf, &eh, sizeof(eh));
+       iph->ihl = 5;
+       iph->version = 4;
+       iph->tos = 16;
+       iph->id = 1;
+       iph->ttl = 64;
+       iph->protocol = IPPROTO_UDP;
+       iph->saddr = 1;
+       iph->daddr = 2;
+       iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+       iph->check = 0;
+
+       for (i = 1; i <= NPACKETS; i++) {
+               int n;
+               struct sockaddr_ll saddr_ll = {
+                       .sll_ifindex = ifindex,
+                       .sll_halen = ETH_ALEN,
+                       .sll_addr = BOND2_MAC,
+               };
+
+               /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+               uh->dest++;
+
+               if (vary_dst_ip)
+                       iph->daddr++;
+
+               n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+               if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       if (s >= 0)
+               close(s);
+       return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+       int bond1_rx;
+
+       if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+               goto out;
+
+       if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+               goto out;
+
+       bond1_rx = get_rx_packets("bond1");
+       ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+       switch (mode) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_XOR: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+               switch (xmit_policy) {
+               case BOND_XMIT_POLICY_LAYER2:
+                       ASSERT_GE(diff, NPACKETS,
+                                 "expected packets on only one of the interfaces");
+                       break;
+               case BOND_XMIT_POLICY_LAYER23:
+               case BOND_XMIT_POLICY_LAYER34:
+                       ASSERT_LT(diff, NPACKETS/2,
+                                 "expected even distribution of packets");
+                       break;
+               default:
+                       PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+                       break;
+               }
+               break;
+       }
+       case BOND_MODE_ACTIVEBACKUP: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(diff, NPACKETS,
+                         "expected packets on only one of the interfaces");
+               break;
+       }
+       default:
+               PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+               break;
+       }
+
+out:
+       bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+       static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+       int veth1_1_rx, veth1_2_rx;
+       int err;
+
+       if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+                         BOND_ONE_NO_ATTACH))
+               goto out;
+
+
+       if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+               goto out;
+
+       /* populate the devmap with the relevant interfaces */
+       for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+               int ifindex = if_nametoindex(ifaces[i]);
+               int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+               if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+                       goto out;
+
+               err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+               if (!ASSERT_OK(err, "add interface to map_all"))
+                       goto out;
+       }
+
+       if (xdp_attach(skeletons,
+                      skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+                      "bond2"))
+               goto out;
+
+       restore_root_netns();
+
+       if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+               goto out;
+
+       veth1_1_rx = get_rx_packets("veth1_1");
+       veth1_2_rx = get_rx_packets("veth1_2");
+
+       ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+       ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+       restore_root_netns();
+       bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+       struct bpf_link *link = NULL;
+       struct bpf_link *link2 = NULL;
+       int veth, bond;
+       int err;
+
+       if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+               goto out;
+       if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+               goto out;
+
+       veth = if_nametoindex("veth");
+       if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+               goto out;
+       bond = if_nametoindex("bond");
+       if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+               goto out;
+
+       /* enslaving with a XDP program loaded fails */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to veth"))
+               goto out;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_OK(err, "set veth master"))
+               goto out;
+
+       /* attaching to slave when master has no program is allowed */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+               goto out;
+
+       /* attaching to master not allowed when slave has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       /* attaching XDP program to master allowed when slave has no program */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_OK_PTR(link, "attach program to master"))
+               goto out;
+
+       /* attaching to slave not allowed when master has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+       bpf_link__destroy(link);
+       bpf_link__destroy(link2);
+
+       system("ip link del veth");
+       system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+                             const char *format, va_list args)
+{
+       if (level != LIBBPF_WARN)
+               vprintf(format, args);
+       return 0;
+}
+
+struct bond_test_case {
+       char *name;
+       int mode;
+       int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+       { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+       { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+       { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+       libbpf_print_fn_t old_print_fn;
+       struct skeletons skeletons = {};
+       int i;
+
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+       root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+       if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+               goto out;
+
+       skeletons.xdp_dummy = xdp_dummy__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+               goto out;
+
+       skeletons.xdp_tx = xdp_tx__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+               goto out;
+
+       skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+                          "xdp_redirect_multi_kern__open_and_load"))
+               goto out;
+
+       if (test__start_subtest("xdp_bonding_attach"))
+               test_xdp_bonding_attach(&skeletons);
+
+       for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+               struct bond_test_case *test_case = &bond_test_cases[i];
+
+               if (test__start_subtest(test_case->name))
+                       test_xdp_bonding_with_mode(
+                               &skeletons,
+                               test_case->mode,
+                               test_case->xmit_policy);
+       }
+
+       if (test__start_subtest("xdp_bonding_redirect_multi"))
+               test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+       xdp_dummy__destroy(skeletons.xdp_dummy);
+       xdp_tx__destroy(skeletons.xdp_tx);
+       xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+       libbpf_set_print(old_print_fn);
+       if (root_netns_fd >= 0)
+               close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644 (file)
index 0000000..ab4952b
--- /dev/null
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+                           __u32 data_meta, __u32 data, __u32 data_end,
+                           __u32 ingress_ifindex, __u32 rx_queue_index,
+                           __u32 egress_ifindex)
+{
+       struct xdp_md ctx = {
+               .data = data,
+               .data_end = data_end,
+               .data_meta = data_meta,
+               .ingress_ifindex = ingress_ifindex,
+               .rx_queue_index = rx_queue_index,
+               .egress_ifindex = egress_ifindex,
+       };
+       int err;
+
+       opts.ctx_in = &ctx;
+       opts.ctx_size_in = sizeof(ctx);
+       err = bpf_prog_test_run_opts(prog_fd, &opts);
+       ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+       ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+void test_xdp_context_test_run(void)
+{
+       struct test_xdp_context_test_run *skel = NULL;
+       char data[sizeof(pkt_v4) + sizeof(__u32)];
+       char bad_ctx[sizeof(struct xdp_md) + 1];
+       struct xdp_md ctx_in, ctx_out;
+       DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+                           .data_in = &data,
+                           .data_size_in = sizeof(data),
+                           .ctx_out = &ctx_out,
+                           .ctx_size_out = sizeof(ctx_out),
+                           .repeat = 1,
+               );
+       int err, prog_fd;
+
+       skel = test_xdp_context_test_run__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel"))
+               return;
+       prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+       /* Data past the end of the kernel's struct xdp_md must be 0 */
+       bad_ctx[sizeof(bad_ctx) - 1] = 1;
+       opts.ctx_in = bad_ctx;
+       opts.ctx_size_in = sizeof(bad_ctx);
+       err = bpf_prog_test_run_opts(prog_fd, &opts);
+       ASSERT_EQ(errno, E2BIG, "extradata-errno");
+       ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+       *(__u32 *)data = XDP_PASS;
+       *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+       opts.ctx_in = &ctx_in;
+       opts.ctx_size_in = sizeof(ctx_in);
+       memset(&ctx_in, 0, sizeof(ctx_in));
+       ctx_in.data_meta = 0;
+       ctx_in.data = sizeof(__u32);
+       ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+       err = bpf_prog_test_run_opts(prog_fd, &opts);
+       ASSERT_OK(err, "bpf_prog_test_run(valid)");
+       ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+       ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+       ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+       ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+       ASSERT_EQ(ctx_out.data, 0, "valid-data");
+       ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+       /* Meta data's size must be a multiple of 4 */
+       test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+       /* data_meta must reference the start of data */
+       test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+                              0, 0, 0);
+
+       /* Meta data must be 32 bytes or smaller */
+       test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
+
+       /* Total size of data must match data_end - data_meta */
+       test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+                              sizeof(data) - 1, 0, 0, 0);
+       test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+                              sizeof(data) + 1, 0, 0, 0);
+
+       /* RX queue cannot be specified without specifying an ingress */
+       test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+                              0, 1, 0);
+
+       /* Interface 1 is always the loopback interface which always has only
+        * one RX queue (index 0). This makes index 1 an invalid rx queue index
+        * for interface 1.
+        */
+       test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+                              1, 1, 0);
+
+       /* The egress cannot be specified */
+       test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+                              0, 0, 1);
+
+       test_xdp_context_test_run__destroy(skel);
+}
index 0176573..8755eff 100644 (file)
@@ -7,64 +7,53 @@
 
 #define IFINDEX_LO     1
 
-void test_xdp_with_cpumap_helpers(void)
+void test_xdp_cpumap_attach(void)
 {
        struct test_xdp_with_cpumap_helpers *skel;
        struct bpf_prog_info info = {};
+       __u32 len = sizeof(info);
        struct bpf_cpumap_val val = {
                .qsize = 192,
        };
-       __u32 duration = 0, idx = 0;
-       __u32 len = sizeof(info);
        int err, prog_fd, map_fd;
+       __u32 idx = 0;
 
        skel = test_xdp_with_cpumap_helpers__open_and_load();
-       if (CHECK_FAIL(!skel)) {
-               perror("test_xdp_with_cpumap_helpers__open_and_load");
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
                return;
-       }
 
-       /* can not attach program with cpumaps that allow programs
-        * as xdp generic
-        */
        prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
        err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
-       CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
-             "should have failed\n");
+       if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+               goto out_close;
+
+       err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+       ASSERT_OK(err, "XDP program detach");
 
        prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
        map_fd = bpf_map__fd(skel->maps.cpu_map);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
                goto out_close;
 
        val.bpf_prog.fd = prog_fd;
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-       CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
-             err, errno);
+       ASSERT_OK(err, "Add program to cpumap entry");
 
        err = bpf_map_lookup_elem(map_fd, &idx, &val);
-       CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
-       CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
-             "expected %u read %u\n", info.id, val.bpf_prog.id);
+       ASSERT_OK(err, "Read cpumap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
        /* can not attach BPF_XDP_CPUMAP program to a device */
        err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
-       CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
-             "should have failed\n");
+       if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+               bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
        val.qsize = 192;
        val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-       CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
-             "should have failed\n");
+       ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
 out_close:
        test_xdp_with_cpumap_helpers__destroy(skel);
 }
-
-void test_xdp_cpumap_attach(void)
-{
-       if (test__start_subtest("cpumap_with_progs"))
-               test_xdp_with_cpumap_helpers();
-}
index 88ef3ec..c72af03 100644 (file)
@@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
                .ifindex = IFINDEX_LO,
        };
        __u32 len = sizeof(info);
-       __u32 duration = 0, idx = 0;
        int err, dm_fd, map_fd;
+       __u32 idx = 0;
 
 
        skel = test_xdp_with_devmap_helpers__open_and_load();
-       if (CHECK_FAIL(!skel)) {
-               perror("test_xdp_with_devmap_helpers__open_and_load");
+       if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
                return;
-       }
 
-       /* can not attach program with DEVMAPs that allow programs
-        * as xdp generic
-        */
        dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
        err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
-       CHECK(err == 0, "Generic attach of program with 8-byte devmap",
-             "should have failed\n");
+       if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+               goto out_close;
+
+       err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+       ASSERT_OK(err, "XDP program detach");
 
        dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
        map_fd = bpf_map__fd(skel->maps.dm_ports);
        err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
-       if (CHECK_FAIL(err))
+       if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
                goto out_close;
 
        val.bpf_prog.fd = dm_fd;
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-       CHECK(err, "Add program to devmap entry",
-             "err %d errno %d\n", err, errno);
+       ASSERT_OK(err, "Add program to devmap entry");
 
        err = bpf_map_lookup_elem(map_fd, &idx, &val);
-       CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
-       CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
-             "expected %u read %u\n", info.id, val.bpf_prog.id);
+       ASSERT_OK(err, "Read devmap entry");
+       ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
 
        /* can not attach BPF_XDP_DEVMAP program to a device */
        err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
-       CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
-             "should have failed\n");
+       if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+               bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
        val.ifindex = 1;
        val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
        err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-       CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
-             "should have failed\n");
+       ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
 out_close:
        test_xdp_with_devmap_helpers__destroy(skel);
@@ -68,12 +63,10 @@ out_close:
 void test_neg_xdp_devmap_helpers(void)
 {
        struct test_xdp_devmap_helpers *skel;
-       __u32 duration = 0;
 
        skel = test_xdp_devmap_helpers__open_and_load();
-       if (CHECK(skel,
-                 "Load of XDP program accessing egress ifindex without attach type",
-                 "should have failed\n")) {
+       if (!ASSERT_EQ(skel, NULL,
+                   "Load of XDP program accessing egress ifindex without attach type")) {
                test_xdp_devmap_helpers__destroy(skel);
        }
 }
index fd42247..9573be6 100644 (file)
 
 char _license[] SEC("license") = "GPL";
 
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
 int stg_result = 0;
 
 struct {
@@ -57,6 +62,26 @@ void BPF_PROG(dctcp_init, struct sock *sk)
        struct dctcp *ca = inet_csk_ca(sk);
        int *stg;
 
+       if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+               /* Switch to fallback */
+               bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                              (void *)fallback, sizeof(fallback));
+               /* Switch back to myself which the bpf trampoline
+                * stopped calling dctcp_init recursively.
+                */
+               bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                              (void *)bpf_dctcp, sizeof(bpf_dctcp));
+               /* Switch back to fallback */
+               bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                              (void *)fallback, sizeof(fallback));
+               /* Expecting -ENOTSUPP for tcp_cdg_res */
+               tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                                            (void *)tcp_cdg, sizeof(tcp_cdg));
+               bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                              (void *)cc_res, sizeof(cc_res));
+               return;
+       }
+
        ca->prior_rcv_nxt = tp->rcv_nxt;
        ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
        ca->loss_cwnd = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644 (file)
index 0000000..d836f7c
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+       bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+                      (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+       .release        = (void *)dctcp_nouse_release,
+       .name           = "bpf_dctcp_rel",
+};
index 3d83b18..8cfaeba 100644 (file)
@@ -12,6 +12,7 @@
 #define tcp6_sock tcp6_sock___not_used
 #define bpf_iter__udp bpf_iter__udp___not_used
 #define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@
 #undef tcp6_sock
 #undef bpf_iter__udp
 #undef udp6_sock
+#undef bpf_iter__unix
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
@@ -103,6 +105,12 @@ struct udp6_sock {
        struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
 
+struct bpf_iter__unix {
+       struct bpf_iter_meta *meta;
+       struct unix_sock *unix_sk;
+       uid_t uid;
+} __attribute__((preserve_access_index));
+
 struct bpf_iter__bpf_map_elem {
        struct bpf_iter_meta *meta;
        struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..b77adfd
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc)        ({                              \
+       struct sock_common *_skc = skc;                 \
+       sk = NULL;                                      \
+       tp = NULL;                                      \
+       if (_skc) {                                     \
+               tp = bpf_skc_to_tcp_sock(_skc);         \
+               sk = (struct sock *)tp;                 \
+       }                                               \
+       tp;                                             \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+       int i;
+
+       for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+               if (a[i] != b[i])
+                       return false;
+               if (!a[i])
+                       break;
+       }
+
+       return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+       char cur_cc[TCP_CA_NAME_MAX];
+       struct tcp_sock *tp;
+       struct sock *sk;
+       int ret;
+
+       if (!bpf_tcp_sk(ctx->sk_common))
+               return 0;
+
+       if (sk->sk_family != AF_INET6 ||
+           (sk->sk_state != TCP_LISTEN &&
+            sk->sk_state != TCP_ESTABLISHED) ||
+           (sk->sk_num != reuse_listen_hport &&
+            sk->sk_num != listen_hport &&
+            bpf_ntohs(sk->sk_dport) != listen_hport))
+               return 0;
+
+       if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+                          cur_cc, sizeof(cur_cc)))
+               return 0;
+
+       if (!tcp_cc_eq(cur_cc, cubic_cc))
+               return 0;
+
+       if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+               return 1;
+
+       bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 2e4775c..92267ab 100644 (file)
@@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
        }
 
        BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
-                      seq_num, src, srcp, destp, destp);
+                      seq_num, src, srcp, dest, destp);
        BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
                       state,
                       tp->write_seq - tp->snd_una, rx_queue,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644 (file)
index 0000000..9442390
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+       const struct socket *sk_socket = sk->sk_socket;
+       const struct inode *inode;
+       unsigned long ino;
+
+       if (!sk_socket)
+               return 0;
+
+       inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+       return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+       struct unix_sock *unix_sk = ctx->unix_sk;
+       struct sock *sk = (struct sock *)unix_sk;
+       struct seq_file *seq;
+       __u32 seq_num;
+
+       if (!unix_sk)
+               return 0;
+
+       seq = ctx->meta->seq;
+       seq_num = ctx->meta->seq_num;
+       if (seq_num == 0)
+               BPF_SEQ_PRINTF(seq, "Num               RefCount Protocol Flags    Type St    Inode Path\n");
+
+       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+                      unix_sk,
+                      sk->sk_refcnt.refs.counter,
+                      0,
+                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+                      sk->sk_type,
+                      sk->sk_socket ?
+                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+                      sock_i_ino(sk));
+
+       if (unix_sk->addr) {
+               if (!UNIX_ABSTRACT(unix_sk)) {
+                       BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+               } else {
+                       /* The name of the abstract UNIX domain socket starts
+                        * with '\0' and can contain '\0'.  The null bytes
+                        * should be escaped as done in unix_seq_show().
+                        */
+                       __u64 i, len;
+
+                       len = unix_sk->addr->len - sizeof(short);
+
+                       BPF_SEQ_PRINTF(seq, " @");
+
+                       for (i = 1; i < len; i++) {
+                               /* unix_mkname() tests this upper bound. */
+                               if (i >= sizeof(struct sockaddr_un))
+                                       break;
+
+                               BPF_SEQ_PRINTF(seq, "%c",
+                                              unix_sk->addr->name->sun_path[i] ?:
+                                              '@');
+                       }
+               }
+       }
+
+       BPF_SEQ_PRINTF(seq, "\n");
+
+       return 0;
+}
index 0137891..eef5646 100644 (file)
@@ -5,6 +5,14 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define __SO_ACCEPTCON         (1 << 16)
+#define UNIX_HASH_SIZE         256
+#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+#define SOL_TCP                        6
+#define TCP_CONGESTION         13
+#define TCP_CA_NAME_MAX                16
+
 #define ICSK_TIME_RETRANS      1
 #define ICSK_TIME_PROBE0       3
 #define ICSK_TIME_LOSS_PROBE   5
@@ -32,6 +40,8 @@
 #define ir_v6_rmt_addr         req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr         req.__req_common.skc_v6_rcv_saddr
 
+#define sk_num                 __sk_common.skc_num
+#define sk_dport               __sk_common.skc_dport
 #define sk_family              __sk_common.skc_family
 #define sk_rmem_alloc          sk_backlog.rmem_alloc
 #define sk_refcnt              __sk_common.skc_refcnt
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644 (file)
index 0000000..a587aec
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_modify_return_test __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test1_result = (const void *) addr == &bpf_fentry_test1;
+       return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test2_result = (const void *) addr == &bpf_fentry_test2;
+       return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test3_result = (const void *) addr == &bpf_fentry_test3;
+       return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test4_result = (const void *) addr == &bpf_fentry_test4;
+       return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test5_result = (const void *) addr == &bpf_modify_return_test;
+       return ret;
+}
+
+__u64 test6_result = 0;
+SEC("kprobe/bpf_fentry_test6+0x5")
+int test6(struct pt_regs *ctx)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
+       return 0;
+}
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+       return 0;
+}
index b2dcb7d..5fbd9e2 100644 (file)
@@ -9,7 +9,7 @@ extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
                                  __u32 c, __u64 d) __ksym;
 extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
 int active_res = -1;
-int sk_state = -1;
+int sk_state_res = -1;
 
 int __noinline f1(struct __sk_buff *skb)
 {
@@ -28,7 +28,7 @@ int __noinline f1(struct __sk_buff *skb)
        if (active)
                active_res = *active;
 
-       sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+       sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
 
        return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
 }
index d071adf..43649bc 100644 (file)
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct percpu_net_cnt);
+       __type(value, union percpu_net_cnt);
 } percpu_netcnt SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct net_cnt);
+       __type(value, union net_cnt);
 } netcnt SEC(".maps");
 
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
 {
-       struct percpu_net_cnt *percpu_cnt;
+       union percpu_net_cnt *percpu_cnt;
        char fmt[] = "%d %llu %llu\n";
-       struct net_cnt *cnt;
+       union net_cnt *cnt;
        __u64 ts, dt;
        int ret;
 
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644 (file)
index 0000000..aeff3a4
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SOCKMAP);
+       __uint(max_entries, 2);
+       __type(key, __u32);
+       __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+       struct bpf_sock *sk = ctx->sk;
+       int *cookie;
+       __u32 key = 0;
+
+       if (ctx->family != AF_INET6)
+               return 1;
+
+       if (!sk)
+               return 1;
+
+       switch (ctx->op) {
+       case BPF_SOCK_OPS_TCP_CONNECT_CB:
+               cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+                                           BPF_SK_STORAGE_GET_F_CREATE);
+               if (!cookie)
+                       return 1;
+
+               *cookie = bpf_get_netns_cookie(ctx);
+               break;
+       case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+               bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+               break;
+       default:
+               break;
+       }
+
+       return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+       struct bpf_sock *sk = msg->sk;
+       int *cookie;
+
+       if (msg->family != AF_INET6)
+               return 1;
+
+       if (!sk)
+               return 1;
+
+       cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+                                   BPF_SK_STORAGE_GET_F_CREATE);
+       if (!cookie)
+               return 1;
+
+       *cookie = bpf_get_netns_cookie(msg);
+
+       return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644 (file)
index 0000000..1bce83b
--- /dev/null
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+       void *optval_end = ctx->optval_end;
+       int *optval = ctx->optval;
+       char buf[TCP_CA_NAME_MAX];
+       char cc_reno[TCP_CA_NAME_MAX] = "reno";
+       char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+       if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+               return 1;
+
+       if (optval + 1 > optval_end)
+               return 0; /* EPERM, bounds check */
+
+       if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+               return 0;
+
+       if (!tcp_cc_eq(buf, cc_cubic))
+               return 0;
+
+       if (*optval == 0x2d) {
+               if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+                               sizeof(cc_reno)))
+                       return 0;
+       }
+       return 1;
+}
index 8acdb99..79c8139 100644 (file)
@@ -33,6 +33,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
        struct sockopt_sk *storage;
 
+       /* Make sure bpf_get_netns_cookie is callable.
+        */
+       if (bpf_get_netns_cookie(NULL) == 0)
+               return 0;
+
+       if (bpf_get_netns_cookie(ctx) == 0)
+               return 0;
+
        if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
                /* Not interested in SOL_IP:IP_TOS;
                 * let next BPF program in the cgroup chain or kernel
@@ -123,6 +131,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
        __u8 *optval = ctx->optval;
        struct sockopt_sk *storage;
 
+       /* Make sure bpf_get_netns_cookie is callable.
+        */
+       if (bpf_get_netns_cookie(NULL) == 0)
+               return 0;
+
+       if (bpf_get_netns_cookie(ctx) == 0)
+               return 0;
+
        if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
                /* Not interested in SOL_IP:IP_TOS;
                 * let next BPF program in the cgroup chain or kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644 (file)
index 0000000..2d3a771
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int my_tid;
+
+int kprobe_res;
+int kprobe_multi_res;
+int kretprobe_res;
+int uprobe_res;
+int uretprobe_res;
+int tp_res;
+int pe_res;
+
+static void update(void *ctx, int *res)
+{
+       if (my_tid != (u32)bpf_get_current_pid_tgid())
+               return;
+
+       *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe/sys_nanosleep")
+int handle_kprobe(struct pt_regs *ctx)
+{
+       update(ctx, &kprobe_res);
+       return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+       update(ctx, &kretprobe_res);
+       return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+       update(ctx, &uprobe_res);
+       return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+       update(ctx, &uretprobe_res);
+       return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+       update(ctx, &tp_res);
+       return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+       update(ctx, &tp_res);
+       return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+       update(ctx, &tp_res);
+       return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+       update(ctx, &pe_res);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 44f5aa2..9a7829c 100644 (file)
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
        return 0;
 }
 
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+       /* Prevent "subtraction from stack pointer prohibited" */ \
+       volatile long __off = sizeof(*dst) - (sz); \
+       bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
 SEC("raw_tp/sys_enter")
 int handle_probed(void *ctx)
 {
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
        __u64 tmp;
 
        tmp = 0;
-       bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+       bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
        ptr_probed = tmp;
 
        tmp = 0;
-       bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+       bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
        val1_probed = tmp;
 
        tmp = 0;
-       bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+       bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
        val2_probed = tmp;
 
        tmp = 0;
-       bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+       bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
        val3_probed = tmp;
 
        tmp = 0;
-       bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+       bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
        val4_probed = tmp;
 
        return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644 (file)
index 0000000..5f8379a
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+       struct rq *rq;
+
+       /* tests existing symbols. */
+       rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+       if (rq)
+               out__existing_typed = rq->cpu;
+       out__existing_typeless = (__u64)&bpf_prog_active;
+
+       /* tests non-existent symbols. */
+       out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+       /* tests non-existent symbols. */
+       out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+       if (&bpf_link_fops2) /* can't happen */
+               out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644 (file)
index 0000000..703c08e
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, int);
+       __uint(max_entries, 4);
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 0); /* This will make map creation to fail */
+       __uint(key_size, sizeof(__u32));
+       __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644 (file)
index 0000000..c1db9fd
--- /dev/null
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+       __sync_fetch_and_add(&run_cnt, 1);
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index e83d0b4..8249075 100644 (file)
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
 int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
        return family;
 }
 
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
 int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
 int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
 int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
 int bpf_sk_lookup_test3(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
 int bpf_sk_lookup_test4(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
        bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
 int bpf_sk_lookup_test5(struct __sk_buff *skb)
 {
        lookup_no_release(skb);
index e2ad261..8fda075 100644 (file)
@@ -59,9 +59,9 @@ int handler(const void *ctx)
        /* Kernel pointers */
        addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
                                0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
-       /* Strings embedding */
-       str_ret  = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
-                               str1, longstr);
+       /* Strings and single-byte character embedding */
+       str_ret  = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+                               str1, 'a', 'b', 'c', 'd', 'e', longstr);
        /* Overflow */
        over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
        /* Padding of fixed width numbers */
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644 (file)
index 0000000..6c059f1
--- /dev/null
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct pt_regs current_regs = {};
+struct pt_regs ctx_regs = {};
+int uprobe_res = 0;
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+       struct task_struct *current;
+       struct pt_regs *regs;
+
+       current = bpf_get_current_task_btf();
+       regs = (struct pt_regs *) bpf_task_pt_regs(current);
+       __builtin_memcpy(&current_regs, regs, sizeof(*regs));
+       __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+
+       /* Prove that uprobe was run */
+       uprobe_res = 1;
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index 84cd632..a0e7762 100644 (file)
@@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
-       char buf[sizeof(struct v6hdr)];
        struct gre_hdr greh;
        struct udphdr udph;
        int olen = len;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644 (file)
index 0000000..d7b88cd
--- /dev/null
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+       void *data = (void *)(long)xdp->data;
+       __u32 *metadata = (void *)(long)xdp->data_meta;
+       __u32 ret;
+
+       if (metadata + 1 > data)
+               return XDP_ABORTED;
+       ret = *metadata;
+       if (bpf_xdp_adjust_meta(xdp, 4))
+               return XDP_ABORTED;
+       return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644 (file)
index 0000000..5f53097
--- /dev/null
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+       int counter;
+       struct bpf_timer timer;
+       struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 1000);
+       __type(key, int);
+       __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __uint(max_entries, 1000);
+       __type(key, int);
+       __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+       struct bpf_timer t;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 2);
+       __type(key, int);
+       __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_LRU_HASH);
+       __uint(max_entries, 4);
+       __type(key, int);
+       __type(value, struct elem);
+} lru SEC(".maps");
+
+__u64 bss_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+       /* increment bss variable twice.
+        * Once via array timer callback and once via lru timer callback
+        */
+       bss_data += 5;
+
+       /* *key == 0 - the callback was called for array timer.
+        * *key == 4 - the callback was called from lru timer.
+        */
+       if (*key == ARRAY) {
+               struct bpf_timer *lru_timer;
+               int lru_key = LRU;
+
+               /* rearm array timer to be called again in ~35 seconds */
+               if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+                       err |= 1;
+
+               lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+               if (!lru_timer)
+                       return 0;
+               bpf_timer_set_callback(lru_timer, timer_cb1);
+               if (bpf_timer_start(lru_timer, 0, 0) != 0)
+                       err |= 2;
+       } else if (*key == LRU) {
+               int lru_key, i;
+
+               for (i = LRU + 1;
+                    i <= 100  /* for current LRU eviction algorithm this number
+                               * should be larger than ~ lru->max_entries * 2
+                               */;
+                    i++) {
+                       struct elem init = {};
+
+                       /* lru_key cannot be used as loop induction variable
+                        * otherwise the loop will be unbounded.
+                        */
+                       lru_key = i;
+
+                       /* add more elements into lru map to push out current
+                        * element and force deletion of this timer
+                        */
+                       bpf_map_update_elem(map, &lru_key, &init, 0);
+                       /* look it up to bump it into active list */
+                       bpf_map_lookup_elem(map, &lru_key);
+
+                       /* keep adding until *key changes underneath,
+                        * which means that key/timer memory was reused
+                        */
+                       if (*key != LRU)
+                               break;
+               }
+
+               /* check that the timer was removed */
+               if (bpf_timer_cancel(timer) != -EINVAL)
+                       err |= 4;
+               ok |= 1;
+       }
+       return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+       struct bpf_timer *arr_timer, *lru_timer;
+       struct elem init = {};
+       int lru_key = LRU;
+       int array_key = ARRAY;
+
+       arr_timer = bpf_map_lookup_elem(&array, &array_key);
+       if (!arr_timer)
+               return 0;
+       bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+       bpf_map_update_elem(&lru, &lru_key, &init, 0);
+       lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+       if (!lru_timer)
+               return 0;
+       bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+       bpf_timer_set_callback(arr_timer, timer_cb1);
+       bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+       /* init more timers to check that array destruction
+        * doesn't leak timer memory.
+        */
+       array_key = 0;
+       arr_timer = bpf_map_lookup_elem(&array, &array_key);
+       if (!arr_timer)
+               return 0;
+       bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+       return 0;
+}
+
+/* callback for prealloc and non-prealloca hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+       if (*key == HTAB)
+               callback_check--;
+       else
+               callback2_check--;
+       if (val->counter > 0 && --val->counter) {
+               /* re-arm the timer again to execute after 1 usec */
+               bpf_timer_start(&val->timer, 1000, 0);
+       } else if (*key == HTAB) {
+               struct bpf_timer *arr_timer;
+               int array_key = ARRAY;
+
+               /* cancel arr_timer otherwise bpf_fentry_test1 prog
+                * will stay alive forever.
+                */
+               arr_timer = bpf_map_lookup_elem(&array, &array_key);
+               if (!arr_timer)
+                       return 0;
+               if (bpf_timer_cancel(arr_timer) != 1)
+                       /* bpf_timer_cancel should return 1 to indicate
+                        * that arr_timer was active at this time
+                        */
+                       err |= 8;
+
+               /* try to cancel ourself. It shouldn't deadlock. */
+               if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+                       err |= 16;
+
+               /* delete this key and this timer anyway.
+                * It shouldn't deadlock either.
+                */
+               bpf_map_delete_elem(map, key);
+
+               /* in preallocated hashmap both 'key' and 'val' could have been
+                * reused to store another map element (like in LRU above),
+                * but in controlled test environment the below test works.
+                * It's not a use-after-free. The memory is owned by the map.
+                */
+               if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+                       err |= 32;
+               ok |= 2;
+       } else {
+               if (*key != HTAB_MALLOC)
+                       err |= 64;
+
+               /* try to cancel ourself. It shouldn't deadlock. */
+               if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+                       err |= 128;
+
+               /* delete this key and this timer anyway.
+                * It shouldn't deadlock either.
+                */
+               bpf_map_delete_elem(map, key);
+
+               /* in non-preallocated hashmap both 'key' and 'val' are RCU
+                * protected and still valid though this element was deleted
+                * from the map. Arm this timer for ~35 seconds. When callback
+                * finishes the call_rcu will invoke:
+                * htab_elem_free_rcu
+                *   check_and_free_timer
+                *     bpf_timer_cancel_and_free
+                * to cancel this 35 second sleep and delete the timer for real.
+                */
+               if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
+                       err |= 256;
+               ok |= 4;
+       }
+       return 0;
+}
+
+int bpf_timer_test(void)
+{
+       struct hmap_elem *val;
+       int key = HTAB, key_malloc = HTAB_MALLOC;
+
+       val = bpf_map_lookup_elem(&hmap, &key);
+       if (val) {
+               if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+                       err |= 512;
+               bpf_timer_set_callback(&val->timer, timer_cb2);
+               bpf_timer_start(&val->timer, 1000, 0);
+       }
+       val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+       if (val) {
+               if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+                       err |= 1024;
+               bpf_timer_set_callback(&val->timer, timer_cb2);
+               bpf_timer_start(&val->timer, 1000, 0);
+       }
+       return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, int b)
+{
+       struct hmap_elem init = {}, *val;
+       int key = HTAB, key_malloc = HTAB_MALLOC;
+
+       init.counter = 10; /* number of times to trigger timer_cb2 */
+       bpf_map_update_elem(&hmap, &key, &init, 0);
+       val = bpf_map_lookup_elem(&hmap, &key);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+       /* update the same key to free the timer */
+       bpf_map_update_elem(&hmap, &key, &init, 0);
+
+       bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+       val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+       /* update the same key to free the timer */
+       bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+       /* init more timers to check that htab operations
+        * don't leak timer memory.
+        */
+       key = 0;
+       bpf_map_update_elem(&hmap, &key, &init, 0);
+       val = bpf_map_lookup_elem(&hmap, &key);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+       bpf_map_delete_elem(&hmap, &key);
+       bpf_map_update_elem(&hmap, &key, &init, 0);
+       val = bpf_map_lookup_elem(&hmap, &key);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+       /* and with non-prealloc htab */
+       key_malloc = 0;
+       bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+       val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+       bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+       bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+       val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+       if (val)
+               bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+       return bpf_timer_test();
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644 (file)
index 0000000..2fee7ab
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+       int pad; /* unused */
+       struct bpf_timer timer;
+};
+
+struct inner_map {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 1024);
+       __type(key, int);
+       __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 2);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+       .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+       cnt++;
+       bpf_timer_set_callback(&val->timer, timer_cb1);
+       if (bpf_timer_start(&val->timer, 1000, 0))
+               err |= 1;
+       ok |= 1;
+       return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+       cnt++;
+       bpf_timer_set_callback(&val->timer, timer_cb2);
+       if (bpf_timer_start(&val->timer, 1000, 0))
+               err |= 2;
+       /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+       bpf_map_lookup_elem(map, key);
+       ok |= 2;
+       return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+       struct hmap_elem init = {};
+       struct bpf_map *inner_map;
+       struct hmap_elem *val;
+       int array_key = ARRAY_KEY;
+       int hash_key = HASH_KEY;
+
+       inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+       if (!inner_map)
+               return 0;
+
+       bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+       val = bpf_map_lookup_elem(inner_map, &hash_key);
+       if (!val)
+               return 0;
+
+       bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+       if (bpf_timer_set_callback(&val->timer, timer_cb1))
+               err |= 4;
+       if (bpf_timer_start(&val->timer, 0, 0))
+               err |= 8;
+       return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644 (file)
index 0000000..5d648e3
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+       int pad; /* unused */
+       struct bpf_timer timer;
+};
+
+struct inner_map {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 1024);
+       __type(key, int);
+       __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 2);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+       .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+       return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+       struct hmap_elem init = {};
+       struct bpf_map *inner_map, *inner_map2;
+       struct hmap_elem *val;
+       int array_key = ARRAY_KEY;
+       int array_key2 = ARRAY_KEY2;
+       int hash_key = HASH_KEY;
+
+       inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+       if (!inner_map)
+               return 0;
+
+       inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+       if (!inner_map2)
+               return 0;
+       bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+       val = bpf_map_lookup_elem(inner_map, &hash_key);
+       if (!val)
+               return 0;
+
+       bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+       if (bpf_timer_set_callback(&val->timer, timer_cb))
+               err |= 4;
+       if (bpf_timer_start(&val->timer, 0, 0))
+               err |= 8;
+       return 0;
+}
index 94e6c2b..5f725c7 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("tx")
+SEC("xdp")
 int xdp_tx(struct xdp_md *xdp)
 {
        return XDP_TX;
index 6669077..718f596 100755 (executable)
@@ -2,4 +2,10 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (c) 2020 SUSE LLC.
 
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
 python3 -m unittest -v test_bpftool.TestBpftool
index ac349a5..b03a875 100755 (executable)
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
 cd $KDIR_ROOT_DIR
 if [ ! -e tools/bpf/bpftool/Makefile ]; then
        echo -e "skip:    bpftool files not found!\n"
-       exit 0
+       exit 4 # KSFT_SKIP=4
 fi
 
 ERROR=0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755 (executable)
index 0000000..be54b73
--- /dev/null
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+    os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+    """
+    A parser for extracting set of values from blocks such as enums.
+    @reader: a pointer to the open file to parse
+    """
+    def __init__(self, reader):
+        self.reader = reader
+
+    def search_block(self, start_marker):
+        """
+        Search for a given structure in a file.
+        @start_marker: regex marking the beginning of a structure to parse
+        """
+        offset = self.reader.tell()
+        array_start = re.search(start_marker, self.reader.read())
+        if array_start is None:
+            raise Exception('Failed to find start of block')
+        self.reader.seek(offset + array_start.start())
+
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Values to extract must be
+        on separate lines in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line or re.match(end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture and pattern.groups >= 1:
+                entries.add(capture.group(1))
+        return entries
+
+class ArrayParser(BlockParser):
+    """
+    A parser for extracting dicionaries of values from some BPF-related arrays.
+    @reader: a pointer to the open file to parse
+    @array_name: name of the array to parse
+    """
+    end_marker = re.compile('^};')
+
+    def __init__(self, reader, array_name):
+        self.array_name = array_name
+        self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+        super().__init__(reader)
+
+    def search_block(self):
+        """
+        Search for the given array in a file.
+        """
+        super().search_block(self.start_marker);
+
+    def parse(self):
+        """
+        Parse a block and return data as a dictionary. Items to extract must be
+        on separate lines in the file.
+        """
+        pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+        entries = {}
+        while True:
+            line = self.reader.readline()
+            if line == '' or re.match(self.end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture:
+                entries[capture.group(1)] = capture.group(2)
+        return entries
+
+class InlineListParser(BlockParser):
+    """
+    A parser for extracting set of values from inline lists.
+    """
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Multiple values to extract
+        can be on a same line in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line:
+                break
+            entries.update(pattern.findall(line))
+            if re.search(end_marker, line):
+                break
+        return entries
+
+class FileExtractor(object):
+    """
+    A generic reader for extracting data from a given file. This class contains
+    several helper methods that wrap arround parser objects to extract values
+    from different structures.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def __init__(self):
+        self.reader = open(self.filename, 'r')
+
+    def close(self):
+        """
+        Close the file used by the parser.
+        """
+        self.reader.close()
+
+    def reset_read(self):
+        """
+        Reset the file position indicator for this parser. This is useful when
+        parsing several structures in the file without respecting the order in
+        which those structures appear in the file.
+        """
+        self.reader.seek(0)
+
+    def get_types_from_array(self, array_name):
+        """
+        Search for and parse an array associating names to BPF_* enum members,
+        for example:
+
+            const char * const prog_type_name[] = {
+                    [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
+                    [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
+                    [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
+            };
+
+        Return a dictionary with the enum member names as keys and the
+        associated names as values, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+             'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+             'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+        @array_name: name of the array to parse
+        """
+        array_parser = ArrayParser(self.reader, array_name)
+        array_parser.search_block()
+        return array_parser.parse()
+
+    def get_enum(self, enum_name):
+        """
+        Search for and parse an enum containing BPF_* members, for example:
+
+            enum bpf_prog_type {
+                    BPF_PROG_TYPE_UNSPEC,
+                    BPF_PROG_TYPE_SOCKET_FILTER,
+                    BPF_PROG_TYPE_KPROBE,
+            };
+
+        Return a set containing all member names, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC',
+             'BPF_PROG_TYPE_SOCKET_FILTER',
+             'BPF_PROG_TYPE_KPROBE'}
+
+        @enum_name: name of the enum to parse
+        """
+        start_marker = re.compile(f'enum {enum_name} {{\n')
+        pattern = re.compile('^\s*(BPF_\w+),?$')
+        end_marker = re.compile('^};')
+        parser = BlockParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def __get_description_list(self, start_marker, pattern, end_marker):
+        parser = InlineListParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def get_rst_list(self, block_name):
+        """
+        Search for and parse a list of type names from RST documentation, for
+        example:
+
+             |       *TYPE* := {
+             |               **socket** | **kprobe** |
+             |               **kretprobe**
+             |       }
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the blog to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'\*{block_name}\* := {{')
+        pattern = re.compile('\*\*([\w/-]+)\*\*')
+        end_marker = re.compile('}\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list(self, block_name):
+        """
+        Search for and parse a list of type names from a help message in
+        bpftool, for example:
+
+            "       TYPE := { socket | kprobe |\\n"
+            "               kretprobe }\\n"
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the blog to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'"\s*{block_name} := {{')
+        pattern = re.compile('([\w/]+) [|}]')
+        end_marker = re.compile('}')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list_macro(self, macro):
+        """
+        Search for and parse a list of values from a help message starting with
+        a macro in bpftool, for example:
+
+            "       " HELP_SPEC_OPTIONS " |\\n"
+            "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+        Return a set containing all item names, for example:
+
+            {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+        @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+        """
+        start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+        pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+        end_marker = re.compile('}\\\\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def default_options(self):
+        """
+        Return the default options contained in HELP_SPEC_OPTIONS
+        """
+        return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+    def get_bashcomp_list(self, block_name):
+        """
+        Search for and parse a list of type names from a variable in bash
+        completion file, for example:
+
+            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+                kretprobe'
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the blog to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'local {block_name}=\'')
+        pattern = re.compile('(?:.*=\')?([\w/]+)')
+        end_marker = re.compile('\'$')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+    """
+    An abstract extractor for a source file with usage message.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's prog.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+    def get_prog_types(self):
+        return self.get_types_from_array('prog_type_name')
+
+    def get_attach_types(self):
+        return self.get_types_from_array('attach_type_strings')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's map.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+    def get_map_types(self):
+        return self.get_types_from_array('map_type_name')
+
+    def get_map_help(self):
+        return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's cgroup.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's common.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+    def __init__(self):
+        super().__init__()
+        self.attach_types = {}
+
+    def get_attach_types(self):
+        if not self.attach_types:
+            self.attach_types = self.get_types_from_array('attach_type_name')
+        return self.attach_types
+
+    def get_cgroup_attach_types(self):
+        if not self.attach_types:
+            self.get_attach_types()
+        cgroup_types = {}
+        for (key, value) in self.attach_types.items():
+            if key.find('BPF_CGROUP') != -1:
+                cgroup_types[key] = value
+        return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+    """
+    An extractor for generic source code files.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+    """
+    An extractor for the UAPI BPF header.
+    """
+    filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+    def get_prog_types(self):
+        return self.get_enum('bpf_prog_type')
+
+    def get_map_types(self):
+        return self.get_enum('bpf_map_type')
+
+    def get_attach_types(self):
+        return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+    """
+    An abstract extractor for an RST documentation page.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-prog.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-map.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+    def get_map_types(self):
+        return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-cgroup.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+    """
+    An extractor for generic RST documentation pages.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BashcompExtractor(FileExtractor):
+    """
+    An extractor for bpftool's bash completion file.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+    def get_prog_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+    def get_map_types(self):
+        return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+    def get_cgroup_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+    """
+    Print all values that differ between two sets.
+    @first_set: one set to compare
+    @second_set: another set to compare
+    @message: message to print for values belonging to only one of the sets
+    """
+    global retval
+    diff = first_set.symmetric_difference(second_set)
+    if diff:
+        print(message, diff)
+        retval = 1
+
+def main():
+    # No arguments supported at this time, but print usage for -h|--help
+    argParser = argparse.ArgumentParser(description="""
+    Verify that bpftool's code, help messages, documentation and bash
+    completion are all in sync on program types, map types, attach types, and
+    options. Also check that bpftool is in sync with the UAPI BPF header.
+    """)
+    args = argParser.parse_args()
+
+    # Map types (enum)
+
+    bpf_info = BpfHeaderExtractor()
+    ref = bpf_info.get_map_types()
+
+    map_info = MapFileExtractor()
+    source_map_items = map_info.get_map_types()
+    map_types_enum = set(source_map_items.keys())
+
+    verify(ref, map_types_enum,
+            f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+    # Map types (names)
+
+    source_map_types = set(source_map_items.values())
+    source_map_types.discard('unspec')
+
+    help_map_types = map_info.get_map_help()
+    help_map_options = map_info.get_options()
+    map_info.close()
+
+    man_map_info = ManMapExtractor()
+    man_map_options = man_map_info.get_options()
+    man_map_types = man_map_info.get_map_types()
+    man_map_info.close()
+
+    bashcomp_info = BashcompExtractor()
+    bashcomp_map_types = bashcomp_info.get_map_types()
+
+    verify(source_map_types, help_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+    verify(source_map_types, man_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+    verify(help_map_options, man_map_options,
+            f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+    verify(source_map_types, bashcomp_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+    # Program types (enum)
+
+    ref = bpf_info.get_prog_types()
+
+    prog_info = ProgFileExtractor()
+    prog_types = set(prog_info.get_prog_types().keys())
+
+    verify(ref, prog_types,
+            f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+    # Attach types (enum)
+
+    ref = bpf_info.get_attach_types()
+    bpf_info.close()
+
+    common_info = CommonFileExtractor()
+    attach_types = common_info.get_attach_types()
+
+    verify(ref, attach_types,
+            f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+    # Attach types (names)
+
+    source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+    help_prog_attach_types = prog_info.get_prog_attach_help()
+    help_prog_options = prog_info.get_options()
+    prog_info.close()
+
+    man_prog_info = ManProgExtractor()
+    man_prog_options = man_prog_info.get_options()
+    man_prog_attach_types = man_prog_info.get_attach_types()
+    man_prog_info.close()
+
+    bashcomp_info.reset_read() # We stopped at map types, rewind
+    bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+    verify(source_prog_attach_types, help_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_prog_attach_types, man_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+    verify(help_prog_options, man_prog_options,
+            f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+    verify(source_prog_attach_types, bashcomp_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+    # Cgroup attach types
+
+    source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+    common_info.close()
+
+    cgroup_info = CgroupFileExtractor()
+    help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+    help_cgroup_options = cgroup_info.get_options()
+    cgroup_info.close()
+
+    man_cgroup_info = ManCgroupExtractor()
+    man_cgroup_options = man_cgroup_info.get_options()
+    man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+    man_cgroup_info.close()
+
+    bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+    bashcomp_info.close()
+
+    verify(source_cgroup_attach_types, help_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_cgroup_attach_types, man_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+    verify(help_cgroup_options, man_cgroup_options,
+            f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+    verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+    # Options for remaining commands
+
+    for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+        source_info = GenericSourceExtractor(cmd + '.c')
+        help_cmd_options = source_info.get_options()
+        source_info.close()
+
+        man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+        man_cmd_options = man_cmd_info.get_options()
+        man_cmd_info.close()
+
+        verify(help_cmd_options, man_cmd_options,
+                f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+    source_main_info = GenericSourceExtractor('main.c')
+    help_main_options = source_main_info.get_options()
+    source_main_info.close()
+
+    man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+    man_main_options = man_main_info.get_options()
+    man_main_info.close()
+
+    verify(help_main_options, man_main_options,
+            f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+    sys.exit(retval)
+
+if __name__ == "__main__":
+    main()
index ed12111..679cf96 100755 (executable)
@@ -4,11 +4,17 @@ set -e
 
 # Assume script is located under tools/testing/selftests/bpf/. We want to start
 # build attempts from the top of kernel repository.
-SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_PATH=$(realpath $0)
 SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
-KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
 cd $KDIR_ROOT_DIR
 
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+       echo -e "skip:    bpftool files not found!\n"
+       exit 4 # KSFT_SKIP=4
+fi
+
 for tgt in docs docs-clean; do
        make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
 done
index 30cbf5d..c7a36a9 100644 (file)
@@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
        udp = socket(AF_INET, SOCK_DGRAM, 0);
        i = 0;
        err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
-       if (!err) {
-               printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+       if (err) {
+               printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
                       i, udp);
                goto out_sockmap;
        }
@@ -985,7 +985,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 
                FD_ZERO(&w);
                FD_SET(sfd[3], &w);
-               to.tv_sec = 1;
+               to.tv_sec = 30;
                to.tv_usec = 0;
                s = select(sfd[3] + 1, &w, NULL, NULL, &to);
                if (s == -1) {
@@ -1153,12 +1153,17 @@ out_sockmap:
 }
 
 #define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
 static void test_map_in_map(void)
 {
        struct bpf_object *obj;
        struct bpf_map *map;
        int mim_fd, fd, err;
        int pos = 0;
+       struct bpf_map_info info = {};
+       __u32 len = sizeof(info);
+       __u32 id = 0;
+       libbpf_print_fn_t old_print_fn;
 
        obj = bpf_object__open(MAPINMAP_PROG);
 
@@ -1228,11 +1233,72 @@ static void test_map_in_map(void)
        }
 
        close(fd);
+       fd = -1;
        bpf_object__close(obj);
+
+       /* Test that failing bpf_object__create_map() destroys the inner map */
+       obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+       err = libbpf_get_error(obj);
+       if (err) {
+               printf("Failed to load %s program: %d %d",
+                      MAPINMAP_INVALID_PROG, err, errno);
+               goto out_map_in_map;
+       }
+
+       map = bpf_object__find_map_by_name(obj, "mim");
+       if (!map) {
+               printf("Failed to load array of maps from test prog\n");
+               goto out_map_in_map;
+       }
+
+       old_print_fn = libbpf_set_print(NULL);
+
+       err = bpf_object__load(obj);
+       if (!err) {
+               printf("Loading obj supposed to fail\n");
+               goto out_map_in_map;
+       }
+
+       libbpf_set_print(old_print_fn);
+
+       /* Iterate over all maps to check whether the internal map
+        * ("mim.internal") has been destroyed.
+        */
+       while (true) {
+               err = bpf_map_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT)
+                               break;
+                       printf("Failed to get next map: %d", errno);
+                       goto out_map_in_map;
+               }
+
+               fd = bpf_map_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       printf("Failed to get map by id %u: %d", id, errno);
+                       goto out_map_in_map;
+               }
+
+               err = bpf_obj_get_info_by_fd(fd, &info, &len);
+               if (err) {
+                       printf("Failed to get map info by fd %d: %d", fd,
+                              errno);
+                       goto out_map_in_map;
+               }
+
+               if (!strcmp(info.name, "mim.inner")) {
+                       printf("Inner map mim.inner was not destroyed\n");
+                       goto out_map_in_map;
+               }
+       }
+
        return;
 
 out_map_in_map:
-       close(fd);
+       if (fd >= 0)
+               close(fd);
        exit(1);
 }
 
@@ -1330,15 +1396,22 @@ static void test_map_stress(void)
 #define DO_DELETE 0
 
 #define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
 
 static int map_update_retriable(int map_fd, const void *key, const void *value,
                                int flags, int attempts)
 {
+       int delay = rand() % MIN_DELAY_RANGE_US;
+
        while (bpf_map_update_elem(map_fd, key, value, flags)) {
                if (!attempts || (errno != EAGAIN && errno != EBUSY))
                        return -errno;
 
-               usleep(1);
+               if (delay <= MAX_DELAY_US / 2)
+                       delay *= 2;
+
+               usleep(delay);
                attempts--;
        }
 
@@ -1347,11 +1420,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
 
 static int map_delete_retriable(int map_fd, const void *key, int attempts)
 {
+       int delay = rand() % MIN_DELAY_RANGE_US;
+
        while (bpf_map_delete_elem(map_fd, key)) {
                if (!attempts || (errno != EAGAIN && errno != EBUSY))
                        return -errno;
 
-               usleep(1);
+               if (delay <= MAX_DELAY_US / 2)
+                       delay *= 2;
+
+               usleep(delay);
                attempts--;
        }
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644 (file)
index a7b9a69..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-                       const char *name)
-{
-       struct bpf_map *map;
-
-       map = bpf_object__find_map_by_name(obj, name);
-       if (!map) {
-               printf("%s:FAIL:map '%s' not found\n", test, name);
-               return -1;
-       }
-       return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-       struct percpu_net_cnt *percpu_netcnt;
-       struct bpf_cgroup_storage_key key;
-       int map_fd, percpu_map_fd;
-       int error = EXIT_FAILURE;
-       struct net_cnt netcnt;
-       struct bpf_object *obj;
-       int prog_fd, cgroup_fd;
-       unsigned long packets;
-       unsigned long bytes;
-       int cpu, nproc;
-       __u32 prog_cnt;
-
-       nproc = get_nprocs_conf();
-       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
-       if (!percpu_netcnt) {
-               printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
-               goto err;
-       }
-
-       if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
-                         &obj, &prog_fd)) {
-               printf("Failed to load bpf program\n");
-               goto out;
-       }
-
-       cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-       if (cgroup_fd < 0)
-               goto err;
-
-       /* Attach bpf program */
-       if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
-               printf("Failed to attach bpf program");
-               goto err;
-       }
-
-       if (system("which ping6 &>/dev/null") == 0)
-               assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
-       else
-               assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
-       if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
-                          &prog_cnt)) {
-               printf("Failed to query attached programs");
-               goto err;
-       }
-
-       map_fd = bpf_find_map(__func__, obj, "netcnt");
-       if (map_fd < 0) {
-               printf("Failed to find bpf map with net counters");
-               goto err;
-       }
-
-       percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
-       if (percpu_map_fd < 0) {
-               printf("Failed to find bpf map with percpu net counters");
-               goto err;
-       }
-
-       if (bpf_map_get_next_key(map_fd, NULL, &key)) {
-               printf("Failed to get key in cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
-               printf("Failed to lookup cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
-               printf("Failed to lookup percpu cgroup storage\n");
-               goto err;
-       }
-
-       /* Some packets can be still in per-cpu cache, but not more than
-        * MAX_PERCPU_PACKETS.
-        */
-       packets = netcnt.packets;
-       bytes = netcnt.bytes;
-       for (cpu = 0; cpu < nproc; cpu++) {
-               if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
-                       printf("Unexpected percpu value: %llu\n",
-                              percpu_netcnt[cpu].packets);
-                       goto err;
-               }
-
-               packets += percpu_netcnt[cpu].packets;
-               bytes += percpu_netcnt[cpu].bytes;
-       }
-
-       /* No packets should be lost */
-       if (packets != 10000) {
-               printf("Unexpected packet count: %lu\n", packets);
-               goto err;
-       }
-
-       /* Let's check that bytes counter matches the number of packets
-        * multiplied by the size of ipv6 ICMP packet.
-        */
-       if (bytes != packets * 104) {
-               printf("Unexpected bytes count: %lu\n", bytes);
-               goto err;
-       }
-
-       error = 0;
-       printf("test_netcnt:PASS\n");
-
-err:
-       cleanup_cgroup_environment();
-       free(percpu_netcnt);
-
-out:
-       return error;
-}
index 6f10310..cc1cd24 100644 (file)
 #include <execinfo.h> /* backtrace */
 #include <linux/membarrier.h>
 
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+       while (*str && *pat && *pat != '*') {
+               if (*str != *pat)
+                       return false;
+               str++;
+               pat++;
+       }
+       /* Check wild card */
+       if (*pat == '*') {
+               while (*pat == '*')
+                       pat++;
+               if (!*pat) /* Tail wild card matches all */
+                       return true;
+               while (*str)
+                       if (glob_match(str++, pat))
+                               return true;
+       }
+       return !*str && !*pat;
+}
+
 #define EXIT_NO_TEST           2
 #define EXIT_ERR_SETUP_INFRA   3
 
@@ -55,12 +77,12 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
        int i;
 
        for (i = 0; i < sel->blacklist.cnt; i++) {
-               if (strstr(name, sel->blacklist.strs[i]))
+               if (glob_match(name, sel->blacklist.strs[i]))
                        return false;
        }
 
        for (i = 0; i < sel->whitelist.cnt; i++) {
-               if (strstr(name, sel->whitelist.strs[i]))
+               if (glob_match(name, sel->whitelist.strs[i]))
                        return true;
        }
 
@@ -148,18 +170,18 @@ void test__end_subtest()
        struct prog_test_def *test = env.test;
        int sub_error_cnt = test->error_cnt - test->old_error_cnt;
 
+       dump_test_log(test, sub_error_cnt);
+
+       fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+              test->test_num, test->subtest_num, test->test_name, test->subtest_name,
+              sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
        if (sub_error_cnt)
                env.fail_cnt++;
        else if (test->skip_cnt == 0)
                env.sub_succ_cnt++;
        skip_account();
 
-       dump_test_log(test, sub_error_cnt);
-
-       fprintf(env.stdout, "#%d/%d %s:%s\n",
-              test->test_num, test->subtest_num, test->subtest_name,
-              sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
        free(test->subtest_name);
        test->subtest_name = NULL;
 }
@@ -450,6 +472,8 @@ enum ARG_KEYS {
        ARG_VERBOSE = 'v',
        ARG_GET_TEST_CNT = 'c',
        ARG_LIST_TEST_NAMES = 'l',
+       ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+       ARG_TEST_NAME_GLOB_DENYLIST = 'd',
 };
 
 static const struct argp_option opts[] = {
@@ -467,6 +491,10 @@ static const struct argp_option opts[] = {
          "Get number of selected top-level tests " },
        { "list", ARG_LIST_TEST_NAMES, NULL, 0,
          "List test names that would run (without running them) " },
+       { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+         "Run tests with name matching the pattern (supports '*' wildcard)." },
+       { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+         "Don't run tests with name matching the pattern (supports '*' wildcard)." },
        {},
 };
 
@@ -491,36 +519,48 @@ static void free_str_set(const struct str_set *set)
        free(set->strs);
 }
 
-static int parse_str_list(const char *s, struct str_set *set)
+static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
 {
        char *input, *state = NULL, *next, **tmp, **strs = NULL;
-       int cnt = 0;
+       int i, cnt = 0;
 
        input = strdup(s);
        if (!input)
                return -ENOMEM;
 
-       set->cnt = 0;
-       set->strs = NULL;
-
        while ((next = strtok_r(state ? NULL : input, ",", &state))) {
                tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
                if (!tmp)
                        goto err;
                strs = tmp;
 
-               strs[cnt] = strdup(next);
-               if (!strs[cnt])
-                       goto err;
+               if (is_glob_pattern) {
+                       strs[cnt] = strdup(next);
+                       if (!strs[cnt])
+                               goto err;
+               } else {
+                       strs[cnt] = malloc(strlen(next) + 2 + 1);
+                       if (!strs[cnt])
+                               goto err;
+                       sprintf(strs[cnt], "*%s*", next);
+               }
 
                cnt++;
        }
 
-       set->cnt = cnt;
-       set->strs = (const char **)strs;
+       tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
+       if (!tmp)
+               goto err;
+       memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
+       set->strs = (const char **)tmp;
+       set->cnt += cnt;
+
        free(input);
+       free(strs);
        return 0;
 err:
+       for (i = 0; i < cnt; i++)
+               free(strs[i]);
        free(strs);
        free(input);
        return -ENOMEM;
@@ -553,29 +593,35 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
                }
                break;
        }
+       case ARG_TEST_NAME_GLOB_ALLOWLIST:
        case ARG_TEST_NAME: {
                char *subtest_str = strchr(arg, '/');
 
                if (subtest_str) {
                        *subtest_str = '\0';
                        if (parse_str_list(subtest_str + 1,
-                                          &env->subtest_selector.whitelist))
+                                          &env->subtest_selector.whitelist,
+                                          key == ARG_TEST_NAME_GLOB_ALLOWLIST))
                                return -ENOMEM;
                }
-               if (parse_str_list(arg, &env->test_selector.whitelist))
+               if (parse_str_list(arg, &env->test_selector.whitelist,
+                                  key == ARG_TEST_NAME_GLOB_ALLOWLIST))
                        return -ENOMEM;
                break;
        }
+       case ARG_TEST_NAME_GLOB_DENYLIST:
        case ARG_TEST_NAME_BLACKLIST: {
                char *subtest_str = strchr(arg, '/');
 
                if (subtest_str) {
                        *subtest_str = '\0';
                        if (parse_str_list(subtest_str + 1,
-                                          &env->subtest_selector.blacklist))
+                                          &env->subtest_selector.blacklist,
+                                          key == ARG_TEST_NAME_GLOB_DENYLIST))
                                return -ENOMEM;
                }
-               if (parse_str_list(arg, &env->test_selector.blacklist))
+               if (parse_str_list(arg, &env->test_selector.blacklist,
+                                  key == ARG_TEST_NAME_GLOB_DENYLIST))
                        return -ENOMEM;
                break;
        }
@@ -755,7 +801,7 @@ int main(int argc, char **argv)
        save_netns();
        stdio_hijack();
        env.has_testmod = true;
-       if (load_bpf_testmod()) {
+       if (!env.list_test_names && load_bpf_testmod()) {
                fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
                env.has_testmod = false;
        }
@@ -786,24 +832,25 @@ int main(int argc, char **argv)
                        test__end_subtest();
 
                test->tested = true;
-               if (test->error_cnt)
-                       env.fail_cnt++;
-               else
-                       env.succ_cnt++;
-               skip_account();
 
                dump_test_log(test, test->error_cnt);
 
                fprintf(env.stdout, "#%d %s:%s\n",
                        test->test_num, test->test_name,
-                       test->error_cnt ? "FAIL" : "OK");
+                       test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+               if (test->error_cnt)
+                       env.fail_cnt++;
+               else
+                       env.succ_cnt++;
+               skip_account();
 
                reset_affinity();
                restore_netns();
                if (test->need_cgroup_cleanup)
                        cleanup_cgroup_environment();
        }
-       if (env.has_testmod)
+       if (!env.list_test_names && env.has_testmod)
                unload_bpf_testmod();
        stdio_restore();
 
index 8ef7f33..c8c2bf8 100644 (file)
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
        ___ok;                                                          \
 })
 
+#define ASSERT_STRNEQ(actual, expected, len, name) ({                  \
+       static int duration = 0;                                        \
+       const char *___act = actual;                                    \
+       const char *___exp = expected;                                  \
+       int ___len = len;                                               \
+       bool ___ok = strncmp(___act, ___exp, ___len) == 0;              \
+       CHECK(!___ok, (name),                                           \
+             "unexpected %s: actual '%.*s' != expected '%.*s'\n",      \
+             (name), ___len, ___act, ___len, ___exp);                  \
+       ___ok;                                                          \
+})
+
 #define ASSERT_OK(res, name) ({                                                \
        static int duration = 0;                                        \
        long long ___res = (res);                                       \
index c9dde9b..088fcad 100755 (executable)
@@ -69,7 +69,7 @@ cleanup() {
 }
 
 server_listen() {
-       ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+       ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
        server_pid=$!
        sleep 0.2
 }
index ba8ffcd..995278e 100755 (executable)
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
 
 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
 ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
 
 trap cleanup EXIT
index 46633a3..cd7bf32 100755 (executable)
 # ----------------
 # Must run with CAP_NET_ADMIN capability.
 #
-# Run (full color-coded output):
-#   sudo ./test_xsk.sh -c
+# Run:
+#   sudo ./test_xsk.sh
 #
 # If running from kselftests:
-#   sudo make colorconsole=1 run_tests
-#
-# Run (full output without color-coding):
-#   sudo ./test_xsk.sh
+#   sudo make run_tests
 #
 # Run with verbose output:
 #   sudo ./test_xsk.sh -v
@@ -83,7 +80,6 @@
 while getopts "cvD" flag
 do
        case "${flag}" in
-               c) colorconsole=1;;
                v) verbose=1;;
                D) dump_pkts=1;;
        esac
index 1bbd1d9..e7a19b0 100644 (file)
@@ -136,3 +136,90 @@ void read_trace_pipe(void)
                }
        }
 }
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK   0xffff0000UL
+#define LIS_R2          0x3c400000UL
+#define ADDIS_R2_R12    0x3c4c0000UL
+#define ADDI_R2_R2      0x38420000UL
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+       u32 *insn = (u32 *)(uintptr_t)addr;
+
+       /*
+        * A PPC64 ABIv2 function may have a local and a global entry
+        * point. We need to use the local entry point when patching
+        * functions, so identify and step over the global entry point
+        * sequence.
+        *
+        * The global entry point sequence is always of the form:
+        *
+        * addis r2,r12,XXXX
+        * addi  r2,r2,XXXX
+        *
+        * A linker optimisation may convert the addis to lis:
+        *
+        * lis   r2,XXXX
+        * addi  r2,r2,XXXX
+        */
+       if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+            ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+           ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+               return (ssize_t)(insn + 2) - base;
+       else
+               return (uintptr_t)addr - base;
+}
+
+#else
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+       return (uintptr_t)addr - base;
+}
+
+#endif
+
+ssize_t get_base_addr(void)
+{
+       size_t start, offset;
+       char buf[256];
+       FILE *f;
+
+       f = fopen("/proc/self/maps", "r");
+       if (!f)
+               return -errno;
+
+       while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+                     &start, buf, &offset) == 3) {
+               if (strcmp(buf, "r-xp") == 0) {
+                       fclose(f);
+                       return start - offset;
+               }
+       }
+
+       fclose(f);
+       return -EINVAL;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+       size_t start, end, offset;
+       char buf[256];
+       FILE *f;
+
+       f = fopen("/proc/self/maps", "r");
+       if (!f)
+               return -errno;
+
+       while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+               if (addr >= start && addr < end) {
+                       fclose(f);
+                       return (size_t)addr - start + offset;
+               }
+       }
+
+       fclose(f);
+       return -EINVAL;
+}
index f62fdef..d907b44 100644 (file)
@@ -18,4 +18,8 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
 
 void read_trace_pipe(void);
 
+ssize_t get_uprobe_offset(const void *addr, ssize_t base);
+ssize_t get_base_addr(void);
+ssize_t get_rel_offset(uintptr_t addr);
+
 #endif
index 1135fb9..f53ce26 100644 (file)
@@ -70,7 +70,6 @@
 #include <errno.h>
 #include <getopt.h>
 #include <asm/barrier.h>
-typedef __u16 __sum16;
 #include <linux/if_link.h>
 #include <linux/if_ether.h>
 #include <linux/ip.h>
@@ -106,14 +105,9 @@ static const u16 UDP_PORT2 = 2121;
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
-       if (configured_mode == TEST_MODE_UNCONFIGURED) {
-               ksft_exit_fail_msg
-               ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
-       } else {
-               ksft_test_result_fail
-               ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
-               ksft_exit_xfail();
-       }
+       ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+                             strerror(error));
+       ksft_exit_xfail();
 }
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
@@ -126,7 +120,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
                               test_type == TEST_TYPE_STATS ? "Stats" : "",\
                               test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
 
-static void *memset32_htonl(void *dest, u32 val, u32 size)
+static void memset32_htonl(void *dest, u32 val, u32 size)
 {
        u32 *ptr = (u32 *)dest;
        int i;
@@ -135,11 +129,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
 
        for (i = 0; i < (size & (~0x3)); i += 4)
                ptr[i >> 2] = val;
-
-       for (; i < size; i++)
-               ((char *)dest)[i] = ((char *)&val)[i & 3];
-
-       return dest;
 }
 
 /*
@@ -230,13 +219,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
        ip_hdr->check = 0;
 }
 
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
                        struct udphdr *udp_hdr)
 {
        udp_hdr->source = htons(ifobject->src_port);
        udp_hdr->dest = htons(ifobject->dst_port);
        udp_hdr->len = htons(UDP_PKT_SIZE);
-       memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
+       memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
 }
 
 static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
@@ -246,12 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
            udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
 }
 
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
-       memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
 {
        struct xsk_umem_config cfg = {
                .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -260,7 +244,6 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
                .frame_headroom = frame_headroom,
                .flags = XSK_UMEM__DEFAULT_FLAGS
        };
-       int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
        struct xsk_umem_info *umem;
        int ret;
 
@@ -271,7 +254,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
        ret = xsk_umem__create(&umem->umem, buffer, size,
                               &umem->fq, &umem->cq, &cfg);
        if (ret)
-               exit_with_error(ret);
+               exit_with_error(-ret);
 
        umem->buffer = buffer;
 
@@ -285,7 +268,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
 
        ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
        if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
-               exit_with_error(ret);
+               exit_with_error(-ret);
        for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
                *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
        xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -333,20 +316,19 @@ static struct option long_options[] = {
        {"queue", optional_argument, 0, 'q'},
        {"dump-pkts", optional_argument, 0, 'D'},
        {"verbose", no_argument, 0, 'v'},
-       {"tx-pkt-count", optional_argument, 0, 'C'},
        {0, 0, 0, 0}
 };
 
 static void usage(const char *prog)
 {
        const char *str =
-           "  Usage: %s [OPTIONS]\n"
-           "  Options:\n"
-           "  -i, --interface      Use interface\n"
-           "  -q, --queue=n        Use queue n (default 0)\n"
-           "  -D, --dump-pkts      Dump packets L2 - L5\n"
-           "  -v, --verbose        Verbose output\n"
-           "  -C, --tx-pkt-count=n Number of packets to send\n";
+               "  Usage: %s [OPTIONS]\n"
+               "  Options:\n"
+               "  -i, --interface      Use interface\n"
+               "  -q, --queue=n        Use queue n (default 0)\n"
+               "  -D, --dump-pkts      Dump packets L2 - L5\n"
+               "  -v, --verbose        Verbose output\n";
+
        ksft_print_msg(str, prog);
 }
 
@@ -392,7 +374,7 @@ static void parse_command_line(int argc, char **argv)
        opterr = 0;
 
        for (;;) {
-               c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
+               c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
 
                if (c == -1)
                        break;
@@ -413,13 +395,10 @@ static void parse_command_line(int argc, char **argv)
                        interface_index++;
                        break;
                case 'D':
-                       debug_pkt_dump = 1;
-                       break;
-               case 'C':
-                       opt_pkt_count = atoi(optarg);
+                       opt_pkt_dump = true;
                        break;
                case 'v':
-                       opt_verbose = 1;
+                       opt_verbose = true;
                        break;
                default:
                        usage(basename(argv[0]));
@@ -427,17 +406,143 @@ static void parse_command_line(int argc, char **argv)
                }
        }
 
-       if (!opt_pkt_count) {
-               print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
-               opt_pkt_count = DEFAULT_PKT_CNT;
-       }
-
        if (!validate_interfaces()) {
                usage(basename(argv[0]));
                ksft_exit_xfail();
        }
 }
 
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+       if (pkt_nb >= pkt_stream->nb_pkts)
+               return NULL;
+
+       return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+       struct pkt_stream *pkt_stream;
+       u32 i;
+
+       pkt_stream = malloc(sizeof(*pkt_stream));
+       if (!pkt_stream)
+               exit_with_error(ENOMEM);
+
+       pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+       if (!pkt_stream->pkts)
+               exit_with_error(ENOMEM);
+
+       pkt_stream->nb_pkts = nb_pkts;
+       for (i = 0; i < nb_pkts; i++) {
+               pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+               pkt_stream->pkts[i].len = pkt_len;
+               pkt_stream->pkts[i].payload = i;
+       }
+
+       return pkt_stream;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+       struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+       struct udphdr *udp_hdr;
+       struct ethhdr *eth_hdr;
+       struct iphdr *ip_hdr;
+       void *data;
+
+       if (!pkt)
+               return NULL;
+
+       data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+       udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+       ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+       eth_hdr = (struct ethhdr *)data;
+
+       gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+       gen_ip_hdr(ifobject, ip_hdr);
+       gen_udp_csum(udp_hdr, ip_hdr);
+       gen_eth_hdr(ifobject, eth_hdr);
+
+       return pkt;
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+       char s[INET_ADDRSTRLEN];
+       struct ethhdr *ethhdr;
+       struct udphdr *udphdr;
+       struct iphdr *iphdr;
+       int payload, i;
+
+       ethhdr = pkt;
+       iphdr = pkt + sizeof(*ethhdr);
+       udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+       /*extract L2 frame */
+       fprintf(stdout, "DEBUG>> L2: dst mac: ");
+       for (i = 0; i < ETH_ALEN; i++)
+               fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+       fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+       for (i = 0; i < ETH_ALEN; i++)
+               fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+       /*extract L3 frame */
+       fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+       fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+               inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+       fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+               inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+       /*extract L4 frame */
+       fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+       fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+       /*extract L5 frame */
+       payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+       fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+       fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+{
+       void *data = xsk_umem__get_data(buffer, desc->addr);
+       struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+       if (!pkt) {
+               ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+               return false;
+       }
+
+       if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+               u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+               if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+                       pkt_dump(data, PKT_SIZE);
+
+               if (pkt->len != desc->len) {
+                       ksft_test_result_fail
+                               ("ERROR: [%s] expected length [%d], got length [%d]\n",
+                                       __func__, pkt->len, desc->len);
+                       return false;
+               }
+
+               if (pkt->payload != seqnum) {
+                       ksft_test_result_fail
+                               ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
+                                       __func__, pkt->payload, seqnum);
+                       return false;
+               }
+       } else {
+               ksft_print_msg("Invalid frame received: ");
+               ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+                              iphdr->tos);
+               return false;
+       }
+
+       return true;
+}
+
 static void kick_tx(struct xsk_socket_info *xsk)
 {
        int ret;
@@ -448,7 +553,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
        exit_with_error(errno);
 }
 
-static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 {
        unsigned int rcvd;
        u32 idx;
@@ -463,133 +568,108 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
        if (rcvd) {
                xsk_ring_cons__release(&xsk->umem->cq, rcvd);
                xsk->outstanding_tx -= rcvd;
-               xsk->tx_npkts += rcvd;
        }
 }
 
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
+                        struct pollfd *fds)
 {
-       unsigned int rcvd, i;
-       u32 idx_rx = 0, idx_fq = 0;
+       u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
+       struct pkt *pkt;
        int ret;
 
-       rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-       if (!rcvd) {
-               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-                       ret = poll(fds, 1, POLL_TMOUT);
-                       if (ret < 0)
-                               exit_with_error(ret);
+       pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+       while (pkt) {
+               rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+               if (!rcvd) {
+                       if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+                               ret = poll(fds, 1, POLL_TMOUT);
+                               if (ret < 0)
+                                       exit_with_error(-ret);
+                       }
+                       continue;
                }
-               return;
-       }
 
-       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-       while (ret != rcvd) {
-               if (ret < 0)
-                       exit_with_error(ret);
-               if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-                       ret = poll(fds, 1, POLL_TMOUT);
+               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+               while (ret != rcvd) {
                        if (ret < 0)
-                               exit_with_error(ret);
+                               exit_with_error(-ret);
+                       if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+                               ret = poll(fds, 1, POLL_TMOUT);
+                               if (ret < 0)
+                                       exit_with_error(-ret);
+                       }
+                       ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
                }
-               ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-       }
-
-       for (i = 0; i < rcvd; i++) {
-               u64 addr, orig;
-
-               addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
-               xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
-               orig = xsk_umem__extract_addr(addr);
 
-               addr = xsk_umem__add_offset_to_addr(addr);
-               pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
-               if (!pkt_node_rx)
-                       exit_with_error(errno);
+               for (i = 0; i < rcvd; i++) {
+                       const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+                       u64 addr = desc->addr, orig;
 
-               pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
-               if (!pkt_node_rx->pkt_frame)
-                       exit_with_error(errno);
+                       orig = xsk_umem__extract_addr(addr);
+                       addr = xsk_umem__add_offset_to_addr(addr);
+                       if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+                               return;
 
-               memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
-                      PKT_SIZE);
-
-               TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+                       *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+                       pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+               }
 
-               *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+               xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+               xsk_ring_cons__release(&xsk->rx, rcvd);
        }
-
-       xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
-       xsk_ring_cons__release(&xsk->rx, rcvd);
-       xsk->rx_npkts += rcvd;
 }
 
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 {
-       u32 idx = 0;
-       unsigned int i;
-       bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
-       u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
+       struct xsk_socket_info *xsk = ifobject->xsk;
+       u32 i, idx;
 
-       while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
-               complete_tx_only(xsk, batch_size);
+       while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+               complete_pkts(xsk, BATCH_SIZE);
 
-       for (i = 0; i < batch_size; i++) {
+       for (i = 0; i < BATCH_SIZE; i++) {
                struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+               struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
 
-               tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
-               tx_desc->len = len;
-       }
+               if (!pkt)
+                       break;
 
-       xsk_ring_prod__submit(&xsk->tx, batch_size);
-       if (!tx_invalid_test) {
-               xsk->outstanding_tx += batch_size;
-       } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-               kick_tx(xsk);
+               tx_desc->addr = pkt->addr;
+               tx_desc->len = pkt->len;
+               pkt_nb++;
        }
-       *frameptr += batch_size;
-       *frameptr %= num_frames;
-       complete_tx_only(xsk, batch_size);
-}
-
-static int get_batch_size(int pkt_cnt)
-{
-       if (!opt_pkt_count)
-               return BATCH_SIZE;
 
-       if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
-               return BATCH_SIZE;
+       xsk_ring_prod__submit(&xsk->tx, i);
+       if (stat_test_type != STAT_TEST_TX_INVALID)
+               xsk->outstanding_tx += i;
+       else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+               kick_tx(xsk);
+       complete_pkts(xsk, i);
 
-       return opt_pkt_count - pkt_cnt;
+       return i;
 }
 
-static void complete_tx_only_all(struct ifobject *ifobject)
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 {
-       bool pending;
-
-       do {
-               pending = false;
-               if (ifobject->xsk->outstanding_tx) {
-                       complete_tx_only(ifobject->xsk, BATCH_SIZE);
-                       pending = !!ifobject->xsk->outstanding_tx;
-               }
-       } while (pending);
+       while (xsk->outstanding_tx)
+               complete_pkts(xsk, BATCH_SIZE);
 }
 
-static void tx_only_all(struct ifobject *ifobject)
+static void send_pkts(struct ifobject *ifobject)
 {
        struct pollfd fds[MAX_SOCKS] = { };
-       u32 frame_nb = 0;
-       int pkt_cnt = 0;
-       int ret;
+       u32 pkt_cnt = 0;
 
        fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
        fds[0].events = POLLOUT;
 
-       while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
-               int batch_size = get_batch_size(pkt_cnt);
+       while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+               u32 sent;
 
                if (test_type == TEST_TYPE_POLL) {
+                       int ret;
+
                        ret = poll(fds, 1, POLL_TMOUT);
                        if (ret <= 0)
                                continue;
@@ -598,78 +678,30 @@ static void tx_only_all(struct ifobject *ifobject)
                                continue;
                }
 
-               tx_only(ifobject->xsk, &frame_nb, batch_size);
-               pkt_cnt += batch_size;
+               sent = __send_pkts(ifobject, pkt_cnt);
+               pkt_cnt += sent;
+               usleep(10);
        }
 
-       if (opt_pkt_count)
-               complete_tx_only_all(ifobject);
+       wait_for_tx_completion(ifobject->xsk);
 }
 
-static void worker_pkt_dump(void)
-{
-       struct ethhdr *ethhdr;
-       struct iphdr *iphdr;
-       struct udphdr *udphdr;
-       char s[128];
-       int payload;
-       void *ptr;
-
-       fprintf(stdout, "---------------------------------------\n");
-       for (int iter = 0; iter < num_frames - 1; iter++) {
-               ptr = pkt_buf[iter]->payload;
-               ethhdr = ptr;
-               iphdr = ptr + sizeof(*ethhdr);
-               udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
-
-               /*extract L2 frame */
-               fprintf(stdout, "DEBUG>> L2: dst mac: ");
-               for (int i = 0; i < ETH_ALEN; i++)
-                       fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
-               fprintf(stdout, "\nDEBUG>> L2: src mac: ");
-               for (int i = 0; i < ETH_ALEN; i++)
-                       fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
-               /*extract L3 frame */
-               fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
-               fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
-                       inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
-               fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
-                       inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
-               /*extract L4 frame */
-               fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
-               fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
-               /*extract L5 frame */
-               payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
-
-               if (payload == EOT) {
-                       print_verbose("End-of-transmission frame received\n");
-                       fprintf(stdout, "---------------------------------------\n");
-                       break;
-               }
-               fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
-               fprintf(stdout, "---------------------------------------\n");
-       }
-}
-
-static void worker_stats_validate(struct ifobject *ifobject)
+static bool rx_stats_are_valid(struct ifobject *ifobject)
 {
+       u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       int fd = xsk_socket__fd(xsk);
        struct xdp_statistics stats;
        socklen_t optlen;
        int err;
-       struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
-                                                       ifdict[!ifobject->ifdict_index]->xsk->xsk :
-                                                       ifobject->xsk->xsk;
-       int fd = xsk_socket__fd(xsk);
-       unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
-
-       sigvar = 0;
 
        optlen = sizeof(stats);
        err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-       if (err)
-               return;
+       if (err) {
+               ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+                                     __func__, -err, strerror(-err));
+               return true;
+       }
 
        if (optlen == sizeof(struct xdp_statistics)) {
                switch (stat_test_type) {
@@ -677,8 +709,7 @@ static void worker_stats_validate(struct ifobject *ifobject)
                        xsk_stat = stats.rx_dropped;
                        break;
                case STAT_TEST_TX_INVALID:
-                       xsk_stat = stats.tx_invalid_descs;
-                       break;
+                       return true;
                case STAT_TEST_RX_FULL:
                        xsk_stat = stats.rx_ring_full;
                        expected_stat -= RX_FULL_RXQSIZE;
@@ -691,99 +722,70 @@ static void worker_stats_validate(struct ifobject *ifobject)
                }
 
                if (xsk_stat == expected_stat)
-                       sigvar = 1;
+                       return true;
        }
+
+       return false;
 }
 
-static void worker_pkt_validate(void)
+static void tx_stats_validate(struct ifobject *ifobject)
 {
-       u32 payloadseqnum = -2;
-       struct iphdr *iphdr;
-
-       while (1) {
-               pkt_node_rx_q = TAILQ_LAST(&head, head_s);
-               if (!pkt_node_rx_q)
-                       break;
-
-               iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
-               /*do not increment pktcounter if !(tos=0x9 and ipv4) */
-               if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
-                       payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
-                       if (debug_pkt_dump && payloadseqnum != EOT) {
-                               pkt_obj = malloc(sizeof(*pkt_obj));
-                               pkt_obj->payload = malloc(PKT_SIZE);
-                               memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
-                               pkt_buf[payloadseqnum] = pkt_obj;
-                       }
-
-                       if (payloadseqnum == EOT) {
-                               print_verbose("End-of-transmission frame received: PASS\n");
-                               sigvar = 1;
-                               break;
-                       }
+       struct xsk_socket *xsk = ifobject->xsk->xsk;
+       int fd = xsk_socket__fd(xsk);
+       struct xdp_statistics stats;
+       socklen_t optlen;
+       int err;
 
-                       if (prev_pkt + 1 != payloadseqnum) {
-                               ksft_test_result_fail
-                                   ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
-                                    __func__, prev_pkt, payloadseqnum);
-                               ksft_exit_xfail();
-                       }
+       optlen = sizeof(stats);
+       err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+       if (err) {
+               ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+                                     __func__, -err, strerror(-err));
+               return;
+       }
 
-                       prev_pkt = payloadseqnum;
-                       pkt_counter++;
-               } else {
-                       ksft_print_msg("Invalid frame received: ");
-                       ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
-                                      iphdr->tos);
-               }
+       if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
+               return;
 
-               TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
-               free(pkt_node_rx_q->pkt_frame);
-               free(pkt_node_rx_q);
-               pkt_node_rx_q = NULL;
-       }
+       ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+                             __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
 }
 
 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 {
-       int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+       u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+       int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+       size_t mmap_sz = umem_sz;
        int ctr = 0;
        int ret;
 
        ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
        if (test_type == TEST_TYPE_BPF_RES)
-               umem_sz *= 2;
+               mmap_sz *= 2;
 
-       bufs = mmap(NULL, umem_sz,
-                   PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+       bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
        if (bufs == MAP_FAILED)
                exit_with_error(errno);
 
-       xsk_configure_umem(ifobject, bufs, 0);
-       ifobject->umem = ifobject->umem_arr[0];
-       ret = xsk_configure_socket(ifobject, 0);
-
-       /* Retry Create Socket if it fails as xsk_socket__create()
-        * is asynchronous
-        */
-       while (ret && ctr < SOCK_RECONF_CTR) {
-               xsk_configure_umem(ifobject, bufs, 0);
+       while (ctr++ < SOCK_RECONF_CTR) {
+               xsk_configure_umem(ifobject, bufs, umem_sz, 0);
                ifobject->umem = ifobject->umem_arr[0];
                ret = xsk_configure_socket(ifobject, 0);
+               if (!ret)
+                       break;
+
+               /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
                usleep(USLEEP_MAX);
-               ctr++;
+               if (ctr >= SOCK_RECONF_CTR)
+                       exit_with_error(-ret);
        }
 
-       if (ctr >= SOCK_RECONF_CTR)
-               exit_with_error(ret);
-
        ifobject->umem = ifobject->umem_arr[0];
        ifobject->xsk = ifobject->xsk_arr[0];
 
        if (test_type == TEST_TYPE_BPF_RES) {
-               xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+               xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
                ifobject->umem = ifobject->umem_arr[1];
                ret = xsk_configure_socket(ifobject, 1);
        }
@@ -809,33 +811,18 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
 
 static void *worker_testapp_validate_tx(void *arg)
 {
-       struct udphdr *udp_hdr =
-           (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
-       struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
-       struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
        struct ifobject *ifobject = (struct ifobject *)arg;
-       struct generic_data data;
        void *bufs = NULL;
 
        if (!second_step)
                thread_common_ops(ifobject, bufs);
 
-       for (int i = 0; i < num_frames; i++) {
-               /*send EOT frame */
-               if (i == (num_frames - 1))
-                       data.seqnum = -1;
-               else
-                       data.seqnum = i;
-               gen_udp_hdr(&data, ifobject, udp_hdr);
-               gen_ip_hdr(ifobject, ip_hdr);
-               gen_udp_csum(udp_hdr, ip_hdr);
-               gen_eth_hdr(ifobject, eth_hdr);
-               gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
-       }
+       print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+                     ifobject->ifname);
+       send_pkts(ifobject);
 
-       print_verbose("Sending %d packets on interface %s\n",
-                     (opt_pkt_count - 1), ifobject->ifname);
-       tx_only_all(ifobject);
+       if (stat_test_type == STAT_TEST_TX_INVALID)
+               tx_stats_validate(ifobject);
 
        testapp_cleanup_xsk_res(ifobject);
        pthread_exit(NULL);
@@ -853,31 +840,16 @@ static void *worker_testapp_validate_rx(void *arg)
        if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
                xsk_populate_fill_ring(ifobject->umem);
 
-       TAILQ_INIT(&head);
-       if (debug_pkt_dump) {
-               pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
-               if (!pkt_buf)
-                       exit_with_error(errno);
-       }
-
        fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
        fds[0].events = POLLIN;
 
        pthread_barrier_wait(&barr);
 
-       while (1) {
-               if (test_type != TEST_TYPE_STATS) {
-                       rx_pkt(ifobject->xsk, fds);
-                       worker_pkt_validate();
-               } else {
-                       worker_stats_validate(ifobject);
-               }
-               if (sigvar)
-                       break;
-       }
-
-       print_verbose("Received %d packets on interface %s\n",
-                     pkt_counter, ifobject->ifname);
+       if (test_type == TEST_TYPE_STATS)
+               while (!rx_stats_are_valid(ifobject))
+                       continue;
+       else
+               receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
 
        if (test_type == TEST_TYPE_TEARDOWN)
                print_verbose("Destroying socket\n");
@@ -890,10 +862,18 @@ static void testapp_validate(void)
 {
        bool bidi = test_type == TEST_TYPE_BIDI;
        bool bpf = test_type == TEST_TYPE_BPF_RES;
+       struct pkt_stream *pkt_stream;
 
        if (pthread_barrier_init(&barr, NULL, 2))
                exit_with_error(errno);
 
+       if (stat_test_type == STAT_TEST_TX_INVALID)
+               pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+       else
+               pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+       ifdict_tx->pkt_stream = pkt_stream;
+       ifdict_rx->pkt_stream = pkt_stream;
+
        /*Spawn RX thread */
        pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
 
@@ -907,15 +887,6 @@ static void testapp_validate(void)
        pthread_join(t1, NULL);
        pthread_join(t0, NULL);
 
-       if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
-               worker_pkt_dump();
-               for (int iter = 0; iter < num_frames - 1; iter++) {
-                       free(pkt_buf[iter]->payload);
-                       free(pkt_buf[iter]);
-               }
-               free(pkt_buf);
-       }
-
        if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
                print_ksft_result();
 }
@@ -925,9 +896,6 @@ static void testapp_teardown(void)
        int i;
 
        for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-               pkt_counter = 0;
-               prev_pkt = -1;
-               sigvar = 0;
                print_verbose("Creating socket\n");
                testapp_validate();
        }
@@ -953,9 +921,6 @@ static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
 static void testapp_bidi(void)
 {
        for (int i = 0; i < MAX_BIDI_ITER; i++) {
-               pkt_counter = 0;
-               prev_pkt = -1;
-               sigvar = 0;
                print_verbose("Creating socket\n");
                testapp_validate();
                if (!second_step) {
@@ -987,9 +952,6 @@ static void testapp_bpf_res(void)
        int i;
 
        for (i = 0; i < MAX_BPF_ITER; i++) {
-               pkt_counter = 0;
-               prev_pkt = -1;
-               sigvar = 0;
                print_verbose("Creating socket\n");
                testapp_validate();
                if (!second_step)
@@ -1017,6 +979,8 @@ static void testapp_stats(void)
                case STAT_TEST_RX_FULL:
                        rxqsize = RX_FULL_RXQSIZE;
                        break;
+               case STAT_TEST_TX_INVALID:
+                       continue;
                default:
                        break;
                }
@@ -1062,10 +1026,7 @@ static void run_pkt_test(int mode, int type)
 
        /* reset defaults after potential previous test */
        xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-       pkt_counter = 0;
        second_step = 0;
-       prev_pkt = -1;
-       sigvar = 0;
        stat_test_type = -1;
        rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
        frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
@@ -1102,62 +1063,70 @@ static void run_pkt_test(int mode, int type)
        }
 }
 
+static struct ifobject *ifobject_create(void)
+{
+       struct ifobject *ifobj;
+
+       ifobj = calloc(1, sizeof(struct ifobject));
+       if (!ifobj)
+               return NULL;
+
+       ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+       if (!ifobj->xsk_arr)
+               goto out_xsk_arr;
+
+       ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+       if (!ifobj->umem_arr)
+               goto out_umem_arr;
+
+       return ifobj;
+
+out_umem_arr:
+       free(ifobj->xsk_arr);
+out_xsk_arr:
+       free(ifobj);
+       return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+       free(ifobj->umem_arr);
+       free(ifobj->xsk_arr);
+       free(ifobj);
+}
+
 int main(int argc, char **argv)
 {
        struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
-       bool failure = false;
        int i, j;
 
        if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
                exit_with_error(errno);
 
-       for (int i = 0; i < MAX_INTERFACES; i++) {
-               ifdict[i] = malloc(sizeof(struct ifobject));
+       for (i = 0; i < MAX_INTERFACES; i++) {
+               ifdict[i] = ifobject_create();
                if (!ifdict[i])
-                       exit_with_error(errno);
-
-               ifdict[i]->ifdict_index = i;
-               ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
-               if (!ifdict[i]->xsk_arr) {
-                       failure = true;
-                       goto cleanup;
-               }
-               ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
-               if (!ifdict[i]->umem_arr) {
-                       failure = true;
-                       goto cleanup;
-               }
+                       exit_with_error(ENOMEM);
        }
 
        setlocale(LC_ALL, "");
 
        parse_command_line(argc, argv);
 
-       num_frames = ++opt_pkt_count;
-
-       init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
-       init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+       init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+       init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
 
        ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
-       for (i = 0; i < TEST_MODE_MAX; i++) {
-               for (j = 0; j < TEST_TYPE_MAX; j++)
+       for (i = 0; i < TEST_MODE_MAX; i++)
+               for (j = 0; j < TEST_TYPE_MAX; j++) {
                        run_pkt_test(i, j);
-       }
-
-cleanup:
-       for (int i = 0; i < MAX_INTERFACES; i++) {
-               if (ifdict[i]->ns_fd != -1)
-                       close(ifdict[i]->ns_fd);
-               free(ifdict[i]->xsk_arr);
-               free(ifdict[i]->umem_arr);
-               free(ifdict[i]);
-       }
+                       usleep(USLEEP_MAX);
+               }
 
-       if (failure)
-               exit_with_error(errno);
+       for (i = 0; i < MAX_INTERFACES; i++)
+               ifobject_delete(ifdict[i]);
 
        ksft_exit_pass();
-
        return 0;
 }
index 6c428b2..7e49b9f 100644 (file)
 #define IP_PKT_TOS 0x9
 #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define EOT (-1)
-#define USLEEP_MAX 200000
+#define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define BATCH_SIZE 8
 #define POLL_TMOUT 1000
-#define DEFAULT_PKT_CNT 10000
+#define DEFAULT_PKT_CNT (4 * 1024)
 #define RX_FULL_RXQSIZE 32
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TEST_MODES {
-       TEST_MODE_UNCONFIGURED = -1,
+enum test_mode {
        TEST_MODE_SKB,
        TEST_MODE_DRV,
        TEST_MODE_MAX
 };
 
-enum TEST_TYPES {
+enum test_type {
        TEST_TYPE_NOPOLL,
        TEST_TYPE_POLL,
        TEST_TYPE_TEARDOWN,
@@ -65,7 +60,7 @@ enum TEST_TYPES {
        TEST_TYPE_MAX
 };
 
-enum STAT_TEST_TYPES {
+enum stat_test_type {
        STAT_TEST_RX_DROPPED,
        STAT_TEST_TX_INVALID,
        STAT_TEST_RX_FULL,
@@ -73,21 +68,16 @@ enum STAT_TEST_TYPES {
        STAT_TEST_TYPE_MAX
 };
 
-static int configured_mode = TEST_MODE_UNCONFIGURED;
-static u8 debug_pkt_dump;
-static u32 num_frames;
+static int configured_mode;
+static bool opt_pkt_dump;
+static u32 num_frames = DEFAULT_PKT_CNT / 4;
 static bool second_step;
 static int test_type;
 
-static int opt_pkt_count;
-static u8 opt_verbose;
+static bool opt_verbose;
 
 static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static long prev_pkt = -1;
-static int sigvar;
 static int stat_test_type;
 static u32 rxqsize;
 static u32 frame_headroom;
@@ -104,10 +94,6 @@ struct xsk_socket_info {
        struct xsk_ring_prod tx;
        struct xsk_umem_info *umem;
        struct xsk_socket *xsk;
-       unsigned long rx_npkts;
-       unsigned long tx_npkts;
-       unsigned long prev_rx_npkts;
-       unsigned long prev_tx_npkts;
        u32 outstanding_tx;
 };
 
@@ -118,8 +104,15 @@ struct flow_vector {
        } vector;
 };
 
-struct generic_data {
-       u32 seqnum;
+struct pkt {
+       u64 addr;
+       u32 len;
+       u32 payload;
+};
+
+struct pkt_stream {
+       u32 nb_pkts;
+       struct pkt *pkts;
 };
 
 struct ifobject {
@@ -131,8 +124,8 @@ struct ifobject {
        struct xsk_umem_info *umem;
        void *(*func_ptr)(void *arg);
        struct flow_vector fv;
+       struct pkt_stream *pkt_stream;
        int ns_fd;
-       int ifdict_index;
        u32 dst_ip;
        u32 src_ip;
        u16 src_port;
@@ -149,18 +142,4 @@ static struct ifobject *ifdict_tx;
 pthread_barrier_t barr;
 pthread_t t0, t1;
 
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
-       char *pkt_frame;
-
-       TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
-       char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
 #endif                         /* XDPXCEIVER_H */
index dac1c5f..bf29d25 100755 (executable)
@@ -8,14 +8,8 @@ ksft_xfail=2
 ksft_xpass=3
 ksft_skip=4
 
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
 SPECFILE=veth.spec
 XSKOBJ=xdpxceiver
-NUMPKTS=10000
 
 validate_root_exec()
 {
@@ -50,22 +44,12 @@ validate_veth_spec_file()
 test_status()
 {
        statusval=$1
-       if [ -n "${colorconsole+set}" ]; then
-               if [ $statusval -eq 2 ]; then
-                       echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
-               elif [ $statusval -eq 1 ]; then
-                       echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
-               elif [ $statusval -eq 0 ]; then
-                       echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
-               fi
-       else
-               if [ $statusval -eq 2 ]; then
-                       echo -e "$2: [ FAIL ]"
-               elif [ $statusval -eq 1 ]; then
-                       echo -e "$2: [ SKIPPED ]"
-               elif [ $statusval -eq 0 ]; then
-                       echo -e "$2: [ PASS ]"
-               fi
+       if [ $statusval -eq 2 ]; then
+               echo -e "$2: [ FAIL ]"
+       elif [ $statusval -eq 1 ]; then
+               echo -e "$2: [ SKIPPED ]"
+       elif [ $statusval -eq 0 ]; then
+               echo -e "$2: [ PASS ]"
        fi
 }
 
@@ -107,5 +91,5 @@ validate_ip_utility()
 
 execxdpxceiver()
 {
-       ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+       ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
 }
index 57b505c..e1bf55d 100644 (file)
@@ -57,6 +57,29 @@ const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06,
 const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00};
 const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00};
 const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00};
+const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00};
+const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00,
+                                    0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04,
+                                    0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01,
+                                    0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75,
+                                    0x77, 0x81, 0x02, 0x80};
+const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00,
+                                   0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01};
+const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+                                    0xE1, 0x03};
+const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+                                    0xE1, 0x04};
+const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F};
+const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B,
+                                 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00,
+                                 0x00, 0x00, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02};
+const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F};
+const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02,
+                                  0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45,
+                                  0x53, 0x54, 0x90, 0x00};
+const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00};
 
 struct msgtemplate {
        struct nlmsghdr n;
@@ -87,7 +110,7 @@ error:
 
 static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
                           __u8 genl_cmd, int nla_num, __u16 nla_type[],
-                          void *nla_data[], int nla_len[])
+                          void *nla_data[], int nla_len[], __u16 flags)
 {
        struct sockaddr_nl nladdr;
        struct msgtemplate msg;
@@ -98,7 +121,7 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
 
        msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
        msg.n.nlmsg_type = nlmsg_type;
-       msg.n.nlmsg_flags = NLM_F_REQUEST;
+       msg.n.nlmsg_flags = flags;
        msg.n.nlmsg_seq = 0;
        msg.n.nlmsg_pid = nlmsg_pid;
        msg.g.cmd = genl_cmd;
@@ -110,11 +133,11 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
                na->nla_type = nla_type[cnt];
                na->nla_len = nla_len[cnt] + NLA_HDRLEN;
 
-               if (nla_len > 0)
+               if (nla_len[cnt] > 0)
                        memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]);
 
-               msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
-               prv_len = na->nla_len;
+               prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN;
+               msg.n.nlmsg_len += prv_len;
        }
 
        buf = (char *)&msg;
@@ -146,11 +169,11 @@ static int send_get_nfc_family(int sd, __u32 pid)
        nla_get_family_data = family_name;
 
        return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY,
-                               1, &nla_get_family_type,
-                               &nla_get_family_data, &nla_get_family_len);
+                               1, &nla_get_family_type, &nla_get_family_data,
+                               &nla_get_family_len, NLM_F_REQUEST);
 }
 
-static int get_family_id(int sd, __u32 pid)
+static int get_family_id(int sd, __u32 pid, __u32 *event_group)
 {
        struct {
                struct nlmsghdr n;
@@ -158,8 +181,9 @@ static int get_family_id(int sd, __u32 pid)
                char buf[512];
        } ans;
        struct nlattr *na;
-       int rep_len;
+       int resp_len;
        __u16 id;
+       int len;
        int rc;
 
        rc = send_get_nfc_family(sd, pid);
@@ -167,17 +191,49 @@ static int get_family_id(int sd, __u32 pid)
        if (rc < 0)
                return 0;
 
-       rep_len = recv(sd, &ans, sizeof(ans), 0);
+       resp_len = recv(sd, &ans, sizeof(ans), 0);
 
-       if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 ||
-           !NLMSG_OK(&ans.n, rep_len))
+       if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+           !NLMSG_OK(&ans.n, resp_len))
                return 0;
 
+       len = 0;
+       resp_len = GENLMSG_PAYLOAD(&ans.n);
        na = (struct nlattr *)GENLMSG_DATA(&ans);
-       na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
-       if (na->nla_type == CTRL_ATTR_FAMILY_ID)
-               id = *(__u16 *)NLA_DATA(na);
 
+       while (len < resp_len) {
+               len += NLA_ALIGN(na->nla_len);
+               if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+                       id = *(__u16 *)NLA_DATA(na);
+               } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) {
+                       struct nlattr *nested_na;
+                       struct nlattr *group_na;
+                       int group_attr_len;
+                       int group_attr;
+
+                       nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN);
+                       group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN);
+                       group_attr_len = 0;
+
+                       for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC;
+                               group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) {
+                               if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) {
+                                       *event_group = *(__u32 *)((char *)group_na +
+                                                                 NLA_HDRLEN);
+                                       break;
+                               }
+
+                               group_attr_len += NLA_ALIGN(group_na->nla_len) +
+                                                 NLA_HDRLEN;
+                               if (group_attr_len >= nested_na->nla_len)
+                                       break;
+
+                               group_na = (struct nlattr *)((char *)group_na +
+                                                            NLA_ALIGN(group_na->nla_len));
+                       }
+               }
+               na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+       }
        return id;
 }
 
@@ -189,12 +245,12 @@ static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
        int nla_len = 4;
 
        return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1,
-                               &nla_type, &nla_data, &nla_len);
+                               &nla_type, &nla_data, &nla_len, NLM_F_REQUEST);
 }
 
 static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg)
 {
-       int rc, rep_len;
+       int rc, resp_len;
 
        rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id);
        if (rc < 0) {
@@ -202,14 +258,14 @@ static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtem
                goto error;
        }
 
-       rep_len = recv(sd, msg, sizeof(*msg), 0);
-       if (rep_len < 0) {
+       resp_len = recv(sd, msg, sizeof(*msg), 0);
+       if (resp_len < 0) {
                rc = -2;
                goto error;
        }
 
        if (msg->n.nlmsg_type == NLMSG_ERROR ||
-           !NLMSG_OK(&msg->n, rep_len)) {
+           !NLMSG_OK(&msg->n, resp_len)) {
                rc = -3;
                goto error;
        }
@@ -222,21 +278,21 @@ error:
 static __u8 get_dev_enable_state(struct msgtemplate *msg)
 {
        struct nlattr *na;
-       int rep_len;
+       int resp_len;
        int len;
 
-       rep_len = GENLMSG_PAYLOAD(&msg->n);
+       resp_len = GENLMSG_PAYLOAD(&msg->n);
        na = (struct nlattr *)GENLMSG_DATA(msg);
        len = 0;
 
-       while (len < rep_len) {
+       while (len < resp_len) {
                len += NLA_ALIGN(na->nla_len);
                if (na->nla_type == NFC_ATTR_DEVICE_POWERED)
                        return *(char *)NLA_DATA(na);
                na = (struct nlattr *)(GENLMSG_DATA(msg) + len);
        }
 
-       return rep_len;
+       return resp_len;
 }
 
 FIXTURE(NCI) {
@@ -270,8 +326,7 @@ static void *virtual_dev_open(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_reset_cmd))
@@ -280,8 +335,7 @@ static void *virtual_dev_open(void *data)
                goto error;
        write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_init_cmd))
@@ -290,8 +344,7 @@ static void *virtual_dev_open(void *data)
                goto error;
        write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp));
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_rf_disc_map_cmd))
@@ -313,8 +366,7 @@ static void *virtual_dev_open_v2(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_reset_cmd))
@@ -324,8 +376,7 @@ static void *virtual_dev_open_v2(void *data)
        write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
        write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_init_cmd_v2))
@@ -334,8 +385,7 @@ static void *virtual_dev_open_v2(void *data)
                goto error;
        write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2));
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_rf_disc_map_cmd))
@@ -353,6 +403,7 @@ FIXTURE_SETUP(NCI)
 {
        struct msgtemplate msg;
        pthread_t thread_t;
+       __u32 event_group;
        int status;
        int rc;
 
@@ -364,12 +415,16 @@ FIXTURE_SETUP(NCI)
        ASSERT_NE(self->sd, -1);
 
        self->pid = getpid();
-       self->fid = get_family_id(self->sd, self->pid);
+       self->fid = get_family_id(self->sd, self->pid, &event_group);
        ASSERT_NE(self->fid, -1);
 
        self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR);
        ASSERT_GT(self->virtual_nci_fd, -1);
 
+       rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group,
+                       sizeof(event_group));
+       ASSERT_NE(rc, -1);
+
        rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex);
        ASSERT_EQ(rc, 0);
 
@@ -402,8 +457,7 @@ static void *virtual_deinit(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_reset_cmd))
@@ -425,8 +479,7 @@ static void *virtual_deinit_v2(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_reset_cmd))
@@ -489,16 +542,14 @@ static void *virtual_poll_start(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_rf_discovery_cmd))
                goto error;
        if (memcmp(nci_rf_discovery_cmd, buf, len))
                goto error;
-       write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp))
-               ;
+       write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp));
 
        return (void *)0;
 error:
@@ -513,8 +564,7 @@ static void *virtual_poll_stop(void *data)
 
        dev_fd = *(int *)data;
 
-       while ((len = read(dev_fd, buf, 258)) == 0)
-               ;
+       len = read(dev_fd, buf, 258);
        if (len <= 0)
                goto error;
        if (len != sizeof(nci_rf_deact_cmd))
@@ -528,38 +578,282 @@ error:
        return (void *)-1;
 }
 
-TEST_F(NCI, start_poll)
+int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid)
 {
        __u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX,
                                         NFC_ATTR_PROTOCOLS};
-       void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto};
+       void *nla_start_poll_data[2] = {&dev_idx, &proto};
        int nla_start_poll_len[2] = {4, 4};
        pthread_t thread_t;
        int status;
        int rc;
 
        rc = pthread_create(&thread_t, NULL, virtual_poll_start,
-                           (void *)&self->virtual_nci_fd);
-       ASSERT_GT(rc, -1);
+                           (void *)&virtual_fd);
+       if (rc < 0)
+               return rc;
 
-       rc = send_cmd_mt_nla(self->sd, self->fid, self->pid,
-                            NFC_CMD_START_POLL, 2, nla_start_poll_type,
-                            nla_start_poll_data, nla_start_poll_len);
-       EXPECT_EQ(rc, 0);
+       rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type,
+                            nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST);
+       if (rc != 0)
+               return rc;
 
        pthread_join(thread_t, (void **)&status);
-       ASSERT_EQ(status, 0);
+       return status;
+}
+
+int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+       pthread_t thread_t;
+       int status;
+       int rc;
 
        rc = pthread_create(&thread_t, NULL, virtual_poll_stop,
-                           (void *)&self->virtual_nci_fd);
-       ASSERT_GT(rc, -1);
+                           (void *)&virtual_fd);
+       if (rc < 0)
+               return rc;
 
-       rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
-                              NFC_CMD_STOP_POLL, self->dev_idex);
-       EXPECT_EQ(rc, 0);
+       rc = send_cmd_with_idx(sd, fid, pid,
+                              NFC_CMD_STOP_POLL, dev_idx);
+       if (rc != 0)
+               return rc;
 
        pthread_join(thread_t, (void **)&status);
+       return status;
+}
+
+TEST_F(NCI, start_poll)
+{
+       int status;
+
+       status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+                              self->sd, self->fid, self->pid);
+       EXPECT_EQ(status, 0);
+
+       status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd,
+                             self->fid, self->pid);
+       EXPECT_EQ(status, 0);
+}
+
+int get_taginfo(int dev_idx, int sd, int fid, int pid)
+{
+       struct {
+               struct nlmsghdr n;
+               struct genlmsghdr g;
+               char buf[512];
+       } ans;
+
+       struct nlattr *na;
+       __u32 protocol;
+       int targetidx;
+       __u8 sel_res;
+       int resp_len;
+       int len;
+
+       __u16 tagid_type;
+       void *tagid_type_data;
+       int tagid_len;
+
+       tagid_type = NFC_ATTR_DEVICE_INDEX;
+       tagid_type_data = &dev_idx;
+       tagid_len = 4;
+
+       send_cmd_mt_nla(sd, fid, pid, NFC_CMD_GET_TARGET, 1, &tagid_type,
+                       &tagid_type_data, &tagid_len, NLM_F_REQUEST | NLM_F_DUMP);
+       resp_len = recv(sd, &ans, sizeof(ans), 0);
+       if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+           !NLMSG_OK(&ans.n, resp_len))
+               return -1;
+
+       resp_len = GENLMSG_PAYLOAD(&ans.n);
+       na = (struct nlattr *)GENLMSG_DATA(&ans);
+
+       len = 0;
+       targetidx = -1;
+       protocol = -1;
+       sel_res = -1;
+
+       while (len < resp_len) {
+               len += NLA_ALIGN(na->nla_len);
+
+               if (na->nla_type == NFC_ATTR_TARGET_INDEX)
+                       targetidx = *(int *)((char *)na + NLA_HDRLEN);
+               else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES)
+                       sel_res = *(__u8 *)((char *)na + NLA_HDRLEN);
+               else if (na->nla_type == NFC_ATTR_PROTOCOLS)
+                       protocol = *(__u32 *)((char *)na + NLA_HDRLEN);
+
+               na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+       }
+
+       if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK)
+               return -1;
+
+       return targetidx;
+}
+
+int connect_socket(int dev_idx, int target_idx)
+{
+       struct sockaddr_nfc addr;
+       int sock;
+       int err = 0;
+
+       sock = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW);
+       if (sock == -1)
+               return -1;
+
+       addr.sa_family = AF_NFC;
+       addr.dev_idx = dev_idx;
+       addr.target_idx = target_idx;
+       addr.nfc_protocol = NFC_PROTO_ISO14443;
+
+       err = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+       if (err) {
+               close(sock);
+               return -1;
+       }
+
+       return sock;
+}
+
+int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+       struct genlmsghdr *genlhdr;
+       struct nlattr *na;
+       char evt_data[255];
+       int target_idx;
+       int resp_len;
+       int evt_dev;
+
+       write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf));
+       resp_len = recv(sd, evt_data, sizeof(evt_data), 0);
+       if (resp_len < 0)
+               return -1;
+
+       genlhdr = (struct genlmsghdr *)((struct nlmsghdr *)evt_data + 1);
+       na = (struct nlattr *)(genlhdr + 1);
+       evt_dev = *(int *)((char *)na + NLA_HDRLEN);
+       if (dev_idx != evt_dev)
+               return -1;
+
+       target_idx = get_taginfo(dev_idx, sd, fid, pid);
+       if (target_idx == -1)
+               return -1;
+       return connect_socket(dev_idx, target_idx);
+}
+
+int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len,
+                      const __u8 *rsp, __u32 rsp_len)
+{
+       char buf[256];
+       unsigned int len;
+
+       send(nfc_sock, &cmd[3], cmd_len - 3, 0);
+       len = read(virtual_fd, buf, cmd_len);
+       if (len < 0 || memcmp(buf, cmd, cmd_len))
+               return -1;
+
+       write(virtual_fd, rsp, rsp_len);
+       len = recv(nfc_sock, buf, rsp_len - 2, 0);
+       if (len < 0 || memcmp(&buf[1], &rsp[3], rsp_len - 3))
+               return -1;
+
+       return 0;
+}
+
+int read_tag(int nfc_sock, int virtual_fd)
+{
+       if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd,
+                              sizeof(nci_t4t_select_cmd), nci_t4t_rsp_ok,
+                              sizeof(nci_t4t_rsp_ok)))
+               return -1;
+
+       if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd2,
+                              sizeof(nci_t4t_select_cmd2), nci_t4t_rsp_ok,
+                              sizeof(nci_t4t_rsp_ok)))
+               return -1;
+
+       if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd,
+                              sizeof(nci_t4t_read_cmd), nci_t4t_read_rsp,
+                              sizeof(nci_t4t_read_rsp)))
+               return -1;
+
+       if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd3,
+                              sizeof(nci_t4t_select_cmd3), nci_t4t_rsp_ok,
+                              sizeof(nci_t4t_rsp_ok)))
+               return -1;
+
+       if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd2,
+                              sizeof(nci_t4t_read_cmd2), nci_t4t_read_rsp2,
+                              sizeof(nci_t4t_read_rsp2)))
+               return -1;
+
+       return read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd3,
+                                 sizeof(nci_t4t_read_cmd3), nci_t4t_read_rsp3,
+                                 sizeof(nci_t4t_read_rsp3));
+}
+
+static void *virtual_deactivate_proc(void *data)
+{
+       int virtual_fd;
+       char buf[256];
+       int deactcmd_len;
+       int len;
+
+       virtual_fd = *(int *)data;
+       deactcmd_len = sizeof(nci_rf_deact_cmd);
+       len = read(virtual_fd, buf, deactcmd_len);
+       if (len != deactcmd_len || memcmp(buf, nci_rf_deact_cmd, deactcmd_len))
+               return (void *)-1;
+
+       write(virtual_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+       write(virtual_fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf));
+
+       return (void *)0;
+}
+
+int disconnect_tag(int nfc_sock, int virtual_fd)
+{
+       pthread_t thread_t;
+       char buf[256];
+       int status;
+       int len;
+
+       send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0);
+       len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3));
+       if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3)))
+               return -1;
+
+       len = recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0);
+       if (len != -1)
+               return -1;
+
+       status = pthread_create(&thread_t, NULL, virtual_deactivate_proc,
+                               (void *)&virtual_fd);
+
+       close(nfc_sock);
+       pthread_join(thread_t, (void **)&status);
+       return status;
+}
+
+TEST_F(NCI, t4t_tag_read)
+{
+       int nfc_sock;
+       int status;
+
+       status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+                              self->sd, self->fid, self->pid);
+       EXPECT_EQ(status, 0);
+
+       nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd,
+                              self->fid, self->pid);
+       ASSERT_GT(nfc_sock, -1);
+
+       status = read_tag(nfc_sock, self->virtual_nci_fd);
        ASSERT_EQ(status, 0);
+
+       status = disconnect_tag(nfc_sock, self->virtual_nci_fd);
+       EXPECT_EQ(status, 0);
 }
 
 TEST_F(NCI, deinit)
index 79c9eb0..378c0aa 100644 (file)
@@ -25,6 +25,8 @@ TEST_PROGS += bareudp.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +38,11 @@ TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644 (file)
index 0000000..cfc7f4f
--- /dev/null
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644 (file)
index 0000000..0f3e376
--- /dev/null
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+       signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+       struct sigaction sa;
+
+       sa.sa_sigaction = sig_hand;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+       return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+       int flags = fcntl(fd, F_GETFL, 0);
+
+       if (set)
+               flags &= ~O_NONBLOCK;
+       else
+               flags |= O_NONBLOCK;
+       fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+       char cmd;
+
+       cmd = 'S';
+       write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+       char buf[5];
+
+       read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+       fflush(NULL);
+       unlink(sock_name);
+       kill(producer_id, SIGTERM);
+       exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+       int ans = -1;
+
+       if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+               perror("SIOCATMARK Failed");
+#endif
+       }
+       return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+       *c = ' ';
+       if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+               perror("Reading MSG_OOB Failed");
+#endif
+       }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+       int len = 0;
+
+       memset(buf, size, '0');
+       len = read(pfd, buf, size);
+#ifdef DEBUG
+       if (len < 0)
+               perror("read failed");
+#endif
+       return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+       struct pollfd pfds[1];
+
+       pfds[0].fd = pfd;
+       pfds[0].events = event;
+       poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+       int cfd;
+       char buf[64];
+       int i;
+
+       memset(buf, 'x', sizeof(buf));
+       cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+       wait_for_signal(pipefd[0]);
+       if (connect(cfd, (struct sockaddr *)consumer_addr,
+                    sizeof(struct sockaddr)) != 0) {
+               perror("Connect failed");
+               kill(0, SIGTERM);
+               exit(1);
+       }
+
+       for (i = 0; i < 2; i++) {
+               /* Test 1: Test for SIGURG and OOB */
+               wait_for_signal(pipefd[0]);
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 2: Test for OOB being overwitten */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '#';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 3: Test for SIOCATMARK */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               send(cfd, buf, sizeof(buf), 0);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 4: Test for 1byte OOB msg */
+               memset(buf, 'x', sizeof(buf));
+               buf[0] = '@';
+               send(cfd, buf, 1, MSG_OOB);
+       }
+}
+
+int
+main(int argc, char **argv)
+{
+       int lfd, pfd;
+       struct sockaddr_un consumer_addr, paddr;
+       socklen_t len = sizeof(consumer_addr);
+       char buf[1024];
+       int on = 0;
+       char oob;
+       int flags;
+       int atmark;
+       char *tmp_file;
+
+       lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+       memset(&consumer_addr, 0, sizeof(consumer_addr));
+       consumer_addr.sun_family = AF_UNIX;
+       sprintf(sock_name, "unix_oob_%d", getpid());
+       unlink(sock_name);
+       strcpy(consumer_addr.sun_path, sock_name);
+
+       if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+                 sizeof(consumer_addr))) != 0) {
+               perror("socket bind failed");
+               exit(1);
+       }
+
+       pipe(pipefd);
+
+       listen(lfd, 1);
+
+       producer_id = fork();
+       if (producer_id == 0) {
+               producer(&consumer_addr);
+               exit(0);
+       }
+
+       set_sig_handler(SIGURG);
+       signal_producer(pipefd[1]);
+
+       pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+       fcntl(pfd, F_SETOWN, getpid());
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1:
+        * veriyf that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       read_oob(pfd, &oob);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 63 || oob != '@') {
+               fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+                        signal_recvd, len, oob);
+                       die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2:
+        * Verify that the first OOB is over written by
+        * the 2nd one and the first OOB is returned as
+        * part of the read, and sigurg is received.
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = 0;
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       read_oob(pfd, &oob);
+       if (!signal_recvd || len != 127 || oob != '#') {
+               fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+               signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3:
+        * verify that 2nd oob over writes
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and atmark
+        * is set.
+        * oob is '%' and second read returns
+        * 64 bytes.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 150)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+               fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
+               "atmark %d\n", signal_recvd, len, oob, atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+
+       len = read_data(pfd, buf, 1024);
+       if (len != 64) {
+               fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4:
+        * verify that a single byte
+        * oob message is delivered.
+        * set non blocking mode and
+        * check proper error is
+        * returned and sigurg is
+        * received and correct
+        * oob is read.
+        */
+
+       set_filemode(pfd, 0);
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if ((len == -1) && (errno == 11))
+               len = 0;
+
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 0 || oob != '@') {
+               fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       set_filemode(pfd, 1);
+
+       /* Inline Testing */
+
+       on = 1;
+       if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+               perror("SO_OOBINLINE");
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1 -- Inline:
+        * Check that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+
+       if (!signal_recvd || len != 63) {
+               fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+                       signal_recvd, len);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+
+       if (len != 1) {
+               fprintf(stderr,
+                        "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2 -- Inline:
+        * Verify that the first OOB is over written by
+        * the 2nd one and read breaks correctly on
+        * 2nd OOB boundary with the first OOB returned as
+        * part of the read, and sigurg is delivered and
+        * siocatmark returns true.
+        * next read returns one byte, the oob byte
+        * and siocatmark returns false.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 127 || atmark != 1 || !signal_recvd) {
+               fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+                        len, atmark);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 1 || buf[0] != '#' || atmark == 1) {
+               fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+                       len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3 -- Inline:
+        * verify that 2nd oob over writes
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and siocatmark
+        * is true after the read.
+        * subsequent read returns 65 bytes
+        * because of oob which should be '%'.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 126)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (!signal_recvd || len != 127 || !atmark) {
+               fprintf(stderr,
+                        "Test 3 Inline failed, sigurg %d len %d data %c\n",
+                        signal_recvd, len, buf[0]);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 65 || buf[0] != '%' || atmark != 0) {
+               fprintf(stderr,
+                        "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+                        len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4 -- Inline:
+        * verify that a single
+        * byte oob message is delivered
+        * and read returns one byte, the oob
+        * byte and sigurg is received
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 1 || buf[0] != '@') {
+               fprintf(stderr,
+                       "Test 4 Inline failed, signal %d len %d data %c\n",
+               signal_recvd, len, buf[0]);
+               die(1);
+       }
+       die(0);
+}
index 6f905b5..21b646d 100644 (file)
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
index a8ad928..13350cd 100755 (executable)
@@ -37,6 +37,9 @@
 #
 # server / client nomenclature relative to ns-A
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 VERBOSE=0
 
 NSA_DEV=eth1
@@ -3879,6 +3882,32 @@ use_case_ping_lla_multi()
        log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
 }
 
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+       setup "yes"
+
+       local port="12345"
+
+       run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+       run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+       run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+       sleep 1
+       run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+       log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+       run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+       sleep 1
+       run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+       log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+       # Cleanup
+       run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+       run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
 use_cases()
 {
        log_section "Use cases"
@@ -3886,6 +3915,8 @@ use_cases()
        use_case_br
        log_subsection "Ping LLA with multiple interfaces"
        use_case_ping_lla_multi
+       log_subsection "SNAT on VRF"
+       use_case_snat_on_vrf
 }
 
 ################################################################################
@@ -3946,7 +3977,7 @@ fi
 which nettest >/dev/null
 if [ $? -ne 0 ]; then
        echo "'nettest' command not found; skipping tests"
-       exit 0
+       exit $ksft_skip
 fi
 
 declare -i nfail=0
index a93e6b6..43ea840 100755 (executable)
@@ -3,6 +3,9 @@
 
 # This test is for checking IPv4 and IPv6 FIB rules API
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ret=0
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
 
 if [ "$(id -u)" -ne 0 ];then
        echo "SKIP: Need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
        echo "SKIP: Could not run test without ip tool"
-       exit 0
+       exit $ksft_skip
 fi
 
 # start clean
index 13d3d44..2c14a86 100644 (file)
@@ -1,6 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ##############################################################################
 # Defines
 
@@ -9,11 +12,11 @@ if [[ ! -v DEVLINK_DEV ]]; then
                             | jq -r '.port | keys[]' | cut -d/ -f-2)
        if [ -z "$DEVLINK_DEV" ]; then
                echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
-               exit 1
+               exit $ksft_skip
        fi
        if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
                echo "SKIP: devlink device's bus is not PCI"
-               exit 1
+               exit $ksft_skip
        fi
 
        DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
@@ -22,7 +25,7 @@ elif [[ ! -z "$DEVLINK_DEV" ]]; then
        devlink dev show $DEVLINK_DEV &> /dev/null
        if [ $? -ne 0 ]; then
                echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
-               exit 1
+               exit $ksft_skip
        fi
 fi
 
@@ -32,19 +35,19 @@ fi
 devlink help 2>&1 | grep resource &> /dev/null
 if [ $? -ne 0 ]; then
        echo "SKIP: iproute2 too old, missing devlink resource support"
-       exit 1
+       exit $ksft_skip
 fi
 
 devlink help 2>&1 | grep trap &> /dev/null
 if [ $? -ne 0 ]; then
        echo "SKIP: iproute2 too old, missing devlink trap support"
-       exit 1
+       exit $ksft_skip
 fi
 
 devlink dev help 2>&1 | grep info &> /dev/null
 if [ $? -ne 0 ]; then
        echo "SKIP: iproute2 too old, missing devlink dev info support"
-       exit 1
+       exit $ksft_skip
 fi
 
 ##############################################################################
index 42e28c9..e7fc5c3 100644 (file)
@@ -4,6 +4,9 @@
 ##############################################################################
 # Defines
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Can be overridden by the configuration file.
 PING=${PING:=ping}
 PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
        tc -j &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing JSON support"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -51,7 +54,7 @@ check_tc_mpls_support()
                matchall action pipe &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing MPLS support"
-               return 1
+               return $ksft_skip
        fi
        tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
                matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
 
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
-               return 1
+               return $ksft_skip
        fi
 
        tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
 
        if [[ $ret -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
-               return 1
+               return $ksft_skip
        fi
 }
 
@@ -88,7 +91,7 @@ check_tc_shblock_support()
        tc filter help 2>&1 | grep block &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing shared block support"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -97,7 +100,7 @@ check_tc_chain_support()
        tc help 2>&1|grep chain &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing chain support"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
        tc actions help 2>&1 | grep -q hw_stats
        if [[ $? -ne 0 ]]; then
                echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
        ethtool --help 2>&1| grep lanes &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: ethtool too old; it is missing lanes support"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
 if [[ "$(id -u)" -ne 0 ]]; then
        echo "SKIP: need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
 
        if [[ ! -x "$(command -v "$cmd")" ]]; then
                echo "SKIP: $cmd not installed"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -143,7 +146,7 @@ require_command $MZ
 
 if [[ ! -v NUM_NETIFS ]]; then
        echo "SKIP: importer does not define \"NUM_NETIFS\""
-       exit 1
+       exit $ksft_skip
 fi
 
 ##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
        ip link show dev ${NETIFS[p$i]} &> /dev/null
        if [[ $? -ne 0 ]]; then
                echo "SKIP: could not find all required interfaces"
-               exit 1
+               exit $ksft_skip
        fi
 done
 
index 76efb1f..a0d612e 100755 (executable)
@@ -411,7 +411,7 @@ ping_ipv6()
 ip nexthop ls >/dev/null 2>&1
 if [ $? -ne 0 ]; then
        echo "Nexthop objects not supported; skipping tests"
-       exit 0
+       exit $ksft_skip
 fi
 
 trap cleanup EXIT
index 4898dd4..cb08ffe 100755 (executable)
@@ -386,7 +386,7 @@ ping_ipv6()
 ip nexthop ls >/dev/null 2>&1
 if [ $? -ne 0 ]; then
        echo "Nexthop objects not supported; skipping tests"
-       exit 0
+       exit $ksft_skip
 fi
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644 (file)
index 0000000..cf37ce8
--- /dev/null
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception being the last data
+ *  packet coalesced: it can be smaller than the rest and coalesced
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACK does not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG, RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with incorrect checksum, non-consecutive seqno and
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have paddings, such as timestamp, headers
+ *  that are padding differently would not be coalesced.
+ * 5.ip:
+ *  Packets with different (ECN, TTL, TOS) header, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ *  Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+       va_list args;
+
+       if (verbose) {
+               va_start(args, fmt);
+               vfprintf(stderr, fmt, args);
+               va_end(args);
+       }
+}
+
+static void setup_sock_filter(int fd)
+{
+       const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+       const int ethproto_off = offsetof(struct ethhdr, h_proto);
+       int optlen = 0;
+       int ipproto_off;
+       int next_off;
+
+       if (proto == PF_INET)
+               next_off = offsetof(struct iphdr, protocol);
+       else
+               next_off = offsetof(struct ipv6hdr, nexthdr);
+       ipproto_off = ETH_HLEN + next_off;
+
+       if (strcmp(testname, "ip") == 0) {
+               if (proto == PF_INET)
+                       optlen = sizeof(struct ip_timestamp);
+               else
+                       optlen = sizeof(struct ip6_frag);
+       }
+
+       struct sock_filter filter[] = {
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+                       BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+                       BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+                       BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+
+       struct sock_fprog bpf = {
+               .len = ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+               error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+       uint16_t *words = data;
+       int i;
+
+       for (i = 0; i < len / 2; i++)
+               sum += words[i];
+       if (len & 1)
+               sum += ((char *)data)[len - 1];
+       return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+       sum = checksum_nofold(data, len, sum);
+       while (sum > 0xFFFF)
+               sum = (sum & 0xFFFF) + (sum >> 16);
+       return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+       struct pseudo_header6 {
+               struct in6_addr saddr;
+               struct in6_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph6;
+       struct pseudo_header4 {
+               struct in_addr saddr;
+               struct in_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph4;
+       uint32_t sum = 0;
+
+       if (proto == PF_INET6) {
+               if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+                       error(1, errno, "inet_pton6 source ip pseudo");
+               if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+                       error(1, errno, "inet_pton6 dest ip pseudo");
+               ph6.protocol = htons(IPPROTO_TCP);
+               ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+       } else if (proto == PF_INET) {
+               if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+                       error(1, errno, "inet_pton source ip pseudo");
+               if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+                       error(1, errno, "inet_pton dest ip pseudo");
+               ph4.protocol = htons(IPPROTO_TCP);
+               ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+       }
+
+       return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+       if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+                  &mac_addr[0], &mac_addr[1], &mac_addr[2],
+                  &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+               error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+       struct ethhdr *eth = buf;
+
+       memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+       memcpy(eth->h_source, src_mac, ETH_ALEN);
+       eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+       struct ipv6hdr *ip6h = buf;
+       struct iphdr *iph = buf;
+
+       if (proto == PF_INET6) {
+               memset(ip6h, 0, sizeof(*ip6h));
+
+               ip6h->version = 6;
+               ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+               ip6h->nexthdr = IPPROTO_TCP;
+               ip6h->hop_limit = 8;
+               if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+                       error(1, errno, "inet_pton source ip6");
+               if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip6");
+       } else if (proto == PF_INET) {
+               memset(iph, 0, sizeof(*iph));
+
+               iph->version = 4;
+               iph->ihl = 5;
+               iph->ttl = 8;
+               iph->protocol   = IPPROTO_TCP;
+               iph->tot_len = htons(sizeof(struct tcphdr) +
+                               payload_len + sizeof(struct iphdr));
+               iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+               if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+                       error(1, errno, "inet_pton source ip");
+               if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip");
+               iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+       }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+                               int payload_len, int fin)
+{
+       struct tcphdr *tcph = buf;
+
+       memset(tcph, 0, sizeof(*tcph));
+
+       tcph->source = htons(SPORT);
+       tcph->dest = htons(DPORT);
+       tcph->seq = ntohl(START_SEQ + seq_offset);
+       tcph->ack_seq = ntohl(START_ACK + ack_offset);
+       tcph->ack = 1;
+       tcph->fin = fin;
+       tcph->doff = 5;
+       tcph->window = htons(TCP_MAXWIN);
+       tcph->urg_ptr = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+       int ret = -1;
+
+       ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+       if (ret == -1)
+               error(1, errno, "sendto failure");
+       if (ret != len)
+               error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+                         int payload_len, int fin)
+{
+       memset(buf, 0, total_hdr_len);
+       memset(buf + total_hdr_len, 'a', payload_len);
+       fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+                           payload_len, fin);
+       fill_networklayer(buf + ETH_HLEN, payload_len);
+       fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+                      int rst, int urg)
+{
+       static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       int payload_len, pkt_size, flag, i;
+       struct tcphdr *tcph;
+
+       payload_len = PAYLOAD_LEN * psh;
+       pkt_size = total_hdr_len + payload_len;
+       flag = NUM_PACKETS / 2;
+
+       create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+       tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+       tcph->psh = psh;
+       tcph->syn = syn;
+       tcph->rst = rst;
+       tcph->urg = urg;
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+
+       for (i = 0; i < NUM_PACKETS + 1; i++) {
+               if (i == flag) {
+                       write_packet(fd, flag_buf, pkt_size, daddr);
+                       continue;
+               }
+               create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+       }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+                          int payload_len1, int payload_len2)
+{
+       static char buf[ETH_HLEN + IP_MAXPACKET];
+
+       create_packet(buf, 0, 0, payload_len1, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+       create_packet(buf, payload_len1, 0, payload_len2, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+       static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+       static char last[TOTAL_HDR_LEN + MSS];
+       static char new_seg[TOTAL_HDR_LEN + MSS];
+       int i;
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               create_packet(pkts[i], i * MSS, 0, MSS, 0);
+       create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+       create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+       write_packet(fd, last, total_hdr_len + remainder, daddr);
+       write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN];
+
+       create_packet(buf, 0, 0, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       create_packet(buf, 0, 1, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+       struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       memmove(buf, no_ext, total_hdr_len);
+       memmove(buf + total_hdr_len + extlen,
+               no_ext + total_hdr_len, PAYLOAD_LEN);
+
+       tcphdr->doff = tcphdr->doff + (extlen / 4);
+       tcphdr->check = 0;
+       tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+       if (proto == PF_INET) {
+               iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+       struct tcp_option_ts {
+               uint8_t kind;
+               uint8_t len;
+               uint32_t tsval;
+               uint32_t tsecr;
+       } *opt_ts = (void *)buf;
+       struct tcp_option_window {
+               uint8_t kind;
+               uint8_t len;
+               uint8_t shift;
+       } *opt_window = (void *)buf;
+
+       switch (kind) {
+       case TCPOPT_NOP:
+               buf[0] = TCPOPT_NOP;
+               break;
+       case TCPOPT_WINDOW:
+               memset(opt_window, 0, sizeof(struct tcp_option_window));
+               opt_window->kind = TCPOPT_WINDOW;
+               opt_window->len = TCPOLEN_WINDOW;
+               opt_window->shift = 0;
+               break;
+       case TCPOPT_TIMESTAMP:
+               memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+               opt_ts->kind = TCPOPT_TIMESTAMP;
+               opt_ts->len = TCPOLEN_TIMESTAMP;
+               opt_ts->tsval = ts;
+               opt_ts->tsecr = 0;
+               break;
+       default:
+               error(1, 0, "unimplemented TCP option");
+               break;
+       }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+       switch (order) {
+       case 0:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+                                 TCPOPT_TIMESTAMP, ts);
+               break;
+       case 1:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1,
+                                 TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+                                 TCPOPT_NOP, 0);
+               break;
+       case 2:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+                                 TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+                                 TCPOPT_NOP, 0);
+               break;
+       default:
+               error(1, 0, "unknown order");
+               break;
+       }
+       recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->check = tcph->check - 1;
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->seq = ntohl(htonl(tcph->seq) + 1);
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+  * don't coalesce.
+  */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 1);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 2);
+       write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+       int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+       int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+       recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+       write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+       struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+       int optlen = sizeof(struct ip_timestamp);
+       struct iphdr *iph;
+
+       if (optlen % 4)
+               error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+       ts->ipt_code = IPOPT_TS;
+       ts->ipt_len = optlen;
+       ts->ipt_ptr = 5;
+       ts->ipt_flg = IPOPT_TS_TSONLY;
+
+       memcpy(optpkt, buf, tcp_offset);
+       memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+       iph = (struct iphdr *)(optpkt + ETH_HLEN);
+       iph->ihl = 5 + (optlen / 4);
+       iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+       int optlen = sizeof(struct ip_timestamp);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+       add_ipv4_ts_option(buf, optpkt);
+       write_packet(fd, optpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/*  IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[IP_MAXPACKET];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       /* Once fragmented, packet would retain the total_len.
+        * Tcp header is prepared as if rest of data is in follow-up frags,
+        * but follow up frags aren't actually sent.
+        */
+       memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+       fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+       fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+       fill_datalinklayer(buf);
+
+       iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       iph->ttl = 7;
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               iph->tos = 1;
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else if (proto == PF_INET6) {
+               ip6h->priority = 0xf;
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+                          sizeof(struct ip6_frag)];
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+       int extlen = sizeof(struct ip6_frag);
+       int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+       int extpkt_len = bufpkt_len + extlen;
+       int i;
+
+       for (i = 0; i < 2; i++) {
+               create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, bufpkt_len, daddr);
+       }
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       memset(extpkt, 0, extpkt_len);
+
+       ip6h->nexthdr = IPPROTO_FRAGMENT;
+       ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       frag->ip6f_nxt = IPPROTO_TCP;
+
+       memcpy(extpkt, buf, tcp_offset);
+       memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+       write_packet(fd, extpkt, extpkt_len, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+       struct sockaddr_ll daddr = {};
+
+       daddr.sll_family = AF_PACKET;
+       daddr.sll_protocol = ethhdr_proto;
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+
+       if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+               error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+       struct timeval timeout;
+
+       timeout.tv_sec = 120;
+       timeout.tv_usec = 0;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+                      sizeof(timeout)) < 0)
+               error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+                           int correct_num_pkts)
+{
+       static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+       struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+       struct tcphdr *tcph;
+       bool bad_packet = false;
+       int tcp_ext_len = 0;
+       int ip_ext_len = 0;
+       int pkt_size = -1;
+       int data_len = 0;
+       int num_pkt = 0;
+       int i;
+
+       vlog("Expected {");
+       for (i = 0; i < correct_num_pkts; i++)
+               vlog("%d ", correct_payload[i]);
+       vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+       while (1) {
+               pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+               if (pkt_size < 0)
+                       error(1, errno, "could not receive");
+
+               if (iph->version == 4)
+                       ip_ext_len = (iph->ihl - 5) * 4;
+               else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+                       ip_ext_len = sizeof(struct ip6_frag);
+
+               tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+               if (tcph->fin)
+                       break;
+
+               tcp_ext_len = (tcph->doff - 5) * 4;
+               data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+               /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+                * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+                * Packet sockets are protocol agnostic, and will not trim the padding.
+                */
+               if (pkt_size == ETH_ZLEN && iph->version == 4) {
+                       data_len = ntohs(iph->tot_len)
+                               - sizeof(struct tcphdr) - sizeof(struct iphdr);
+               }
+               vlog("%d ", data_len);
+               if (data_len != correct_payload[num_pkt]) {
+                       vlog("[!=%d]", correct_payload[num_pkt]);
+                       bad_packet = true;
+               }
+               num_pkt++;
+       }
+       vlog("}, Total %d packets.\n", num_pkt);
+       if (num_pkt != correct_num_pkts)
+               error(1, 0, "incorrect number of packets");
+       if (bad_packet)
+               error(1, 0, "incorrect packet geometry");
+
+       printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+       static char fin_pkt[MAX_HDR_LEN];
+       struct sockaddr_ll daddr = {};
+       int txfd = -1;
+
+       txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (txfd < 0)
+               error(1, errno, "socket creation");
+
+       memset(&daddr, 0, sizeof(daddr));
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+       daddr.sll_family = AF_PACKET;
+       memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+       daddr.sll_halen = ETH_ALEN;
+       create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+       if (strcmp(testname, "data") == 0) {
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ack") == 0) {
+               send_ack(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "flags") == 0) {
+               send_flags(txfd, &daddr, 1, 0, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 1, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 1, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 0, 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "tcp") == 0) {
+               send_changed_checksum(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_seq(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_ts(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_diff_opt(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ip") == 0) {
+               send_changed_ECN(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_tos(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               if (proto == PF_INET) {
+                       /* Modified packets may be received out of order.
+                        * Sleep function added to enforce test boundaries
+                        * so that fin pkts are not received prior to other pkts.
+                        */
+                       sleep(1);
+                       send_changed_ttl(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_ip_options(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_fragment4(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               } else if (proto == PF_INET6) {
+                       send_fragment6(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               /* 20 is the difference between min iphdr size
+                * and min ipv6hdr size. Like MAX_HDR_SIZE,
+                * MAX_PAYLOAD is defined with the larger header of the two.
+                */
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               send_large(txfd, &daddr, remainder);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_large(txfd, &daddr, remainder + 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else {
+               error(1, 0, "Unknown testcase");
+       }
+
+       if (close(txfd))
+               error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+       static int correct_payload[NUM_PACKETS];
+       int rxfd = -1;
+
+       rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+       if (rxfd < 0)
+               error(1, 0, "socket creation");
+       setup_sock_filter(rxfd);
+       set_timeout(rxfd);
+       bind_packetsocket(rxfd);
+
+       memset(correct_payload, 0, sizeof(correct_payload));
+
+       if (strcmp(testname, "data") == 0) {
+               printf("pure data packet of same size: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("large data packets followed by a smaller one: ");
+               correct_payload[0] = PAYLOAD_LEN * 1.5;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("small data packets followed by a larger one: ");
+               correct_payload[0] = PAYLOAD_LEN / 2;
+               correct_payload[1] = PAYLOAD_LEN;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ack") == 0) {
+               printf("duplicate ack and pure ack: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "flags") == 0) {
+               correct_payload[0] = PAYLOAD_LEN * 3;
+               correct_payload[1] = PAYLOAD_LEN * 2;
+
+               printf("psh flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               correct_payload[1] = 0;
+               correct_payload[2] = PAYLOAD_LEN * 2;
+               printf("syn flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("rst flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("urg flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "tcp") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+               correct_payload[2] = PAYLOAD_LEN;
+               correct_payload[3] = PAYLOAD_LEN;
+
+               printf("changed checksum does not coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Wrong Seq number doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Different timestamp doesn't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 4);
+
+               printf("Different options doesn't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ip") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+
+               printf("different ECN doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("different tos doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               if (proto == PF_INET) {
+                       printf("different ttl doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+
+                       printf("ip options doesn't coalesce: ");
+                       correct_payload[2] = PAYLOAD_LEN;
+                       check_recv_pkts(rxfd, correct_payload, 3);
+
+                       printf("fragmented ip4 doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               } else if (proto == PF_INET6) {
+                       /* GRO doesn't check for ipv6 hop limit when flushing.
+                        * Hence no corresponding test to the ipv4 case.
+                        */
+                       printf("fragmented ip6 doesn't coalesce: ");
+                       correct_payload[0] = PAYLOAD_LEN * 2;
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               correct_payload[0] = (MAX_PAYLOAD + offset);
+               correct_payload[1] = remainder;
+               printf("Shouldn't coalesce if exceed IP max pkt size: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               /* last segment sent individually, doesn't start new segment */
+               correct_payload[0] = correct_payload[0] - remainder;
+               correct_payload[1] = remainder + 1;
+               correct_payload[2] = remainder + 1;
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else {
+               error(1, 0, "Test case error, should never trigger");
+       }
+
+       if (close(rxfd))
+               error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+       static const struct option opts[] = {
+               { "dmac", required_argument, NULL, 'D' },
+               { "iface", required_argument, NULL, 'i' },
+               { "ipv4", no_argument, NULL, '4' },
+               { "ipv6", no_argument, NULL, '6' },
+               { "rx", no_argument, NULL, 'r' },
+               { "smac", required_argument, NULL, 'S' },
+               { "test", required_argument, NULL, 't' },
+               { "verbose", no_argument, NULL, 'v' },
+               { 0, 0, 0, 0 }
+       };
+       int c;
+
+       while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+               switch (c) {
+               case '4':
+                       proto = PF_INET;
+                       ethhdr_proto = htons(ETH_P_IP);
+                       break;
+               case '6':
+                       proto = PF_INET6;
+                       ethhdr_proto = htons(ETH_P_IPV6);
+                       break;
+               case 'D':
+                       dmac = optarg;
+                       break;
+               case 'i':
+                       ifname = optarg;
+                       break;
+               case 'r':
+                       tx_socket = false;
+                       break;
+               case 'S':
+                       smac = optarg;
+                       break;
+               case 't':
+                       testname = optarg;
+                       break;
+               case 'v':
+                       verbose = true;
+                       break;
+               default:
+                       error(1, 0, "%s invalid option %c\n", __func__, c);
+                       break;
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       parse_args(argc, argv);
+
+       if (proto == PF_INET) {
+               tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+               total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+       } else if (proto == PF_INET6) {
+               tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+               total_hdr_len = MAX_HDR_LEN;
+       } else {
+               error(1, 0, "Protocol family is not ipv4 or ipv6");
+       }
+
+       read_MAC(src_mac, smac);
+       read_MAC(dst_mac, dmac);
+
+       if (tx_socket)
+               gro_sender();
+       else
+               gro_receiver();
+       return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755 (executable)
index 0000000..342ad27
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+  local server_pid=0
+  local exit_code=0
+  local protocol=$1
+  local test=$2
+  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+  setup_ns
+  # Each test is run 3 times to deflake, because given the receive timing,
+  # not all packets that should coalesce will be considered in the same flow
+  # on every try.
+  for tries in {1..3}; do
+    # Actual test starts here
+    ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+      1>>log.txt &
+    server_pid=$!
+    sleep 0.5  # to allow for socket init
+    ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+      1>>log.txt
+    wait "${server_pid}"
+    exit_code=$?
+    if [[ "${exit_code}" -eq 0 ]]; then
+        break;
+    fi
+  done
+  cleanup_ns
+  echo ${exit_code}
+}
+
+run_all_tests() {
+  local failed_tests=()
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -ne 0 ]]; then
+        failed_tests+=("${proto}_${test}")
+      fi;
+    done;
+  done
+  if [[ ${#failed_tests[@]} -ne 0 ]]; then
+    echo "failed tests: ${failed_tests[*]}. \
+    Please see log.txt for more logs"
+    exit 1
+  else
+    echo "All Tests Succeeded!"
+  fi;
+}
+
+usage() {
+  echo "Usage: $0 \
+  [-i <DEV>] \
+  [-t data|ack|flags|tcp|ip|large] \
+  [-p <ipv4|ipv6>]" 1>&2;
+  exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+if [ -n "$dev" ]; then
+       source setup_loopback.sh
+else
+       source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755 (executable)
index 0000000..3caf72b
--- /dev/null
@@ -0,0 +1,652 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+#            +-------------------+            +-------------------+
+#            |                   |            |                   |
+#            |    Alpha netns    |            |    Gamma netns    |
+#            |                   |            |                   |
+#            |  +-------------+  |            |  +-------------+  |
+#            |  |    veth0    |  |            |  |    veth0    |  |
+#            |  |  db01::2/64 |  |            |  |  db02::2/64 |  |
+#            |  +-------------+  |            |  +-------------+  |
+#            |         .         |            |         .         |
+#            +-------------------+            +-------------------+
+#                      .                                .
+#                      .                                .
+#                      .                                .
+#            +----------------------------------------------------+
+#            |         .                                .         |
+#            |  +-------------+                  +-------------+  |
+#            |  |    veth0    |                  |    veth1    |  |
+#            |  |  db01::1/64 | ................ |  db02::1/64 |  |
+#            |  +-------------+                  +-------------+  |
+#            |                                                    |
+#            |                      Beta netns                    |
+#            |                                                    |
+#            +----------------------------------------------------+
+#
+#
+#
+#        =============================================================
+#        |                Alpha - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 1                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 11111111                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 101                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 101101                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee0                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf00dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 777                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | something that will be 4n-aligned   |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                 Beta - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 2                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 22222222                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 201                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 201201                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 202                                 |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 202202                              |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee1                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf11dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 666                                 |
+#        +-----------------------------------------------------------+
+#        | Schema Data         | Hello there -Obi                    |
+#        +-----------------------------------------------------------+
+#
+#
+#        =============================================================
+#        |                Gamma - IOAM configuration                 |
+#        +===========================================================+
+#        | Node ID             | 3                                   |
+#        +-----------------------------------------------------------+
+#        | Node Wide ID        | 33333333                            |
+#        +-----------------------------------------------------------+
+#        | Ingress ID          | 301                                 |
+#        +-----------------------------------------------------------+
+#        | Ingress Wide ID     | 301301                              |
+#        +-----------------------------------------------------------+
+#        | Egress ID           | 0xffff (default value)              |
+#        +-----------------------------------------------------------+
+#        | Egress Wide ID      | 0xffffffff (default value)          |
+#        +-----------------------------------------------------------+
+#        | Namespace Data      | 0xdeadbee2                          |
+#        +-----------------------------------------------------------+
+#        | Namespace Wide Data | 0xcafec0caf22dc0de                  |
+#        +-----------------------------------------------------------+
+#        | Schema ID           | 0xffffff (= None)                   |
+#        +-----------------------------------------------------------+
+#        | Schema Data         |                                     |
+#        +-----------------------------------------------------------+
+
+
+################################################################################
+#                                                                              #
+# WARNING: Be careful if you modify the block below - it MUST be kept          #
+#          synchronized with configurations inside ioam6_parser.c and always   #
+#          reflect the same.                                                   #
+#                                                                              #
+################################################################################
+
+ALPHA=(
+       1                                       # ID
+       11111111                                # Wide ID
+       0xffff                                  # Ingress ID
+       0xffffffff                              # Ingress Wide ID
+       101                                     # Egress ID
+       101101                                  # Egress Wide ID
+       0xdeadbee0                              # Namespace Data
+       0xcafec0caf00dc0de                      # Namespace Wide Data
+       777                                     # Schema ID (0xffffff = None)
+       "something that will be 4n-aligned"     # Schema Data
+)
+
+BETA=(
+       2
+       22222222
+       201
+       201201
+       202
+       202202
+       0xdeadbee1
+       0xcafec0caf11dc0de
+       666
+       "Hello there -Obi"
+)
+
+GAMMA=(
+       3
+       33333333
+       301
+       301301
+       0xffff
+       0xffffffff
+       0xdeadbee2
+       0xcafec0caf22dc0de
+       0xffffff
+       ""
+)
+
+TESTS_OUTPUT="
+       out_undef_ns
+       out_no_room
+       out_bits
+       out_full_supp_trace
+"
+
+TESTS_INPUT="
+       in_undef_ns
+       in_no_room
+       in_oflag
+       in_bits
+       in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+       fwd_full_supp_trace
+"
+
+
+################################################################################
+#                                                                              #
+#                                   LIBRARY                                    #
+#                                                                              #
+################################################################################
+
+check_kernel_compatibility()
+{
+  ip netns add ioam-tmp-node
+  ip link add name veth0 netns ioam-tmp-node type veth \
+         peer name veth1 netns ioam-tmp-node
+
+  ip -netns ioam-tmp-node link set veth0 up
+  ip -netns ioam-tmp-node link set veth1 up
+
+  ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+  ns_ad=$?
+
+  ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+  ns_sh=$?
+
+  if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+  then
+    echo "SKIP: kernel version probably too old, missing ioam support"
+    ip link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit 1
+  fi
+
+  ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+  tr_ad=$?
+
+  ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+  tr_sh=$?
+
+  if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+  then
+    echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+         "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+    ip link del veth0 2>/dev/null || true
+    ip netns del ioam-tmp-node || true
+    exit 1
+  fi
+
+  ip link del veth0 2>/dev/null || true
+  ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+  ip link del ioam-veth-alpha 2>/dev/null || true
+  ip link del ioam-veth-gamma 2>/dev/null || true
+
+  ip netns del ioam-node-alpha || true
+  ip netns del ioam-node-beta || true
+  ip netns del ioam-node-gamma || true
+}
+
+setup()
+{
+  ip netns add ioam-node-alpha
+  ip netns add ioam-node-beta
+  ip netns add ioam-node-gamma
+
+  ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+         peer name ioam-veth-betaL netns ioam-node-beta
+  ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+         peer name ioam-veth-gamma netns ioam-node-gamma
+
+  ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+  ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+  ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+  ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+  ip -netns ioam-node-alpha link set veth0 up
+  ip -netns ioam-node-alpha link set lo up
+  ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+  ip -netns ioam-node-alpha route del db01::/64
+  ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+  ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+  ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+  ip -netns ioam-node-beta link set veth0 up
+  ip -netns ioam-node-beta link set veth1 up
+  ip -netns ioam-node-beta link set lo up
+
+  ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+  ip -netns ioam-node-gamma link set veth0 up
+  ip -netns ioam-node-gamma link set lo up
+  ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+  # - IOAM config -
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+  ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+  ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+  ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+  ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+  ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+  ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+  ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+  ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+  sleep 1
+
+  ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+  if [ $? != 0 ]
+  then
+    echo "Setup FAILED"
+    cleanup &>/dev/null
+    exit 0
+  fi
+}
+
+log_test_passed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+  local desc=$1
+  printf "TEST: %-60s  [FAIL]\n" "${desc}"
+}
+
+run_test()
+{
+  local name=$1
+  local desc=$2
+  local node_src=$3
+  local node_dst=$4
+  local ip6_src=$5
+  local ip6_dst=$6
+  local if_dst=$7
+  local trace_type=$8
+  local ioam_ns=$9
+
+  ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+         $trace_type $ioam_ns &
+  local spid=$!
+  sleep 0.1
+
+  ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+  if [ $? != 0 ]
+  then
+    log_test_failed "${desc}"
+    kill -2 $spid &>/dev/null
+  else
+    wait $spid
+    [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+  fi
+}
+
+run()
+{
+  echo
+  echo "OUTPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set OUTPUT settings
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+  for t in $TESTS_OUTPUT
+  do
+    $t
+  done
+
+  # clean OUTPUT settings
+  ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+  ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+  echo
+  echo "INPUT tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  # set INPUT settings
+  ip -netns ioam-node-alpha ioam namespace del 123
+
+  for t in $TESTS_INPUT
+  do
+    $t
+  done
+
+  # clean INPUT settings
+  ip -netns ioam-node-alpha ioam namespace add 123 \
+         data ${ALPHA[6]} wide ${ALPHA[7]}
+  ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+  ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+  echo
+  echo "GLOBAL tests"
+  printf "%0.s-" {1..74}
+  echo
+
+  for t in $TESTS_GLOBAL
+  do
+    $t
+  done
+}
+
+bit2type=(
+  0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+  0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+  0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+#                                                                              #
+#                              OUTPUT tests                                    #
+#                                                                              #
+#   Two nodes (sender/receiver), IOAM disabled on ingress for the receiver.    #
+################################################################################
+
+out_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace if the chosen IOAM      #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0x800000 0
+}
+
+out_no_room()
+{
+  ##############################################################################
+  # Make sure that the encap node won't fill the trace and will set the        #
+  # Overflow flag since there is no room enough for its data.                  #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+}
+
+out_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the encap node will either:       #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+  for i in {0..22}
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the encap node will correctly fill a full trace. Be careful,#
+  # "full trace" here does NOT mean all bits (only supported ones).            #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 100 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                               INPUT tests                                    #
+#                                                                              #
+#     Two nodes (sender/receiver), the sender MUST NOT fill the trace upon     #
+#     insertion -> the IOAM namespace configured on the sender is removed      #
+#     and is used in the inserted trace to force the sender not to fill it.    #
+################################################################################
+
+in_undef_ns()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace if the related IOAM #
+  # namespace is not configured locally.                                       #
+  ##############################################################################
+  local desc="Unknown IOAM namespace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0x800000 0
+}
+
+in_no_room()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace and will set the    #
+  # Overflow flag if there is no room enough for its data.                     #
+  ##############################################################################
+  local desc="Missing trace room"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+}
+
+in_bits()
+{
+  ##############################################################################
+  # Make sure that, for each trace type bit, the receiving node will either:   #
+  #  (i)  fill the trace with its data when it is a supported bit              #
+  #  (ii) not fill the trace with its data when it is an unsupported bit       #
+  ##############################################################################
+  local desc="Trace type with bit <n> only"
+
+  local tmp=${bit2size[22]}
+  bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+  for i in {0..22}
+  do
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+    run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+  done
+
+  bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+  ##############################################################################
+  # Make sure that the receiving node won't fill the trace since the Overflow  #
+  # flag is set.                                                               #
+  ##############################################################################
+  local desc="Overflow flag is set"
+
+  # Exception:
+  #   Here, we need the sender to set the Overflow flag. For that, we will add
+  #   back the IOAM namespace that was previously configured on the sender.
+  ip -netns ioam-node-alpha ioam namespace add 123
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xc00000 123
+
+  # And we clean the exception for this test to get things back to normal for
+  # other INPUT tests
+  ip -netns ioam-node-alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that the receiving node will correctly fill a full trace. Be     #
+  # careful, "full trace" here does NOT mean all bits (only supported ones).   #
+  ##############################################################################
+  local desc="Full supported trace"
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 80 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+         db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                              GLOBAL tests                                    #
+#                                                                              #
+#   Three nodes (sender/router/receiver), IOAM fully enabled on every node.    #
+################################################################################
+
+fwd_full_supp_trace()
+{
+  ##############################################################################
+  # Make sure that all three nodes correctly filled the full supported trace   #
+  # by checking that the trace data is consistent with the predefined config.  #
+  ##############################################################################
+  local desc="Forward - Full supported trace"
+
+  ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+         type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+         db02::2 veth0 0xfff002 123
+}
+
+
+################################################################################
+#                                                                              #
+#                                     MAIN                                     #
+#                                                                              #
+################################################################################
+
+if [ "$(id -u)" -ne 0 ]
+then
+  echo "SKIP: Need root privileges"
+  exit 1
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+  echo "SKIP: Could not run test without ip tool"
+  exit 1
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+  echo "SKIP: iproute2 too old, missing ioam command"
+  exit 1
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644 (file)
index 0000000..d376cb2
--- /dev/null
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+       __u32 id;
+       __u64 wide;
+       __u16 ingr_id;
+       __u16 egr_id;
+       __u32 ingr_wide;
+       __u32 egr_wide;
+       __u32 ns_data;
+       __u64 ns_wide;
+       __u32 sc_id;
+       __u8 hlim;
+       char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+       .id = 1,
+       .wide = 11111111,
+       .ingr_id = 0xffff, /* default value */
+       .egr_id = 101,
+       .ingr_wide = 0xffffffff, /* default value */
+       .egr_wide = 101101,
+       .ns_data = 0xdeadbee0,
+       .ns_wide = 0xcafec0caf00dc0de,
+       .sc_id = 777,
+       .sc_data = "something that will be 4n-aligned",
+       .hlim = 64,
+};
+
+static struct ioam_config node2 = {
+       .id = 2,
+       .wide = 22222222,
+       .ingr_id = 201,
+       .egr_id = 202,
+       .ingr_wide = 201201,
+       .egr_wide = 202202,
+       .ns_data = 0xdeadbee1,
+       .ns_wide = 0xcafec0caf11dc0de,
+       .sc_id = 666,
+       .sc_data = "Hello there -Obi",
+       .hlim = 63,
+};
+
+static struct ioam_config node3 = {
+       .id = 3,
+       .wide = 33333333,
+       .ingr_id = 301,
+       .egr_id = 0xffff, /* default value */
+       .ingr_wide = 301301,
+       .egr_wide = 0xffffffff, /* default value */
+       .ns_data = 0xdeadbee2,
+       .ns_wide = 0xcafec0caf22dc0de,
+       .sc_id = 0xffffff, /* default value */
+       .sc_data = NULL,
+       .hlim = 62,
+};
+
+enum {
+       /**********
+        * OUTPUT *
+        **********/
+       TEST_OUT_UNDEF_NS,
+       TEST_OUT_NO_ROOM,
+       TEST_OUT_BIT0,
+       TEST_OUT_BIT1,
+       TEST_OUT_BIT2,
+       TEST_OUT_BIT3,
+       TEST_OUT_BIT4,
+       TEST_OUT_BIT5,
+       TEST_OUT_BIT6,
+       TEST_OUT_BIT7,
+       TEST_OUT_BIT8,
+       TEST_OUT_BIT9,
+       TEST_OUT_BIT10,
+       TEST_OUT_BIT11,
+       TEST_OUT_BIT12,
+       TEST_OUT_BIT13,
+       TEST_OUT_BIT14,
+       TEST_OUT_BIT15,
+       TEST_OUT_BIT16,
+       TEST_OUT_BIT17,
+       TEST_OUT_BIT18,
+       TEST_OUT_BIT19,
+       TEST_OUT_BIT20,
+       TEST_OUT_BIT21,
+       TEST_OUT_BIT22,
+       TEST_OUT_FULL_SUPP_TRACE,
+
+       /*********
+        * INPUT *
+        *********/
+       TEST_IN_UNDEF_NS,
+       TEST_IN_NO_ROOM,
+       TEST_IN_OFLAG,
+       TEST_IN_BIT0,
+       TEST_IN_BIT1,
+       TEST_IN_BIT2,
+       TEST_IN_BIT3,
+       TEST_IN_BIT4,
+       TEST_IN_BIT5,
+       TEST_IN_BIT6,
+       TEST_IN_BIT7,
+       TEST_IN_BIT8,
+       TEST_IN_BIT9,
+       TEST_IN_BIT10,
+       TEST_IN_BIT11,
+       TEST_IN_BIT12,
+       TEST_IN_BIT13,
+       TEST_IN_BIT14,
+       TEST_IN_BIT15,
+       TEST_IN_BIT16,
+       TEST_IN_BIT17,
+       TEST_IN_BIT18,
+       TEST_IN_BIT19,
+       TEST_IN_BIT20,
+       TEST_IN_BIT21,
+       TEST_IN_BIT22,
+       TEST_IN_FULL_SUPP_TRACE,
+
+       /**********
+        * GLOBAL *
+        **********/
+       TEST_FWD_FULL_SUPP_TRACE,
+
+       __TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+                            __u32 trace_type, __u16 ioam_ns)
+{
+       if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+           __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+               return 1;
+
+       switch (tid) {
+       case TEST_OUT_UNDEF_NS:
+       case TEST_IN_UNDEF_NS:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen != 1 ||
+                      ioam6h->remlen != 1;
+
+       case TEST_OUT_NO_ROOM:
+       case TEST_IN_NO_ROOM:
+       case TEST_IN_OFLAG:
+               return !ioam6h->overflow ||
+                      ioam6h->nodelen != 2 ||
+                      ioam6h->remlen != 1;
+
+       case TEST_OUT_BIT0:
+       case TEST_IN_BIT0:
+       case TEST_OUT_BIT1:
+       case TEST_IN_BIT1:
+       case TEST_OUT_BIT2:
+       case TEST_IN_BIT2:
+       case TEST_OUT_BIT3:
+       case TEST_IN_BIT3:
+       case TEST_OUT_BIT4:
+       case TEST_IN_BIT4:
+       case TEST_OUT_BIT5:
+       case TEST_IN_BIT5:
+       case TEST_OUT_BIT6:
+       case TEST_IN_BIT6:
+       case TEST_OUT_BIT7:
+       case TEST_IN_BIT7:
+       case TEST_OUT_BIT11:
+       case TEST_IN_BIT11:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen != 1 ||
+                      ioam6h->remlen;
+
+       case TEST_OUT_BIT8:
+       case TEST_IN_BIT8:
+       case TEST_OUT_BIT9:
+       case TEST_IN_BIT9:
+       case TEST_OUT_BIT10:
+       case TEST_IN_BIT10:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen != 2 ||
+                      ioam6h->remlen;
+
+       case TEST_OUT_BIT12:
+       case TEST_IN_BIT12:
+       case TEST_OUT_BIT13:
+       case TEST_IN_BIT13:
+       case TEST_OUT_BIT14:
+       case TEST_IN_BIT14:
+       case TEST_OUT_BIT15:
+       case TEST_IN_BIT15:
+       case TEST_OUT_BIT16:
+       case TEST_IN_BIT16:
+       case TEST_OUT_BIT17:
+       case TEST_IN_BIT17:
+       case TEST_OUT_BIT18:
+       case TEST_IN_BIT18:
+       case TEST_OUT_BIT19:
+       case TEST_IN_BIT19:
+       case TEST_OUT_BIT20:
+       case TEST_IN_BIT20:
+       case TEST_OUT_BIT21:
+       case TEST_IN_BIT21:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen ||
+                      ioam6h->remlen != 1;
+
+       case TEST_OUT_BIT22:
+       case TEST_IN_BIT22:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen ||
+                      ioam6h->remlen;
+
+       case TEST_OUT_FULL_SUPP_TRACE:
+       case TEST_IN_FULL_SUPP_TRACE:
+       case TEST_FWD_FULL_SUPP_TRACE:
+               return ioam6h->overflow ||
+                      ioam6h->nodelen != 15 ||
+                      ioam6h->remlen;
+
+       default:
+               break;
+       }
+
+       return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+                           const struct ioam_config cnf)
+{
+       unsigned int len;
+       __u8 aligned;
+       __u64 raw64;
+       __u32 raw32;
+
+       if (ioam6h->type.bit0) {
+               raw32 = __be32_to_cpu(*((__u32 *)*p));
+               if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit1) {
+               raw32 = __be32_to_cpu(*((__u32 *)*p));
+               if (cnf.ingr_id != (raw32 >> 16) ||
+                   cnf.egr_id != (raw32 & 0xffff))
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit2)
+               *p += sizeof(__u32);
+
+       if (ioam6h->type.bit3)
+               *p += sizeof(__u32);
+
+       if (ioam6h->type.bit4) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit5) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit6) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit7) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit8) {
+               raw64 = __be64_to_cpu(*((__u64 *)*p));
+               if (cnf.hlim != (raw64 >> 56) ||
+                   cnf.wide != (raw64 & 0xffffffffffffff))
+                       return 1;
+               *p += sizeof(__u64);
+       }
+
+       if (ioam6h->type.bit9) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+                       return 1;
+               *p += sizeof(__u32);
+
+               if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit10) {
+               if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+                       return 1;
+               *p += sizeof(__u64);
+       }
+
+       if (ioam6h->type.bit11) {
+               if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+                       return 1;
+               *p += sizeof(__u32);
+       }
+
+       if (ioam6h->type.bit22) {
+               len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+               aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+               raw32 = __be32_to_cpu(*((__u32 *)*p));
+               if (aligned != (raw32 >> 24) * 4 ||
+                   cnf.sc_id != (raw32 & 0xffffff))
+                       return 1;
+               *p += sizeof(__u32);
+
+               if (cnf.sc_data) {
+                       if (strncmp((char *)*p, cnf.sc_data, len))
+                               return 1;
+
+                       *p += len;
+                       aligned -= len;
+
+                       while (aligned--) {
+                               if (**p != '\0')
+                                       return 1;
+                               *p += sizeof(__u8);
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+                                     __u32 trace_type, __u16 ioam_ns)
+{
+       __u8 *p;
+
+       if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+               return 1;
+
+       p = ioam6h->data + ioam6h->remlen * 4;
+
+       switch (tid) {
+       case TEST_OUT_BIT0:
+       case TEST_OUT_BIT1:
+       case TEST_OUT_BIT2:
+       case TEST_OUT_BIT3:
+       case TEST_OUT_BIT4:
+       case TEST_OUT_BIT5:
+       case TEST_OUT_BIT6:
+       case TEST_OUT_BIT7:
+       case TEST_OUT_BIT8:
+       case TEST_OUT_BIT9:
+       case TEST_OUT_BIT10:
+       case TEST_OUT_BIT11:
+       case TEST_OUT_BIT22:
+       case TEST_OUT_FULL_SUPP_TRACE:
+               return check_ioam6_data(&p, ioam6h, node1);
+
+       case TEST_IN_BIT0:
+       case TEST_IN_BIT1:
+       case TEST_IN_BIT2:
+       case TEST_IN_BIT3:
+       case TEST_IN_BIT4:
+       case TEST_IN_BIT5:
+       case TEST_IN_BIT6:
+       case TEST_IN_BIT7:
+       case TEST_IN_BIT8:
+       case TEST_IN_BIT9:
+       case TEST_IN_BIT10:
+       case TEST_IN_BIT11:
+       case TEST_IN_BIT22:
+       case TEST_IN_FULL_SUPP_TRACE:
+       {
+               __u32 tmp32 = node2.egr_wide;
+               __u16 tmp16 = node2.egr_id;
+               int res;
+
+               node2.egr_id = 0xffff;
+               node2.egr_wide = 0xffffffff;
+
+               res = check_ioam6_data(&p, ioam6h, node2);
+
+               node2.egr_id = tmp16;
+               node2.egr_wide = tmp32;
+
+               return res;
+       }
+
+       case TEST_FWD_FULL_SUPP_TRACE:
+               if (check_ioam6_data(&p, ioam6h, node3))
+                       return 1;
+               if (check_ioam6_data(&p, ioam6h, node2))
+                       return 1;
+               return check_ioam6_data(&p, ioam6h, node1);
+
+       default:
+               break;
+       }
+
+       return 1;
+}
+
+static int str2id(const char *tname)
+{
+       if (!strcmp("out_undef_ns", tname))
+               return TEST_OUT_UNDEF_NS;
+       if (!strcmp("out_no_room", tname))
+               return TEST_OUT_NO_ROOM;
+       if (!strcmp("out_bit0", tname))
+               return TEST_OUT_BIT0;
+       if (!strcmp("out_bit1", tname))
+               return TEST_OUT_BIT1;
+       if (!strcmp("out_bit2", tname))
+               return TEST_OUT_BIT2;
+       if (!strcmp("out_bit3", tname))
+               return TEST_OUT_BIT3;
+       if (!strcmp("out_bit4", tname))
+               return TEST_OUT_BIT4;
+       if (!strcmp("out_bit5", tname))
+               return TEST_OUT_BIT5;
+       if (!strcmp("out_bit6", tname))
+               return TEST_OUT_BIT6;
+       if (!strcmp("out_bit7", tname))
+               return TEST_OUT_BIT7;
+       if (!strcmp("out_bit8", tname))
+               return TEST_OUT_BIT8;
+       if (!strcmp("out_bit9", tname))
+               return TEST_OUT_BIT9;
+       if (!strcmp("out_bit10", tname))
+               return TEST_OUT_BIT10;
+       if (!strcmp("out_bit11", tname))
+               return TEST_OUT_BIT11;
+       if (!strcmp("out_bit12", tname))
+               return TEST_OUT_BIT12;
+       if (!strcmp("out_bit13", tname))
+               return TEST_OUT_BIT13;
+       if (!strcmp("out_bit14", tname))
+               return TEST_OUT_BIT14;
+       if (!strcmp("out_bit15", tname))
+               return TEST_OUT_BIT15;
+       if (!strcmp("out_bit16", tname))
+               return TEST_OUT_BIT16;
+       if (!strcmp("out_bit17", tname))
+               return TEST_OUT_BIT17;
+       if (!strcmp("out_bit18", tname))
+               return TEST_OUT_BIT18;
+       if (!strcmp("out_bit19", tname))
+               return TEST_OUT_BIT19;
+       if (!strcmp("out_bit20", tname))
+               return TEST_OUT_BIT20;
+       if (!strcmp("out_bit21", tname))
+               return TEST_OUT_BIT21;
+       if (!strcmp("out_bit22", tname))
+               return TEST_OUT_BIT22;
+       if (!strcmp("out_full_supp_trace", tname))
+               return TEST_OUT_FULL_SUPP_TRACE;
+       if (!strcmp("in_undef_ns", tname))
+               return TEST_IN_UNDEF_NS;
+       if (!strcmp("in_no_room", tname))
+               return TEST_IN_NO_ROOM;
+       if (!strcmp("in_oflag", tname))
+               return TEST_IN_OFLAG;
+       if (!strcmp("in_bit0", tname))
+               return TEST_IN_BIT0;
+       if (!strcmp("in_bit1", tname))
+               return TEST_IN_BIT1;
+       if (!strcmp("in_bit2", tname))
+               return TEST_IN_BIT2;
+       if (!strcmp("in_bit3", tname))
+               return TEST_IN_BIT3;
+       if (!strcmp("in_bit4", tname))
+               return TEST_IN_BIT4;
+       if (!strcmp("in_bit5", tname))
+               return TEST_IN_BIT5;
+       if (!strcmp("in_bit6", tname))
+               return TEST_IN_BIT6;
+       if (!strcmp("in_bit7", tname))
+               return TEST_IN_BIT7;
+       if (!strcmp("in_bit8", tname))
+               return TEST_IN_BIT8;
+       if (!strcmp("in_bit9", tname))
+               return TEST_IN_BIT9;
+       if (!strcmp("in_bit10", tname))
+               return TEST_IN_BIT10;
+       if (!strcmp("in_bit11", tname))
+               return TEST_IN_BIT11;
+       if (!strcmp("in_bit12", tname))
+               return TEST_IN_BIT12;
+       if (!strcmp("in_bit13", tname))
+               return TEST_IN_BIT13;
+       if (!strcmp("in_bit14", tname))
+               return TEST_IN_BIT14;
+       if (!strcmp("in_bit15", tname))
+               return TEST_IN_BIT15;
+       if (!strcmp("in_bit16", tname))
+               return TEST_IN_BIT16;
+       if (!strcmp("in_bit17", tname))
+               return TEST_IN_BIT17;
+       if (!strcmp("in_bit18", tname))
+               return TEST_IN_BIT18;
+       if (!strcmp("in_bit19", tname))
+               return TEST_IN_BIT19;
+       if (!strcmp("in_bit20", tname))
+               return TEST_IN_BIT20;
+       if (!strcmp("in_bit21", tname))
+               return TEST_IN_BIT21;
+       if (!strcmp("in_bit22", tname))
+               return TEST_IN_BIT22;
+       if (!strcmp("in_full_supp_trace", tname))
+               return TEST_IN_FULL_SUPP_TRACE;
+       if (!strcmp("fwd_full_supp_trace", tname))
+               return TEST_FWD_FULL_SUPP_TRACE;
+
+       return -1;
+}
+
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+       return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+               (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+               (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+               (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+       unsigned long res;
+       char *ptr;
+
+       if (!arg || !*arg)
+               return -1;
+       res = strtoul(arg, &ptr, base);
+
+       if (!ptr || ptr == arg || *ptr)
+               return -1;
+
+       if (res == ULONG_MAX && errno == ERANGE)
+               return -1;
+
+       if (res > 0xFFFFFFFFUL)
+               return -1;
+
+       *val = res;
+       return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+       unsigned long res;
+       char *ptr;
+
+       if (!arg || !*arg)
+               return -1;
+       res = strtoul(arg, &ptr, base);
+
+       if (!ptr || ptr == arg || *ptr)
+               return -1;
+
+       if (res == ULONG_MAX && errno == ERANGE)
+               return -1;
+
+       if (res > 0xFFFFUL)
+               return -1;
+
+       *val = res;
+       return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+       [TEST_OUT_UNDEF_NS]             = check_ioam_header,
+       [TEST_OUT_NO_ROOM]              = check_ioam_header,
+       [TEST_OUT_BIT0]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT1]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT2]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT3]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT4]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT5]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT6]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT7]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT8]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT9]         = check_ioam_header_and_data,
+       [TEST_OUT_BIT10]                = check_ioam_header_and_data,
+       [TEST_OUT_BIT11]                = check_ioam_header_and_data,
+       [TEST_OUT_BIT12]                = check_ioam_header,
+       [TEST_OUT_BIT13]                = check_ioam_header,
+       [TEST_OUT_BIT14]                = check_ioam_header,
+       [TEST_OUT_BIT15]                = check_ioam_header,
+       [TEST_OUT_BIT16]                = check_ioam_header,
+       [TEST_OUT_BIT17]                = check_ioam_header,
+       [TEST_OUT_BIT18]                = check_ioam_header,
+       [TEST_OUT_BIT19]                = check_ioam_header,
+       [TEST_OUT_BIT20]                = check_ioam_header,
+       [TEST_OUT_BIT21]                = check_ioam_header,
+       [TEST_OUT_BIT22]                = check_ioam_header_and_data,
+       [TEST_OUT_FULL_SUPP_TRACE]      = check_ioam_header_and_data,
+       [TEST_IN_UNDEF_NS]              = check_ioam_header,
+       [TEST_IN_NO_ROOM]               = check_ioam_header,
+       [TEST_IN_OFLAG]         = check_ioam_header,
+       [TEST_IN_BIT0]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT1]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT2]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT3]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT4]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT5]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT6]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT7]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT8]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT9]                  = check_ioam_header_and_data,
+       [TEST_IN_BIT10]         = check_ioam_header_and_data,
+       [TEST_IN_BIT11]         = check_ioam_header_and_data,
+       [TEST_IN_BIT12]         = check_ioam_header,
+       [TEST_IN_BIT13]         = check_ioam_header,
+       [TEST_IN_BIT14]         = check_ioam_header,
+       [TEST_IN_BIT15]         = check_ioam_header,
+       [TEST_IN_BIT16]         = check_ioam_header,
+       [TEST_IN_BIT17]         = check_ioam_header,
+       [TEST_IN_BIT18]         = check_ioam_header,
+       [TEST_IN_BIT19]         = check_ioam_header,
+       [TEST_IN_BIT20]         = check_ioam_header,
+       [TEST_IN_BIT21]         = check_ioam_header,
+       [TEST_IN_BIT22]         = check_ioam_header_and_data,
+       [TEST_IN_FULL_SUPP_TRACE]       = check_ioam_header_and_data,
+       [TEST_FWD_FULL_SUPP_TRACE]      = check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+       int fd, size, hoplen, tid, ret = 1;
+       struct in6_addr src, dst;
+       struct ioam6_hdr *opt;
+       struct ipv6hdr *ip6h;
+       __u8 buffer[400], *p;
+       __u16 ioam_ns;
+       __u32 tr_type;
+
+       if (argc != 7)
+               goto out;
+
+       tid = str2id(argv[2]);
+       if (tid < 0 || !func[tid])
+               goto out;
+
+       if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+           inet_pton(AF_INET6, argv[4], &dst) != 1)
+               goto out;
+
+       if (get_u32(&tr_type, argv[5], 16) ||
+           get_u16(&ioam_ns, argv[6], 0))
+               goto out;
+
+       fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+       if (!fd)
+               goto out;
+
+       if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+                      argv[1], strlen(argv[1])))
+               goto close;
+
+recv:
+       size = recv(fd, buffer, sizeof(buffer), 0);
+       if (size <= 0)
+               goto close;
+
+       ip6h = (struct ipv6hdr *)buffer;
+
+       if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+           !ipv6_addr_equal(&ip6h->daddr, &dst))
+               goto recv;
+
+       if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+               goto close;
+
+       p = buffer + sizeof(*ip6h);
+       hoplen = (p[1] + 1) << 3;
+       p += sizeof(struct ipv6_hopopt_hdr);
+
+       while (hoplen > 0) {
+               opt = (struct ioam6_hdr *)p;
+
+               if (opt->opt_type == IPV6_TLV_IOAM &&
+                   opt->type == IOAM6_TYPE_PREALLOC) {
+                       p += sizeof(*opt);
+                       ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+                                          tr_type, ioam_ns);
+                       break;
+               }
+
+               p += opt->opt_len + 2;
+               hoplen -= opt->opt_len + 2;
+       }
+close:
+       close(fd);
+out:
+       return ret;
+}
index f02f4de..255793c 100755 (executable)
@@ -3,8 +3,10 @@
 
 ret=0
 sin=""
+sinfail=""
 sout=""
 cin=""
+cinfail=""
 cinsent=""
 cout=""
 ksft_skip=4
@@ -76,6 +78,14 @@ init()
        done
 }
 
+init_shapers()
+{
+       for i in `seq 1 4`; do
+               tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+               tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+       done
+}
+
 cleanup_partial()
 {
        rm -f "$capout"
@@ -88,8 +98,8 @@ cleanup_partial()
 
 cleanup()
 {
-       rm -f "$cin" "$cout"
-       rm -f "$sin" "$sout" "$cinsent"
+       rm -f "$cin" "$cout" "$sinfail"
+       rm -f "$sin" "$sout" "$cinsent" "$cinfail"
        cleanup_partial
 }
 
@@ -211,11 +221,15 @@ link_failure()
 {
        ns="$1"
 
-       l=$((RANDOM%4))
-       l=$((l+1))
+       if [ -z "$FAILING_LINKS" ]; then
+               l=$((RANDOM%4))
+               FAILING_LINKS=$((l+1))
+       fi
 
-       veth="ns1eth$l"
-       ip -net "$ns" link set "$veth" down
+       for l in $FAILING_LINKS; do
+               veth="ns1eth$l"
+               ip -net "$ns" link set "$veth" down
+       done
 }
 
 # $1: IP address
@@ -280,10 +294,17 @@ do_transfer()
                local_addr="0.0.0.0"
        fi
 
-       timeout ${timeout_test} \
-               ip netns exec ${listener_ns} \
-                       $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-                               ${local_addr} < "$sin" > "$sout" &
+       if [ "$test_link_fail" -eq 2 ];then
+               timeout ${timeout_test} \
+                       ip netns exec ${listener_ns} \
+                               $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+                                       ${local_addr} < "$sinfail" > "$sout" &
+       else
+               timeout ${timeout_test} \
+                       ip netns exec ${listener_ns} \
+                               $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+                                       ${local_addr} < "$sin" > "$sout" &
+       fi
        spid=$!
 
        sleep 1
@@ -294,7 +315,7 @@ do_transfer()
                                $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
                                        $connect_addr < "$cin" > "$cout" &
        else
-               ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+               ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
                        tee "$cinsent" | \
                        timeout ${timeout_test} \
                                ip netns exec ${connector_ns} \
@@ -323,17 +344,18 @@ do_transfer()
                let rm_nr_ns1=-addr_nr_ns1
                if [ $rm_nr_ns1 -lt 8 ]; then
                        counter=1
+                       pos=1
                        dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
                        if [ ${#dump[@]} -gt 0 ]; then
-                               id=${dump[1]}
                                sleep 1
 
                                while [ $counter -le $rm_nr_ns1 ]
                                do
+                                       id=${dump[$pos]}
                                        ip netns exec ${listener_ns} ./pm_nl_ctl del $id
                                        sleep 1
                                        let counter+=1
-                                       let id+=1
+                                       let pos+=5
                                done
                        fi
                elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -345,6 +367,12 @@ do_transfer()
                fi
        fi
 
+       flags="subflow"
+       if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+               flags="${flags},fullmesh"
+               addr_nr_ns2=${addr_nr_ns2:9}
+       fi
+
        if [ $addr_nr_ns2 -gt 0 ]; then
                let add_nr_ns2=addr_nr_ns2
                counter=3
@@ -356,7 +384,7 @@ do_transfer()
                        else
                                addr="10.0.$counter.2"
                        fi
-                       ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+                       ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
                        let counter+=1
                        let add_nr_ns2-=1
                done
@@ -365,17 +393,18 @@ do_transfer()
                let rm_nr_ns2=-addr_nr_ns2
                if [ $rm_nr_ns2 -lt 8 ]; then
                        counter=1
+                       pos=1
                        dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
                        if [ ${#dump[@]} -gt 0 ]; then
-                               id=${dump[1]}
                                sleep 1
 
                                while [ $counter -le $rm_nr_ns2 ]
                                do
+                                       id=${dump[$pos]}
                                        ip netns exec ${connector_ns} ./pm_nl_ctl del $id
                                        sleep 1
                                        let counter+=1
-                                       let id+=1
+                                       let pos+=5
                                done
                        fi
                elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -434,7 +463,11 @@ do_transfer()
                return 1
        fi
 
-       check_transfer $sin $cout "file received by client"
+       if [ "$test_link_fail" -eq 2 ];then
+               check_transfer $sinfail $cout "file received by client"
+       else
+               check_transfer $sin $cout "file received by client"
+       fi
        retc=$?
        if [ "$test_link_fail" -eq 0 ];then
                check_transfer $cin $sout "file received by server"
@@ -477,29 +510,33 @@ run_tests()
        lret=0
        oldin=""
 
-       if [ "$test_linkfail" -eq 1 ];then
-               size=$((RANDOM%1024))
+       # create the input file for the failure test when
+       # the first failure test run
+       if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+               # the client file must be considerably larger
+               # of the maximum expected cwin value, or the
+               # link utilization will be not predicable
+               size=$((RANDOM%2))
                size=$((size+1))
-               size=$((size*128))
+               size=$((size*8192))
+               size=$((size + ( $RANDOM % 8192) ))
 
-               oldin=$(mktemp)
-               cp "$cin" "$oldin"
-               make_file "$cin" "client" $size
+               cinfail=$(mktemp)
+               make_file "$cinfail" "client" $size
        fi
 
-       do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-               ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
-       lret=$?
+       if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+               size=$((RANDOM%16))
+               size=$((size+1))
+               size=$((size*2048))
 
-       if [ "$test_linkfail" -eq 1 ];then
-               cp "$oldin" "$cin"
-               rm -f "$oldin"
+               sinfail=$(mktemp)
+               make_file "$sinfail" "server" $size
        fi
 
-       if [ $lret -ne 0 ]; then
-               ret=$lret
-               return
-       fi
+       do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+               ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+       lret=$?
 }
 
 chk_csum_nr()
@@ -541,6 +578,43 @@ chk_csum_nr()
        fi
 }
 
+chk_fail_nr()
+{
+       local mp_fail_nr_tx=$1
+       local mp_fail_nr_rx=$2
+       local count
+       local dump_stats
+
+       printf "%-39s %s" " " "ftx"
+       count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+       [ -z "$count" ] && count=0
+       if [ "$count" != "$mp_fail_nr_tx" ]; then
+               echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+               ret=1
+               dump_stats=1
+       else
+               echo -n "[ ok ]"
+       fi
+
+       echo -n " - frx   "
+       count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+       [ -z "$count" ] && count=0
+       if [ "$count" != "$mp_fail_nr_rx" ]; then
+               echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+               ret=1
+               dump_stats=1
+       else
+               echo "[ ok ]"
+       fi
+
+       if [ "${dump_stats}" = 1 ]; then
+               echo Server ns stats
+               ip netns exec $ns1 nstat -as | grep MPTcp
+               echo Client ns stats
+               ip netns exec $ns2 nstat -as | grep MPTcp
+       fi
+}
+
 chk_join_nr()
 {
        local msg="$1"
@@ -590,6 +664,47 @@ chk_join_nr()
        fi
        if [ $checksum -eq 1 ]; then
                chk_csum_nr
+               chk_fail_nr 0 0
+       fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+       local ns=$1
+       local stale_min=$2
+       local stale_max=$3
+       local stale_delta=$4
+       local dump_stats
+       local stale_nr
+       local recover_nr
+
+       printf "%-39s %-18s" " " "stale"
+       stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+       [ -z "$stale_nr" ] && stale_nr=0
+       recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+       [ -z "$recover_nr" ] && recover_nr=0
+
+       if [ $stale_nr -lt $stale_min ] ||
+          [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+          [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+               echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+                    " expected stale in range [$stale_min..$stale_max]," \
+                    " stale-recover delta $stale_delta "
+               ret=1
+               dump_stats=1
+       else
+               echo "[ ok ]"
+       fi
+
+       if [ "${dump_stats}" = 1 ]; then
+               echo $ns stats
+               ip netns exec $ns ip -s link show
+               ip netns exec $ns nstat -as | grep MPTcp
        fi
 }
 
@@ -801,6 +916,27 @@ chk_prio_nr()
        fi
 }
 
+chk_link_usage()
+{
+       local ns=$1
+       local link=$2
+       local out=$3
+       local expected_rate=$4
+       local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
+       local tx_total=`ls -l $out | awk '{print $5}'`
+       local tx_rate=$((tx_link * 100 / $tx_total))
+       local tolerance=5
+
+       printf "%-39s %-18s" " " "link usage"
+       if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
+            $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+               echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+               ret=1
+       else
+               echo "[ ok ]"
+       fi
+}
+
 subflows_tests()
 {
        reset
@@ -918,20 +1054,101 @@ signal_address_tests()
        run_tests $ns1 $ns2 10.0.1.1
        chk_join_nr "signal invalid addresses" 1 1 1
        chk_add_nr 3 3
+
+       # signal addresses race test
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+       run_tests $ns1 $ns2 10.0.1.1
+       chk_add_nr 4 4
 }
 
 link_failure_tests()
 {
        # accept and use add_addr with additional subflows and link loss
        reset
+
+       # without any b/w limit each veth could spool the packets and get
+       # them acked at xmit time, so that the corresponding subflow will
+       # have almost always no outstanding pkts, the scheduler will pick
+       # always the first subflow and we will have hard time testing
+       # active backup and link switch-over.
+       # Let's set some arbitrary (low) virtual link limits.
+       init_shapers
        ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
        ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-       ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
        run_tests $ns1 $ns2 10.0.1.1 1
        chk_join_nr "multiple flows, signal, link failure" 3 3 3
        chk_add_nr 1 1
+       chk_stale_nr $ns2 1 5 1
+
+       # accept and use add_addr with additional subflows and link loss
+       # for bidirectional transfer
+       reset
+       init_shapers
+       ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+       run_tests $ns1 $ns2 10.0.1.1 2
+       chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+       chk_add_nr 1 1
+       chk_stale_nr $ns2 1 -1 1
+
+       # 2 subflows plus 1 backup subflow with a lossy link, backup
+       # will never be used
+       reset
+       init_shapers
+       ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+       export FAILING_LINKS="1"
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+       run_tests $ns1 $ns2 10.0.1.1 1
+       chk_join_nr "backup subflow unused, link failure" 2 2 2
+       chk_add_nr 1 1
+       chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+       # 2 lossy links after half transfer, backup will get half of
+       # the traffic
+       reset
+       init_shapers
+       ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+       export FAILING_LINKS="1 2"
+       run_tests $ns1 $ns2 10.0.1.1 1
+       chk_join_nr "backup flow used, multi links fail" 2 2 2
+       chk_add_nr 1 1
+       chk_stale_nr $ns2 2 4 2
+       chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+       # use a backup subflow with the first subflow on a lossy link
+       # for bidirectional transfer
+       reset
+       init_shapers
+       ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+       run_tests $ns1 $ns2 10.0.1.1 2
+       chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+       chk_add_nr 1 1
+       chk_stale_nr $ns2 1 -1 2
+       chk_link_usage $ns2 ns2eth3 $cinsent 50
 }
 
 add_addr_timeout_tests()
@@ -1530,6 +1747,55 @@ deny_join_id0_tests()
        chk_join_nr "subflow and address allow join id0 2" 1 1 1
 }
 
+fullmesh_tests()
+{
+       # fullmesh 1
+       # 2 fullmesh addrs in ns2, added before the connection,
+       # 1 non-fullmesh addr in ns1, added during the connection.
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+       ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+       run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+       chk_join_nr "fullmesh test 2x1" 4 4 4
+       chk_add_nr 1 1
+
+       # fullmesh 2
+       # 1 non-fullmesh addr in ns1, added before the connection,
+       # 1 fullmesh addr in ns2, added during the connection.
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+       run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+       chk_join_nr "fullmesh test 1x1" 3 3 3
+       chk_add_nr 1 1
+
+       # fullmesh 3
+       # 1 non-fullmesh addr in ns1, added before the connection,
+       # 2 fullmesh addrs in ns2, added during the connection.
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+       run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+       chk_join_nr "fullmesh test 1x2" 5 5 5
+       chk_add_nr 1 1
+
+       # fullmesh 4
+       # 1 non-fullmesh addr in ns1, added before the connection,
+       # 2 fullmesh addrs in ns2, added during the connection,
+       # limit max_subflows to 4.
+       reset
+       ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+       ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+       ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+       run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+       chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+       chk_add_nr 1 1
+}
+
 all_tests()
 {
        subflows_tests
@@ -1545,6 +1811,7 @@ all_tests()
        syncookies_tests
        checksum_tests
        deny_join_id0_tests
+       fullmesh_tests
 }
 
 usage()
@@ -1563,6 +1830,7 @@ usage()
        echo "  -k syncookies_tests"
        echo "  -S checksum_tests"
        echo "  -d deny_join_id0_tests"
+       echo "  -m fullmesh_tests"
        echo "  -c capture pcap files"
        echo "  -C enable data checksum"
        echo "  -h help"
@@ -1598,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
        exit $ret
 fi
 
-while getopts 'fsltra64bpkdchCS' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
        case $opt in
                f)
                        subflows_tests
@@ -1639,6 +1907,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
                d)
                        deny_join_id0_tests
                        ;;
+               m)
+                       fullmesh_tests
+                       ;;
                c)
                        ;;
                C)
index 115decf..3547845 100644 (file)
@@ -25,7 +25,7 @@
 static void syntax(char *argv[])
 {
        fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
-       fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+       fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
        fprintf(stderr, "\tdel <id> [<ip>]\n");
        fprintf(stderr, "\tget <id>\n");
        fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
                                        flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
                                else if (!strcmp(tok, "backup"))
                                        flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+                               else if (!strcmp(tok, "fullmesh"))
+                                       flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
                                else
                                        error(1, errno,
                                              "unknown flag %s", argv[arg]);
                        }
 
+                       if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+                           flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+                               error(1, errno, "error flag fullmesh");
+                       }
+
                        rta = (void *)(data + off);
                        rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
                        rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
                                        printf(",");
                        }
 
+                       if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+                               printf("fullmesh");
+                               flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+                               if (flags)
+                                       printf(",");
+                       }
+
                        /* bump unknown flags, if any */
                        if (flags)
                                printf("0x%x", flags);
index db45213..3653d64 100644 (file)
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 static void sock_fanout_set_cbpf(int fd)
 {
        struct sock_filter bpf_filter[] = {
-               BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),           /* ldb [80] */
-               BPF_STMT(BPF_RET+BPF_A, 0),                   /* ret A */
+               BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),       /* ldb [80] */
+               BPF_STMT(BPF_RET | BPF_A, 0),                 /* ret A */
        };
        struct sock_fprog bpf_prog;
 
index 170be65..1cbfeb5 100755 (executable)
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
index 8b42e8b..a59cb6a 100755 (executable)
@@ -1,9 +1,12 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if [ $(id -u) != 0 ]; then
        echo $msg must be run as root >&2
-       exit 0
+       exit $ksft_skip
 fi
 
 ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755 (executable)
index 0000000..e57bbfb
--- /dev/null
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+netdev_check_for_carrier() {
+       local -r dev="$1"
+
+       for i in {1..5}; do
+               carrier="$(cat /sys/class/net/${dev}/carrier)"
+               if [[ "${carrier}" -ne 1 ]] ; then
+                       echo "carrier not ready yet..." >&2
+                       sleep 1
+               else
+                       echo "carrier ready" >&2
+                       break
+               fi
+       done
+       echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+       local dev="$1"
+
+       # Fail hard if cannot turn on loopback mode for current NIC
+       ethtool -K "${dev}" loopback on || exit 1
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "setup_loopback_environment failed"
+               exit 1
+       fi
+}
+
+setup_macvlan_ns(){
+       local -r link_dev="$1"
+       local -r ns_name="$2"
+       local -r ns_dev="$3"
+       local -r ns_mac="$4"
+       local -r addr="$5"
+
+       ip link add link "${link_dev}" dev "${ns_dev}" \
+               address "${ns_mac}" type macvlan
+       exit_code=$?
+       if [[ "${exit_code}" -ne 0 ]]; then
+               echo "setup_macvlan_ns failed"
+               exit $exit_code
+       fi
+
+       [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+       ip link set dev "${ns_dev}" netns "${ns_name}"
+       ip -netns "${ns_name}" link set dev "${ns_dev}" up
+       if [[ -n "${addr}" ]]; then
+               ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+       fi
+
+       sleep 1
+}
+
+cleanup_macvlan_ns(){
+       while (( $# >= 2 )); do
+               ns_name="$1"
+               ns_dev="$2"
+               ip -netns "${ns_name}" link del dev "${ns_dev}"
+               ip netns del "${ns_name}"
+               shift 2
+       done
+}
+
+cleanup_loopback(){
+       local -r dev="$1"
+
+       ethtool -K "${dev}" loopback off
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "setup_loopback_environment failed"
+               exit 1
+       fi
+}
+
+setup_interrupt() {
+       # Use timer on  host to trigger the network stack
+       # Also disable device interrupt to not depend on NIC interrupt
+       # Reduce test flakiness caused by unexpected interrupts
+       echo 100000 >"${FLUSH_PATH}"
+       echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+       # Set up server_ns namespace and client_ns namespace
+       setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+       setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+       cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+       setup_loopback_environment "${dev}"
+       setup_interrupt
+}
+
+cleanup() {
+       cleanup_loopback "${dev}"
+
+       echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+       echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644 (file)
index 0000000..1003ddf
--- /dev/null
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+setup_veth_ns() {
+       local -r link_dev="$1"
+       local -r ns_name="$2"
+       local -r ns_dev="$3"
+       local -r ns_mac="$4"
+
+       [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+       echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+       ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+       ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+       ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+       # Set up server_ns namespace and client_ns namespace
+       ip link add name server type veth peer name client
+
+       setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+       setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+       local ns_name
+
+       for ns_name in client_ns server_ns; do
+               [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+       done
+}
+
+setup() {
+       # no global init setup step needed
+       :
+}
+
+cleanup() {
+       cleanup_ns
+}
index 75ada17..aebaab8 100755 (executable)
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv6_HS_NETWORK=cafe
@@ -543,18 +546,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
        echo "SKIP: Need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
        echo "SKIP: Could not run test without ip tool"
-       exit 0
+       exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
index ad7a9fc..1003119 100755 (executable)
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
        echo "SKIP: Need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
        echo "SKIP: Could not run test without ip tool"
-       exit 0
+       exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
index 68708f5..b9b06ef 100755 (executable)
 # +---------------------------------------------------+
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 readonly LOCALSID_TABLE_ID=90
 readonly IPv6_RT_NETWORK=fd00
 readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
 
 if [ "$(id -u)" -ne 0 ];then
        echo "SKIP: Need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
        echo "SKIP: Could not run test without ip tool"
-       exit 0
+       exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
         echo "SKIP: vrf sysctl does not exist"
-        exit 0
+        exit $ksft_skip
 fi
 
 cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644 (file)
index 0000000..710ac95
--- /dev/null
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN   40
+#define TOEPLITZ_KEY_MAX_LEN   60
+
+#define TOEPLITZ_STR_LEN(K)    (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN     ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL      /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =    8000;
+static int cfg_family =                AF_INET6;
+static char *cfg_ifname =      "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =          SOCK_STREAM;
+static int cfg_timeout_msec =  1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)   do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+       int fd;
+       char *mmap;
+       int idx;
+       int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+                               const unsigned char *key)
+{
+       int i, bit, ret = 0;
+       uint32_t key32;
+
+       key32 = ntohl(*((uint32_t *)key));
+       key += 4;
+
+       for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+               for (bit = 7; bit >= 0; bit--) {
+                       if (four_tuple[i] & (1 << bit))
+                               ret ^= key32;
+
+                       key32 <<= 1;
+                       key32 |= !!(key[0] & (1 << bit));
+               }
+               key++;
+       }
+
+       return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+       int queue = rx_hash % cfg_num_queues;
+
+       log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+       if (rx_irq_cpus[queue] != cpu) {
+               log_verbose(". error: rss cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+       int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+       log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+       if (rps_silo_to_cpu[silo] != cpu) {
+               log_verbose(". error: rps cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+                      const char *addrs, int addr_len)
+{
+       char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+       uint16_t *ports;
+
+       if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+           !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+               error(1, 0, "address parse error");
+
+       ports = (void *)addrs + (addr_len * 2);
+       log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+                   cpu, rx_hash, saddr, daddr,
+                   ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+       unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+       uint32_t rx_hash_sw;
+       const char *addrs;
+       int addr_len;
+
+       if (cfg_family == AF_INET) {
+               addr_len = sizeof(struct in_addr);
+               addrs = pkt + offsetof(struct iphdr, saddr);
+       } else {
+               addr_len = sizeof(struct in6_addr);
+               addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+       }
+
+       memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+       rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+       if (cfg_verbose)
+               log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+       if (rx_hash != rx_hash_sw) {
+               log_verbose(" != expected 0x%x\n", rx_hash_sw);
+               frames_error++;
+               return;
+       }
+
+       log_verbose(" OK");
+       if (cfg_num_queues)
+               verify_rss(rx_hash, cpu);
+       else if (cfg_num_rps_cpus)
+               verify_rps(rx_hash, cpu);
+       log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+       struct tpacket3_hdr *hdr = (void *)frame;
+
+       if (hdr->hv1.tp_rxhash)
+               verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+                             ring->cpu);
+       else
+               frames_nohash++;
+
+       return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+       struct tpacket_block_desc *block;
+       char *frame;
+       int i;
+
+       block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+       if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+               return;
+
+       frame = (char *)block;
+       frame += block->hdr.bh1.offset_to_first_pkt;
+
+       for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+               frame = recv_frame(ring, frame);
+               frames_received++;
+       }
+
+       block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+       ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+       int i;
+
+       usleep(1000 * cfg_timeout_msec);
+
+       for (i = 0; i < num_cpus; i++)
+               recv_block(&rings[i]);
+
+       fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+               frames_received - frames_nohash - frames_error,
+               frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+       struct tpacket_req3 req3 = {0};
+       void *ring;
+
+       req3.tp_retire_blk_tov = cfg_timeout_msec;
+       req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+       req3.tp_frame_size = 2048;
+       req3.tp_frame_nr = 1 << 10;
+       req3.tp_block_nr = 2;
+
+       req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+       req3.tp_block_size /= req3.tp_block_nr;
+
+       if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+               error(1, errno, "setsockopt PACKET_RX_RING");
+
+       ring_block_sz = req3.tp_block_size;
+       ring_block_nr = req3.tp_block_nr;
+
+       ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+       if (ring == MAP_FAILED)
+               error(1, 0, "mmap failed");
+
+       return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+               BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+       const int off_dport = offsetof(struct tcphdr, dest);    /* same for udp */
+       uint8_t proto;
+
+       proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+       if (cfg_family == AF_INET)
+               __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+                            sizeof(struct iphdr) + off_dport);
+       else
+               __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+                            sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+       struct fanout_args args = {
+               .id = 1,
+               .type_flags = PACKET_FANOUT_CPU,
+               .max_num_members = RSS_MAX_CPUS
+       };
+       struct sockaddr_ll ll = { 0 };
+       int fd, val;
+
+       fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket creation failed");
+
+       val = TPACKET_V3;
+       if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+               error(1, errno, "setsockopt PACKET_VERSION");
+       *ring = setup_ring(fd);
+
+       /* block packets until all rings are added to the fanout group:
+        * else packets can arrive during setup and get misclassified
+        */
+       set_filter_null(fd);
+
+       ll.sll_family = AF_PACKET;
+       ll.sll_ifindex = if_nametoindex(cfg_ifname);
+       ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+                                                 htons(ETH_P_IPV6);
+       if (bind(fd, (void *)&ll, sizeof(ll)))
+               error(1, errno, "bind");
+
+       /* must come after bind: verifies all programs in group match */
+       if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+               /* on failure, retry using old API if that is sufficient:
+                * it has a hard limit of 256 sockets, so only try if
+                * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+                * in this API, the third argument is left implicit.
+                */
+               if (cfg_num_queues || num_cpus > 256 ||
+                   setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+                              &args, sizeof(uint32_t)))
+                       error(1, errno, "setsockopt PACKET_FANOUT cpu");
+       }
+
+       return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+       int fd, val;
+
+       fd = socket(cfg_family, cfg_type, 0);
+       if (fd == -1)
+               error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+       val = 1 << 20;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+               error(1, errno, "setsockopt rcvbuf");
+
+       return fd;
+}
+
+static void setup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               rings[i].cpu = i;
+               rings[i].fd = create_ring(&rings[i].mmap);
+       }
+
+       /* accept packets once all rings in the fanout group are up */
+       for (i = 0; i < num_cpus; i++)
+               set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+                       error(1, errno, "munmap");
+               if (close(rings[i].fd))
+                       error(1, errno, "close");
+       }
+}
+
+static void parse_cpulist(const char *arg)
+{
+       do {
+               rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+               arg = strchr(arg, ',');
+               if (!arg)
+                       break;
+               arg++;                  // skip ','
+       } while (1);
+}
+
+static void show_cpulist(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_queues; i++)
+               fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_rps_cpus; i++)
+               fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+       int i, ret, off;
+
+       if (slen < TOEPLITZ_STR_MIN_LEN ||
+           slen > TOEPLITZ_STR_MAX_LEN + 1)
+               error(1, 0, "invalid toeplitz key");
+
+       for (i = 0, off = 0; off < slen; i++, off += 3) {
+               ret = sscanf(str + off, "%hhx", &key[i]);
+               if (ret != 1)
+                       error(1, 0, "key parse error at %d off %d len %d",
+                             i, off, slen);
+       }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+       unsigned long bitmap;
+       int i;
+
+       bitmap = strtoul(arg, NULL, 0);
+
+       if (bitmap & ~(RPS_MAX_CPUS - 1))
+               error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+                     bitmap, RPS_MAX_CPUS - 1);
+
+       for (i = 0; i < RPS_MAX_CPUS; i++)
+               if (bitmap & 1UL << i)
+                       rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       static struct option long_options[] = {
+           {"dport",   required_argument, 0, 'd'},
+           {"cpus",    required_argument, 0, 'C'},
+           {"key",     required_argument, 0, 'k'},
+           {"iface",   required_argument, 0, 'i'},
+           {"ipv4",    no_argument, 0, '4'},
+           {"ipv6",    no_argument, 0, '6'},
+           {"sink",    no_argument, 0, 's'},
+           {"tcp",     no_argument, 0, 't'},
+           {"timeout", required_argument, 0, 'T'},
+           {"udp",     no_argument, 0, 'u'},
+           {"verbose", no_argument, 0, 'v'},
+           {"rps",     required_argument, 0, 'r'},
+           {0, 0, 0, 0}
+       };
+       bool have_toeplitz = false;
+       int index, c;
+
+       while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+               switch (c) {
+               case '4':
+                       cfg_family = AF_INET;
+                       break;
+               case '6':
+                       cfg_family = AF_INET6;
+                       break;
+               case 'C':
+                       parse_cpulist(optarg);
+                       break;
+               case 'd':
+                       cfg_dport = strtol(optarg, NULL, 0);
+                       break;
+               case 'i':
+                       cfg_ifname = optarg;
+                       break;
+               case 'k':
+                       parse_toeplitz_key(optarg, strlen(optarg),
+                                          toeplitz_key);
+                       have_toeplitz = true;
+                       break;
+               case 'r':
+                       parse_rps_bitmap(optarg);
+                       break;
+               case 's':
+                       cfg_sink = true;
+                       break;
+               case 't':
+                       cfg_type = SOCK_STREAM;
+                       break;
+               case 'T':
+                       cfg_timeout_msec = strtol(optarg, NULL, 0);
+                       break;
+               case 'u':
+                       cfg_type = SOCK_DGRAM;
+                       break;
+               case 'v':
+                       cfg_verbose = true;
+                       break;
+
+               default:
+                       error(1, 0, "unknown option %c", optopt);
+                       break;
+               }
+       }
+
+       if (!have_toeplitz)
+               error(1, 0, "Must supply rss key ('-k')");
+
+       num_cpus = get_nprocs();
+       if (num_cpus > RSS_MAX_CPUS)
+               error(1, 0, "increase RSS_MAX_CPUS");
+
+       if (cfg_num_queues && cfg_num_rps_cpus)
+               error(1, 0,
+                     "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+       if (cfg_verbose) {
+               show_cpulist();
+               show_silos();
+       }
+}
+
+int main(int argc, char **argv)
+{
+       const int min_tests = 10;
+       int fd_sink = -1;
+
+       parse_opts(argc, argv);
+
+       if (cfg_sink)
+               fd_sink = setup_sink();
+
+       setup_rings();
+       process_rings();
+       cleanup_rings();
+
+       if (cfg_sink && close(fd_sink))
+               error(1, errno, "close sink");
+
+       if (frames_received - frames_nohash < min_tests)
+               error(1, 0, "too few frames for verification");
+
+       return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755 (executable)
index 0000000..0a49907
--- /dev/null
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+       echo $(ethtool -x "${DEV}" |
+               egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+               cut -d: -f2- |
+               awk '{$1=$1};1' |
+               tr ' ' '\n' |
+               sort -u |
+               wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+       CPUS=""
+       # sort so that irq 2 is read before irq 10
+       SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+       # Consider only as many queues as RSS actually uses. We assume that
+       # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+       RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+       RXQ_COUNT=0
+
+       for i in ${SORTED_IRQS}
+       do
+               [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+               # lookup relevant IRQs by action name
+               [[ -e "$i/actions" ]] || continue
+               cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+               irqname=$(<"$i/actions")
+
+               # does the IRQ get called
+               irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+               [[ -n "${irqcount}" ]] || continue
+
+               # lookup CPU
+               irq=$(basename "$i")
+               cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+               if [[ -z "${CPUS}" ]]; then
+                       CPUS="${cpu}"
+               else
+                       CPUS="${CPUS},${cpu}"
+               fi
+               RXQ_COUNT=$((RXQ_COUNT+1))
+       done
+
+       echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+       echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+       CMD=""
+       for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+       do
+               CMD="${CMD} echo $1 > ${i};"
+       done
+
+       echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+       echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+       echo "$1"
+       exit 1
+}
+
+check_nic_rxhash_enabled() {
+       local -r pattern="receive-hashing:\ on"
+
+       ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+       local prog=$0
+       shift 1
+
+       while [[ "$1" =~ "-" ]]; do
+               if [[ "$1" = "-irq_prefix" ]]; then
+                       shift
+                       IRQ_PATTERN="^$1-[0-9]*$"
+               elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+                       PROTO_FLAG="$1"
+               elif [[ "$1" = "-4" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP4}"
+                       CLIENT_IP="${CLIENT_IP4}"
+               elif [[ "$1" = "-6" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP6}"
+                       CLIENT_IP="${CLIENT_IP6}"
+               elif [[ "$1" = "-rss" ]]; then
+                       TEST_RSS=true
+               elif [[ "$1" = "-rps" ]]; then
+                       shift
+                       RPS_MAP="$1"
+               elif [[ "$1" = "-i" ]]; then
+                       shift
+                       DEV="$1"
+               else
+                       die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+                            [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+               fi
+               shift
+       done
+}
+
+setup() {
+       setup_loopback_environment "${DEV}"
+
+       # Set up server_ns namespace and client_ns namespace
+       setup_macvlan_ns "${DEV}" server_ns server \
+       "${SERVER_MAC}" "${SERVER_IP}"
+       setup_macvlan_ns "${DEV}" client_ns client \
+       "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+       cleanup_macvlan_ns server_ns server client_ns client
+       cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+       # RPS/RFS must be disabled because they move packets between cpus,
+       # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+       eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+       eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -r "0x${RPS_MAP}" -s -v &
+else
+       ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+       echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755 (executable)
index 0000000..2fef34f
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+       expiration=$((SECONDS+20))
+       while [[ "${SECONDS}" -lt "${expiration}" ]]
+       do
+               if [[ "${PROTO}" == "-u" ]]; then
+                       echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+               else
+                       echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+               fi
+               sleep 0.001
+       done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
index 66354cd..2d10cca 100755 (executable)
 # These tests provide an easy way to flip the expected result of any
 # of these behaviors for testing kernel patches that change them.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # nettest can be run from PATH or from same directory as this selftest
 if ! which nettest >/dev/null; then
        PATH=$PWD:$PATH
        if ! which nettest >/dev/null; then
                echo "'nettest' command not found; skipping tests"
-               exit 0
+               exit $ksft_skip
        fi
 fi
 
index 11d7cdb..19eac3e 100755 (executable)
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
 readonly BM_NET_V4=192.168.1.
 readonly BM_NET_V6=2001:db8::
 
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
 ret=0
 
 cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
        __chk_flag "$1" $2 $3 tcp-segmentation-offload
 }
 
+chk_channels() {
+       local msg="$1"
+       local target=$2
+       local rx=$3
+       local tx=$4
+
+       local dev=veth$target
+
+       local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+               grep RX: | tail -n 1 | awk '{print $2}' `
+               local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+               grep TX: | tail -n 1 | awk '{print $2}'`
+       local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+               grep Combined: | tail -n 1 | awk '{print $2}'`
+
+       printf "%-60s" "$msg"
+       if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+               echo " ok "
+       else
+               echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+       fi
+}
+
 chk_gro() {
        local msg="$1"
        local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
        fi
 }
 
+__change_channels()
+{
+       local cur_cpu
+       local end=$1
+       local cur
+       local i
+
+       while true; do
+               printf -v cur '%(%s)T'
+               [ $cur -le $end ] || break
+
+               for i in `seq 1 $CPUS`; do
+                       ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+                       ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+               done
+
+               for i in `seq 1 $((CPUS - 1))`; do
+                       cur_cpu=$((CPUS - $i))
+                       ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+                       ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+               done
+       done
+}
+
+__send_data() {
+       local end=$1
+
+       while true; do
+               printf -v cur '%(%s)T'
+               [ $cur -le $end ] || break
+
+               ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+       done
+}
+
+do_stress() {
+       local end
+       printf -v end '%(%s)T'
+       end=$((end + $STRESS))
+
+       ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+       ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+       ip netns exec $NS_DST ./udpgso_bench_rx &
+       local rx_pid=$!
+
+       echo "Running stress test for $STRESS seconds..."
+       __change_channels $end &
+       local ch_pid=$!
+       __send_data $end &
+       local data_pid_1=$!
+       __send_data $end &
+       local data_pid_2=$!
+       __send_data $end &
+       local data_pid_3=$!
+       __send_data $end &
+       local data_pid_4=$!
+
+       wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+       kill -9 $rx_pid
+       echo "done"
+
+       # restore previous setting
+       ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+       ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+       echo "Usage: $0 [-h] [-s <seconds>]"
+       echo -e "\t-h: show this help"
+       echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+       case "$option" in
+       "h")
+               usage $0
+               exit 0
+               ;;
+       "s")
+               STRESS=$OPTARG
+               ;;
+       esac
+done
+
 if [ ! -f ../bpf/xdp_dummy.o ]; then
        echo "Missing xdp_dummy helper. Build bpf selftest first"
        exit 1
 fi
 
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
 create_ns
 chk_gro_flag "default - gro flag" $SRC off
 chk_gro_flag "        - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro "        - aggregation with TSO off" 1
 cleanup
 
 create_ns
+chk_channels "default channels" $DST 1 1
+
 ip -n $NS_DST link set dev veth$DST down
 ip netns exec $NS_DST ethtool -K veth$DST gro on
 chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro "        - aggregation with TSO off" 1
 cleanup
 
 create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+       ip netns exec $NS_DST ethtool -L veth$DST tx 2
+       chk_channels "setting tx channels" $DST 1 2
+       CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+       ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+       chk_channels "setting both rx and tx channels" $DST 3 3
+       CUR_RX=3
+       CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+       # this also tests queues nr reduction
+       ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+       ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+       printf "%-60s" "bad setting: XDP with RX nr less than TX"
+       ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+               section xdp_dummy 2>/dev/null &&\
+               echo "fail - set operation successful ?!?" || echo " ok "
+
+       # the following tests will run with multiple channels active
+       ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+       ip netns exec $NS_DST ethtool -L veth$DST rx 2
+       ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+               section xdp_dummy 2>/dev/null
+       printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+       ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+               echo "fail - set operation successful ?!?" || echo " ok "
+       CUR_RX=2
+       CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+       printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+       ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+               echo "fail - set operation successful ?!?" || echo " ok "
+       chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
 ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
 chk_gro_flag "with xdp attached - gro flag" $DST on
 chk_gro_flag "        - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag "        - after gro on xdp off, gro flag" $DST on
 chk_gro_flag "        - peer gro flag" $SRC off
 chk_tso_flag "        - tso flag" $SRC on
 chk_tso_flag "        - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+       ip netns exec $NS_DST ethtool -L veth$DST tx 1
+       chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
 ip -n $NS_DST link set dev veth$DST up
 ip -n $NS_SRC link set dev veth$SRC up
 chk_gro "        - aggregation" 1
 
+if [ $CPUS -gt 1 ]; then
+       [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+       ip -n $NS_DST link set dev veth$DST down
+       ip -n $NS_SRC link set dev veth$SRC down
+       ip netns exec $NS_DST ethtool -L veth$DST tx 2
+       chk_channels "increasing tx channels with device down" $DST 2 2
+       ip -n $NS_DST link set dev veth$DST up
+       ip -n $NS_SRC link set dev veth$SRC up
+fi
+
 ip netns exec $NS_DST ethtool -K veth$DST gro off
 ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
 chk_gro "aggregation again with default and TSO off" 10
index 18b982d..865d53c 100755 (executable)
@@ -3,6 +3,9 @@
 
 # This test is designed for testing the new VRF strict_mode functionality.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 ret=0
 
 # identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
 
 if [ "$(id -u)" -ne 0 ];then
        echo "SKIP: Need root privileges"
-       exit 0
+       exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
        echo "SKIP: Could not run test without ip tool"
-       exit 0
+       exit $ksft_skip
 fi
 
 modprobe vrf &>/dev/null
 if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
        echo "SKIP: vrf sysctl does not exist"
-       exit 0
+       exit $ksft_skip
 fi
 
 cleanup &> /dev/null
index 6eb4c4f..742f229 100644 (file)
         "teardown": [
             "$TC actions flush action skbmod"
         ]
+    },
+    {
+        "id": "fe09",
+        "name": "Add skbmod action to mark ECN bits",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod ecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod pipe ecn",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644 (file)
index 0000000..88a20c7
--- /dev/null
@@ -0,0 +1,137 @@
+[
+       {
+           "id": "ce7d",
+           "name": "Add mq Qdisc to multi-queue device (4 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "2f82",
+           "name": "Add mq Qdisc to multi-queue device (256 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "256",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "c525",
+           "name": "Add duplicate mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "128a",
+           "name": "Delete nonexistent mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "03a9",
+           "name": "Delete mq Qdisc twice",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq",
+            "$TC qdisc del dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+    {
+           "id": "be0f",
+           "name": "Add mq Qdisc to single-queue device",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       }
+]
index cd4a27e..ea04f04 100644 (file)
@@ -17,6 +17,7 @@ NAMES = {
           'DEV1': 'v0p1',
           'DEV2': '',
           'DUMMY': 'dummy1',
+         'ETH': 'eth0',
           'BATCH_FILE': './batch.txt',
           'BATCH_DIR': 'tmp',
           # Length of time in seconds to wait before terminating a command